Skip to content
Permalink
development
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/env python
"""
@author Peter Zaffetti 2018
"""
from logger import get_logger
from primatives.micelle_connection import MicelleConnection
from primatives.micelle import Micelle
from primatives.point import Point
from enum import Enum
import re
def parse_micelle_input_file(filename="test.dat"):
    """
    Parse a file of X,Y,Z points into a single Micelle.

    Each data line holds three comma-separated numbers. Parentheses and whitespace are stripped before parsing, and
    anything after a "%" on a data line is treated as a trailing comment. Blank lines, and lines whose first
    remaining character is neither a digit nor "-" (this includes "%" and "#" comment lines), are skipped.

    :param filename: the file to be opened and parsed.
    :return: a single-element list holding one (identifier, Micelle) tuple. The identifier is currently always 1.
    :raises ValueError: if a data line does not hold exactly three comma-separated values, or if one of the values
        cannot be converted to a float.
    """
    logger = get_logger(parse_micelle_input_file.__name__)
    micelle_points = []

    # The context manager guarantees the file is closed, even when a malformed line raises below.
    with open(filename, "r") as input_file:
        for line in input_file:
            line = line.replace("(", "").replace(")", "")  # PDZ- remove any parens
            line = "".join(line.split())  # PDZ- Remove all whitespace in the row

            # Skip blank lines and anything that cannot be the start of a number. Comment lines ("%" or "#") are
            # covered by the same test because their first character is not a digit or a minus sign.
            if not line or not (line[0].isdigit() or line[0] == "-"):
                continue

            # Drop any trailing "%" comment before splitting out the coordinates.
            point_values = line.split("%")[0]
            comma_split_point_values = point_values.split(",")
            logger.debug(comma_split_point_values)

            if len(comma_split_point_values) != 3:
                raise ValueError("The micelle point file is not formatted properly")

            x_val, y_val, z_val = (float(value) for value in comma_split_point_values)

            # Indices are implicit when the listed points don't carry indices: each point gets its position in the
            # list, which is the list's current length at the time it is appended.
            micelle_points.append(Point(len(micelle_points), x_val, y_val, z_val))

    # TODO: PDZ- figure out what the identifier should be for X,Y,Z input files. Right now it is hardcoded to 1.
    return [(1, Micelle(micelle_points))]
def parse_connection_indices_file(filename):
    """
    Parse a file listing the connection indices for a Micelle input file (such a file is not required).

    Each data line is whitespace-separated and laid out as: the source index, the number of connections for that
    index, then the connected indices. Only lines whose very first character is a digit are treated as data; comment
    lines ("%" or "#"), blank lines and lines with leading whitespace are skipped.

    :param filename: the file to be opened and parsed.
    :return: a list of MicelleConnection objects, one per data line, in file order. The indices are handed to
        MicelleConnection as the strings read from the file.
    :raises RuntimeError: if a data line holds fewer than two whitespace-separated values.
    """
    logger = get_logger(parse_connection_indices_file.__name__)
    connections = []

    # The context manager guarantees the file is closed, even when a malformed line raises below.
    with open(filename, "r") as input_file:
        for line in input_file:
            logger.debug(line)

            # Skip anything that does not start with a digit; that covers comment lines and blank lines too.
            if not line or not line[0].isdigit():
                continue

            split_line = line.split()

            # A data line needs at least the source index plus one more field.
            if len(split_line) <= 1:
                raise RuntimeError("The connection line: {} is malformed. It should have a source index and at least 1 other connection index.".format(line))

            # The first field is the source index and everything from field 2 onwards is a connection index.
            # NOTE: Field 1 is skipped since it is just the number of connections for this index.
            connections.append(MicelleConnection(split_line[0], split_line[2:]))

    return connections
class PDBIndex(Enum):
    """
    Names for the 1-based field positions of an ATOM line in the PDB-style micelle files.

    Each value is used as a regex group number when an ATOM line is parsed.
    """

    TAG = 1  # the record tag, e.g. "ATOM"
    ATOM_NUM = 2  # atom number
    ATOM_NAME = 3  # atom name
    RESIDUE_NAME = 4  # residue name
    CLUSTER_SHAPE_ID = 5  # cluster shape id
    X_COORD = 6  # x coordinate
    Y_COORD = 7  # y coordinate
    Z_COORD = 8  # z coordinate
    CLUSTER_ID = 9  # id of the cluster the atom belongs to
    CLUSTER_SIZE = 10  # size of that cluster
    BETA = 11  # 1st trailing annotation
    OCCUPANCY = 12  # 2nd trailing annotation
def parse_micelle_from_pdb_file(filename="test.dat"):
    """
    Parse micelles from a file in PDB format.

    Only lines whose first field is exactly "ATOM" contribute points; HEADER lines, REMARK lines and every other
    line are ignored. Points are grouped into micelles by the cluster id field of their line.

    :param filename: the file containing the micelles.
    :return: a list of (cluster id, Micelle) tuples, ordered by first appearance of each cluster id in the file.
        The cluster id is the string read from the file.
    :raises ValueError: if an ATOM line is missing a numeric field that is needed to build a point.
    """
    # The groups of the regex below (1-based, mirrored by PDBIndex) are:
    #   1. the "ATOM" tag
    #   2. atom num
    #   3. atom name
    #   4. residue name
    #   5. cluster shape id (currently identified as a word but might just be digits)
    #   6. x coord
    #   7. y coord
    #   8. z coord
    #   9. cluster id
    #   10. size of cluster
    #   11. 1st annotation (beta/temperature_factor?)
    #   12. 2nd annotation (occupancy?)
    # The regex should match and group the following example line:
    #   ATOM 69901 CH2C SLI 13 18.010 29.315 33.694 110 8 1 0
    # The decimal point in the coordinate groups is escaped on purpose: an unescaped "." also matches the space
    # between two fields, which glues neighbouring integer coordinates together (e.g. "18 29") and breaks float().
    atom_line_regex = re.compile(
        r"(\w*)\s*(\d*)\s*(\w*)\s*(\w*)\s*(\w*)\s*"
        r"(-*\d*\.?\d*)\s*(-*\d*\.?\d*)\s*(-*\d*\.?\d*)\s*"
        r"(\d*)\s*(\d*)\s*(\d*)\s*(\d*)"
    )

    # Maps cluster id -> list of points; a plain dict keeps the clusters in first-seen order.
    micelle_id_dict = {}

    # The context manager guarantees the file is closed, even when a malformed ATOM line raises below.
    with open(filename, "r") as input_file:
        for line in input_file:
            # if the line is a header or a remark, skip over it.
            if line.startswith(("HEADER", "REMARK")):
                continue

            match_results = atom_line_regex.match(line)
            if match_results is None or match_results.group(PDBIndex.TAG.value) != "ATOM":
                continue

            field = match_results.group
            beta = int(field(PDBIndex.BETA.value))
            atom_num = int(field(PDBIndex.ATOM_NUM.value))

            # The point id combines the beta annotation with the atom number.
            # NOTE(review): presumably this keeps ids unique when atom numbers repeat in a file - confirm.
            point_id = (beta * 100000) + atom_num

            micelle_point = Point(
                point_id,
                float(field(PDBIndex.X_COORD.value)),
                float(field(PDBIndex.Y_COORD.value)),
                float(field(PDBIndex.Z_COORD.value)),
            )

            # get the id of the micelle which contains the point and file the point under it
            micelle_id = field(PDBIndex.CLUSTER_ID.value)
            micelle_id_dict.setdefault(micelle_id, []).append(micelle_point)

    # convert each list of micelle points to a Micelle object
    # TODO: handle the id of the micelle
    return [(micelle_id, Micelle(points)) for micelle_id, points in micelle_id_dict.items()]