# config_checker.py
from typing import Any, Tuple, Union
import argpass
import yaml
import os
from os import PathLike
from os import path as p
import multiprocessing as mp
#########################################################################
def read_and_validate_config() -> dict:
"""
Main script for config checker:
1. Accepts --config flag arg with argpass, reads yaml file into a dict
2. Checks paths in pathInfo to see if they are real
3. Checks inputs of cpuInfo to see if they are correct
4. Checks each dockingOrder in dockingOrders to see if they are correct
Returns:
- config (dict)
"""
print("checking config file...")
config: dict = read_input_yaml()
check_pathInfo(config)
check_cpuInfo(config)
check_dockingOrders(config)
print("... config file is correct")
return config
#########################################################################
def read_input_yaml() -> dict:
"""
Reads a YAML file using the "--config" flag with argpass
Reads YAML file into a dict
Returns:
- config (dict)
"""
# create an argpass parser, read config file,
parser = argpass.ArgumentParser()
parser.add_argument("--config")
args = parser.parse_args()
configFile: Union[PathLike, str] = args.config
# Read config.yaml into a dictionary
try:
with open(configFile, "r") as yamlFile:
config: dict = yaml.safe_load(yamlFile)
return config
except FileNotFoundError:
raise FileNotFoundError(f"config file {configFile} not found")
except yaml.YAMLError as exc:
raise yaml.YAMLError("Error parsing YAML file:", exc)
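## Example command line (illustrative, assuming the module is run directly via the
## entry-point sketch at the bottom of this file and that /path/to/config.yaml is
## a placeholder for your own config):
##     python config_checker.py --config /path/to/config.yaml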
#########################################################################
def check_pathInfo(config: dict) -> None:
"""
Checks for pathInfo entry in config
Checks paths in pathInfo to see if they are real
Don't check outDir, this will be made automatically
"""
## check if pathInfo in config
pathInfo, = check_info_for_args(config, "config", ["pathInfo"], optional=False)
# check for required args in pathInfo
protDir, ligandDir, outDir = check_info_for_args(pathInfo, "pathInfo", ["protDir", "ligandDir", "outDir"], optional=False)
## make sure paths exist
for argValue, argName in zip([protDir, ligandDir], ["protDir", "ligandDir"]):
validate_path(argName, argValue)
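## Illustrative pathInfo block for the YAML config (the keys are the ones checked
## above; the paths themselves are placeholders, not real defaults):
##   pathInfo:
##     protDir: /home/user/docking/proteins
##     ligandDir: /home/user/docking/ligands
##     outDir: /home/user/docking/outputs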
#########################################################################
def check_cpuInfo(config: dict) -> None:
"""
Checks for cpuInfo in config
Checks that entries in cpuInfo are int types
Makes sure your computer has enough CPUs for the simulation
"""
## check for cpuInfo in config
cpuInfo, = check_info_for_args(config, "config", ["cpuInfo"], optional= False)
## check for required args in cpuInfo
totalCpuUseage, cpusPerRun = check_info_for_args(cpuInfo, "cpuInfo", ["totalCpuUsage", "cpusPerRun"], optional= False)
## check that cpuInfo arguments are int values and are positive
for argValue, argName in zip([totalCpuUseage, cpusPerRun], ["totalCpuUseage", "cpusPerRun"]):
if not isinstance(argValue, int):
raise TypeError(f"The config argument {argName} = {argValue} is not a an int type.")
if argValue < 1:
raise ValueError(f"The config argument {argName} = {argValue} must be a int greater than 1")
## check that your computer has enough CPUs
if totalCpuUseage > mp.cpu_count():
raise ValueError("totalCpuUseage argument exceeds your computers number of cores")
#########################################################################
def check_dockingOrders(config: dict) -> None:
"""
Checks for dockingOrders in config, makes sure it's a list and not empty
For each entry in dockingOrders:
- checks for required args
- ensures that input files exist in the expected directories
- checks for optional args
- ensures that residue dicts in optional args are formatted correctly
NOTE that this checks basic formatting - if you have specified the
wrong residue with the correct format, this will not ne caught.
The docking simulation prep will fail at some point later on!
"""
## we know pathInfo is all good, so we can use it here
pathInfo = config["pathInfo"]
protDir = pathInfo["protDir"]
ligandDir = pathInfo["ligandDir"]
## check for dockingOrders in config
dockingOrders, = check_info_for_args(config, "config", ["dockingOrders"], optional= False)
## ensure that dockingOrders is a non-zero-length list
if not isinstance(dockingOrders, list):
raise TypeError("dockingOrders must be a list containing dicts")
if len(dockingOrders) == 0:
raise ValueError("dockingOrders must have at least one entry")
## look through each entry in dockingOrders
for dockingOrder in dockingOrders:
## check for required args in dockingOrder
protName, ligandNames = check_info_for_args(dockingOrder, "dockingOrder", ["protein", "ligands"], optional= False)
## check to see if protein and ligand files exist in expected directories
protPdb: Union[PathLike, str] = p.join(protDir,f"{protName}.pdb")
if not p.isfile(protPdb):
raise FileNotFoundError(f"Protein PDB {protPdb} does not exist")
ligandNames: list = dockingOrder["ligands"]
for ligandName in ligandNames:
ligandPdb: Union[PathLike, str] = p.join(ligandDir, f"{ligandName}.pdb")
if not p.isfile(ligandPdb):
raise FileNotFoundError(f"Ligand PDB {ligandPdb} does not exist")
## check for optional args in dockingOrder
pocketResidues, keepResidues, flexibleResidues = check_info_for_args(dockingOrder, "dockingOrder", ["pocketResidues", "keepResidues", "flexibleResidues"], optional=True)
## ensure that these residue-specifying dicts are formatted correctly
for residues, dictName in zip([pocketResidues, keepResidues, flexibleResidues],["pocketResidues", "keepResidues", "flexibleResidues"]):
validate_residue_dict(residues, dictName)
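## Illustrative dockingOrders block (protein and ligand names are placeholders;
## each name must match a PDB file in protDir / ligandDir, and pocketResidues,
## keepResidues and flexibleResidues are optional lists of residue dicts):
##   dockingOrders:
##     - protein: my_protein
##       ligands:
##         - my_ligand_1
##         - my_ligand_2
##       pocketResidues:
##         - {CHAIN_ID: "A", RES_NAME: "HIS", RES_ID: 57}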
#########################################################################
def check_info_for_args(info: dict, infoName: str, argNames: list, optional: bool) -> list:
"""
Simple check to see if list of keys is in a dict
If optional is set to "False", raises a KeyError
If all is as expected returns a list of values to be unpacked
"""
## init empty list to append to
unpackedDicts: list = []
for argName in argNames:
isArg, argValue = check_dict_for_key(info, argName)
## if a required arg is not in the dict, raise a KeyError
if not isArg and not optional:
raise KeyError(f"Argument {argName} not found in {infoName}")
unpackedDicts.append(argValue)
return unpackedDicts
#########################################################################
def check_dict_for_key(info: dict, key: Any) -> Tuple[bool, Any]:
    """
    Checks to see if a key is in a dict
    If it is, returns True and the associated value
    If it is not, returns (False, False)
    """
    if key in info:
        if info[key] == False:
            return True, False
        else:
            return True, info[key]
    return False, False
#########################################################################
def validate_path(argName: str, argPath: Union[PathLike, str]) -> None:
"""
Check to see if a path variable is indeed the correct type
Check to see if the path exists
"""
if not isinstance(argPath, (os.PathLike, str)) :
raise TypeError(f"The config argument {argName} = {argPath} is not a PathLike.")
# Check if the path exists
if not p.exists(argPath):
raise FileNotFoundError(f"The config argument {argName} = {argPath} does not exist.")
#########################################################################
def validate_residue_dict(info: list, infoName: str) -> None:
    """
    Ensures that each residue-specifying dict in a list is correctly formatted, eg:
    {"CHAIN_ID": "B", "RES_NAME": "GLY", "RES_ID": 123}
    """
    aminoAcidThreeLetterNames = init_amino_acid_list()
    for residue in info:
        chainId, resName, resId = check_info_for_args(residue, "residue", ["CHAIN_ID", "RES_NAME", "RES_ID"], optional=False)
        if not isinstance(chainId, str):
            raise TypeError(f"CHAIN_ID in {infoName} must be a string")
        if not isinstance(resName, str):
            raise TypeError(f"RES_NAME in {infoName} must be a string")
        if resName not in aminoAcidThreeLetterNames:
            raise ValueError(f"RES_NAME in {infoName} must be a canonical amino acid three-letter, ALL-CAPS code")
        if not isinstance(resId, int):
            raise TypeError(f"RES_ID in {infoName} must be an int")
        if resId < 1:
            raise ValueError(f"RES_ID in {infoName} must be a positive int")
#########################################################################
def init_amino_acid_list() -> list:
"""
Creates list of the three-letter codes for the 20 amino acids
TODO: add method for dealing with non-naturals
"""
aminoAcidThreeLetterNames: list = [
"ALA", # Alanine
"ARG", # Arginine
"ASN", # Asparagine
"ASP", # Aspartic acid
"CYS", # Cysteine
"GLN", # Glutamine
"GLU", # Glutamic acid
"GLY", # Glycine
"HIS", # Histidine
"ILE", # Isoleucine
"LEU", # Leucine
"LYS", # Lysine
"MET", # Methionine
"PHE", # Phenylalanine
"PRO", # Proline
"SER", # Serine
"THR", # Threonine
"TRP", # Tryptophan
"TYR", # Tyrosine
"VAL" # Valine
]
return aminoAcidThreeLetterNames
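#########################################################################
## Entry-point sketch (illustrative; an assumption that the checker may also be
## run standalone rather than only imported by a docking pipeline script):
if __name__ == "__main__":
    read_and_validate_config()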