From fea85d69be3ef01faccbd3d1cb5e7203d2ce9887 Mon Sep 17 00:00:00 2001 From: Travis Kessler Date: Mon, 25 Mar 2019 18:31:40 -0400 Subject: [PATCH 1/7] Added set_start_method, a fix for Unix multiprocessing --- ecnet/server.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ecnet/server.py b/ecnet/server.py index 0768af8..370a806 100644 --- a/ecnet/server.py +++ b/ecnet/server.py @@ -23,9 +23,9 @@ save_config, save_df, train_model, use_model # Stdlib imports -from os import listdir, makedirs, path, walk +from os import listdir, makedirs, name, path, walk from operator import itemgetter -from multiprocessing import Pool +from multiprocessing import Pool, set_start_method from shutil import rmtree from zipfile import ZipFile, ZIP_DEFLATED @@ -53,6 +53,9 @@ def __init__(self, model_config='config.yml', prj_file=None, self._num_processes = num_processes + if name != 'nt': + set_start_method('spawn', force=True) + if prj_file is not None: self._open_project(prj_file) return From 25003157bf57695f9de1ee30c203a7f55adfb3d0 Mon Sep 17 00:00:00 2001 From: Travis Kessler Date: Mon, 25 Mar 2019 18:58:18 -0400 Subject: [PATCH 2/7] Added option for creating database with fingerprints instead of descriptors --- ecnet/tools/conversions.py | 47 ++++++++++++++++++++++++++------------ ecnet/tools/database.py | 6 +++-- tests/test_create_db.py | 17 ++++++++++++-- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/ecnet/tools/conversions.py b/ecnet/tools/conversions.py index 4902953..29413e0 100644 --- a/ecnet/tools/conversions.py +++ b/ecnet/tools/conversions.py @@ -84,13 +84,15 @@ def smiles_to_mdl(smiles_file, mdl_file): continue -def mdl_to_descriptors(mdl_file, descriptors_csv): +def mdl_to_descriptors(mdl_file, descriptors_csv, fingerprints=False): '''Generates QSPR descriptors from supplied MDL file using PaDEL-Descriptor Args: mdl_file (str): path to source MDL file descriptors_csv (str): path to resulting CSV file w/ descriptors + 
fingerprints (bool): if True, generates molecular fingerprints instead + of QSPR descriptors Returns: list: list of dicts, where each dict is a molecule populated with @@ -105,20 +107,35 @@ def mdl_to_descriptors(mdl_file, descriptors_csv): dn = open(devnull, 'w') for attempt in range(3): try: - call([ - 'java', - '-jar', - _PADEL_PATH, - '-2d', - '-3d', - '-retainorder', - '-retain3d', - '-dir', - mdl_file, - '-file', - descriptors_csv - ], stdout=dn, stderr=dn, timeout=3600) - break + if fingerprints: + call([ + 'java', + '-jar', + _PADEL_PATH, + '-fingerprints', + '-retainorder', + '-retain3d', + '-dir', + mdl_file, + '-file', + descriptors_csv + ], stdout=dn, stderr=dn, timeout=600) + break + else: + call([ + 'java', + '-jar', + _PADEL_PATH, + '-2d', + '-3d', + '-retainorder', + '-retain3d', + '-dir', + mdl_file, + '-file', + descriptors_csv + ], stdout=dn, stderr=dn, timeout=600) + break except Exception as e: if attempt == 2: raise e diff --git a/ecnet/tools/database.py b/ecnet/tools/database.py index bbb02b3..e8061fc 100644 --- a/ecnet/tools/database.py +++ b/ecnet/tools/database.py @@ -20,7 +20,7 @@ def create_db(input_txt, output_name, id_prefix='', targets=None, form='name', smiles_file='mols.smi', mdl_file='mols.mdl', - desc_file='descriptors.csv', clean_up=True): + desc_file='descriptors.csv', clean_up=True, fingerprints=False): '''Create an ECNet-formatted database from either molecule names or SMILES strings @@ -37,6 +37,8 @@ def create_db(input_txt, output_name, id_prefix='', targets=None, form='name', desc_file (str): name of descriptors file generated by PaDEL-Descriptor clean_up (bool): if True, cleans up all files generated during processing except for the input text files and output database + fingerprints (bool): if True, generates molecular fingerprints instead + of QSPR descriptors ''' input_data = _read_txt(input_txt) @@ -67,7 +69,7 @@ def create_db(input_txt, output_name, id_prefix='', targets=None, form='name', target_data = [0 for _ in 
range(len(input_data))] smiles_to_mdl(smiles_file, mdl_file) - desc = mdl_to_descriptors(mdl_file, desc_file) + desc = mdl_to_descriptors(mdl_file, desc_file, fingerprints) desc_keys = list(desc[0].keys()) try: desc_keys.remove('Name') diff --git a/tests/test_create_db.py b/tests/test_create_db.py index 3386efa..c98ce76 100644 --- a/tests/test_create_db.py +++ b/tests/test_create_db.py @@ -22,7 +22,20 @@ def from_smiles(): ) +def fingerprints(): + + print('Creating database with fingerprints...') + create_db( + 'mols_smiles.txt', + 'db_with_fingerprints.csv', + targets='mols_targets.txt', + form='smiles', + fingerprints=True + ) + + if __name__ == '__main__': - from_names() - from_smiles() + # from_names() + # from_smiles() + fingerprints() From 4bf6718b7318504482c7ef62b16d1a9cf193b5e2 Mon Sep 17 00:00:00 2001 From: Travis Kessler Date: Mon, 25 Mar 2019 18:59:37 -0400 Subject: [PATCH 3/7] Uncommented out tests --- tests/test_create_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_create_db.py b/tests/test_create_db.py index c98ce76..4fbb0b0 100644 --- a/tests/test_create_db.py +++ b/tests/test_create_db.py @@ -36,6 +36,6 @@ def fingerprints(): if __name__ == '__main__': - # from_names() - # from_smiles() + from_names() + from_smiles() fingerprints() From 6ab93a891e3c84a96538a38f757cd520c8477257 Mon Sep 17 00:00:00 2001 From: Travis Kessler Date: Mon, 25 Mar 2019 19:04:46 -0400 Subject: [PATCH 4/7] get_smiles now returns empty string instead of throwing exception --- ecnet/tools/conversions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecnet/tools/conversions.py b/ecnet/tools/conversions.py index 29413e0..2f67520 100644 --- a/ecnet/tools/conversions.py +++ b/ecnet/tools/conversions.py @@ -39,7 +39,7 @@ def get_smiles(name): smiles = [m.isomeric_smiles for m in get_compounds(name, 'name')] if len(smiles) == 0: - raise IndexError('PubChem entry not found for {}'.format(name)) + return '' else: return 
smiles[0] From 4677727c42f5eae35d42c5fd6687411651b3147f Mon Sep 17 00:00:00 2001 From: Travis Kessler Date: Mon, 25 Mar 2019 19:33:50 -0400 Subject: [PATCH 5/7] Added default call locations for logs --- ecnet/tasks/limit_inputs.py | 1 + ecnet/tasks/remove_outliers.py | 1 + ecnet/tasks/tuning.py | 1 + 3 files changed, 3 insertions(+) diff --git a/ecnet/tasks/limit_inputs.py b/ecnet/tasks/limit_inputs.py index e0a55b9..38609f3 100644 --- a/ecnet/tasks/limit_inputs.py +++ b/ecnet/tasks/limit_inputs.py @@ -37,6 +37,7 @@ def limit_rforest(df, limit_num, num_estimators=1000, num_processes=1): if logger.file_level != 'disable': ditto_logger.log_dir = logger.log_dir ditto_logger.file_level = logger.file_level + ditto_logger.default_call_loc('LIMIT') item_collection = ItemCollection(df._filename) for inp_name in df.input_names: item_collection.add_attribute(Attribute(inp_name)) diff --git a/ecnet/tasks/remove_outliers.py b/ecnet/tasks/remove_outliers.py index 8f062dd..dc3af6d 100644 --- a/ecnet/tasks/remove_outliers.py +++ b/ecnet/tasks/remove_outliers.py @@ -38,6 +38,7 @@ def remove_outliers(df, leaf_size=40, num_processes=1): if logger.file_level != 'disable': ditto_logger.log_dir = logger.log_dir ditto_logger.file_level = logger.file_level + ditto_logger.default_call_loc('OUTLIERS') item_collection = ItemCollection(df._filename) for inp_name in df.input_names: item_collection.add_attribute(Attribute(inp_name)) diff --git a/ecnet/tasks/tuning.py b/ecnet/tasks/tuning.py index bb38ccb..b9dec17 100644 --- a/ecnet/tasks/tuning.py +++ b/ecnet/tasks/tuning.py @@ -73,6 +73,7 @@ def tune_hyperparameters(df, vars, num_employers, num_iterations, if logger.file_level != 'disable': abc._logger.log_dir = logger.log_dir abc._logger.file_level = logger.file_level + abc._logger.default_call_loc('TUNE') abc.create_employers() for i in range(num_iterations): logger.log('info', 'Iteration {}'.format(i + 1), call_loc='TUNE') From 4ac6cbcb8f050260d2dc780618d13cf490de5b3e Mon Sep 17 00:00:00 
2001 From: Kessler Date: Tue, 26 Mar 2019 10:17:43 -0400 Subject: [PATCH 6/7] Typo fix --- ecnet/tasks/tuning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecnet/tasks/tuning.py b/ecnet/tasks/tuning.py index b9dec17..a2fd016 100644 --- a/ecnet/tasks/tuning.py +++ b/ecnet/tasks/tuning.py @@ -93,7 +93,7 @@ def tune_hyperparameters(df, vars, num_employers, num_iterations, vars['beta_2'] = params[1] vars['decay'] = params[2] vars['epsilon'] = params[3] - vars['learning_date'] = params[4] + vars['learning_rate'] = params[4] for l_idx in range(len(vars['hidden_layers'])): vars['hidden_layers'][l_idx][0] = params[5 + l_idx] return vars From c0eda14fa2101fad03e3481a330dfbe031625aa8 Mon Sep 17 00:00:00 2001 From: Kessler Date: Wed, 27 Mar 2019 11:33:24 -0400 Subject: [PATCH 7/7] Version bump: 3.0.0 -> 3.0.1 --- ecnet/__init__.py | 2 +- ecnet/models/mlp.py | 2 +- ecnet/server.py | 2 +- ecnet/tasks/limit_inputs.py | 2 +- ecnet/tasks/remove_outliers.py | 2 +- ecnet/tasks/tuning.py | 2 +- ecnet/tools/conversions.py | 2 +- ecnet/tools/database.py | 2 +- ecnet/tools/project.py | 2 +- ecnet/utils/data_utils.py | 2 +- ecnet/utils/error_utils.py | 2 +- ecnet/utils/logging.py | 2 +- ecnet/utils/server_utils.py | 2 +- setup.py | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ecnet/__init__.py b/ecnet/__init__.py index 6480a3d..8c0ddcf 100644 --- a/ecnet/__init__.py +++ b/ecnet/__init__.py @@ -1,2 +1,2 @@ from ecnet.server import Server -__version__ = '3.0.0' +__version__ = '3.0.1' diff --git a/ecnet/models/mlp.py b/ecnet/models/mlp.py index 5f5d07f..e07d53c 100644 --- a/ecnet/models/mlp.py +++ b/ecnet/models/mlp.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/models/mlp.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains the "MultilayerPerceptron" (feed-forward neural network) class diff --git a/ecnet/server.py b/ecnet/server.py index 370a806..e20db5e 100644 --- a/ecnet/server.py +++ b/ecnet/server.py @@ -2,7 
+2,7 @@ # -*- coding: utf-8 -*- # # ecnet/server.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains the "Server" class, which handles ECNet project creation, neural diff --git a/ecnet/tasks/limit_inputs.py b/ecnet/tasks/limit_inputs.py index 38609f3..5e25cb4 100644 --- a/ecnet/tasks/limit_inputs.py +++ b/ecnet/tasks/limit_inputs.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/tasks/limit_inputs.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions for selecting influential input parameters diff --git a/ecnet/tasks/remove_outliers.py b/ecnet/tasks/remove_outliers.py index dc3af6d..793d8ea 100644 --- a/ecnet/tasks/remove_outliers.py +++ b/ecnet/tasks/remove_outliers.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/tasks/remove_outliers.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains function for removing outliers from ECNet DataFrame diff --git a/ecnet/tasks/tuning.py b/ecnet/tasks/tuning.py index a2fd016..a9e49ca 100644 --- a/ecnet/tasks/tuning.py +++ b/ecnet/tasks/tuning.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/tasks/tuning.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions/fitness functions for tuning hyperparameters diff --git a/ecnet/tools/conversions.py b/ecnet/tools/conversions.py index 2f67520..161e0ee 100644 --- a/ecnet/tools/conversions.py +++ b/ecnet/tools/conversions.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/tools/conversions.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions for converting various chemical file formats diff --git a/ecnet/tools/database.py b/ecnet/tools/database.py index e8061fc..8d34e79 100644 --- a/ecnet/tools/database.py +++ b/ecnet/tools/database.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/tools/database.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions for creating ECNet-formatted databases diff --git 
a/ecnet/tools/project.py b/ecnet/tools/project.py index afb4c45..e4a63c6 100644 --- a/ecnet/tools/project.py +++ b/ecnet/tools/project.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/tools/project.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions for predicting data using pre-existing .prj files diff --git a/ecnet/utils/data_utils.py b/ecnet/utils/data_utils.py index 706a9fb..ee81d29 100644 --- a/ecnet/utils/data_utils.py +++ b/ecnet/utils/data_utils.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/utils/data_utils.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions/classes for loading data, saving data, saving results diff --git a/ecnet/utils/error_utils.py b/ecnet/utils/error_utils.py index 202bfb2..8394d3e 100644 --- a/ecnet/utils/error_utils.py +++ b/ecnet/utils/error_utils.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/utils/error_utils.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions for error calculations diff --git a/ecnet/utils/logging.py b/ecnet/utils/logging.py index 842bafd..560db76 100644 --- a/ecnet/utils/logging.py +++ b/ecnet/utils/logging.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/utils/logging.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains logger used by ECNet diff --git a/ecnet/utils/server_utils.py b/ecnet/utils/server_utils.py index 220f78e..084d313 100644 --- a/ecnet/utils/server_utils.py +++ b/ecnet/utils/server_utils.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # ecnet/utils/server_utils.py -# v.3.0.0 +# v.3.0.1 # Developed in 2019 by Travis Kessler # # Contains functions used by ecnet.Server diff --git a/setup.py b/setup.py index 82537df..5f33f4c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='ecnet', - version='3.0.0', + version='3.0.1', description='UMass Lowell Energy and Combustion Research Laboratory Neural' ' Network Software', 
url='http://github.com/tjkessler/ecnet',