Merge pull request #34 from ECRL/dev
Type checking, improved unit testing
Showing 73 changed files with 746 additions and 3,915 deletions.
@@ -1,2 +1,2 @@
from ecnet.server import Server
__version__ = '3.1.1'
__version__ = '3.1.2'
ecnet/models/mlp.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/models/mlp.py
# v.3.1.1
# v.3.1.2
# Developed in 2019 by Travis Kessler <[email protected]>
#
# Contains the "MultilayerPerceptron" (feed-forward neural network) class

@@ -15,6 +15,7 @@
# 3rd party imports
from tensorflow import get_default_graph, logging
from numpy import array
stderr = sys.stderr
sys.stderr = open(devnull, 'w')
from keras.backend import clear_session, reset_uids

@@ -36,7 +37,7 @@
class MultilayerPerceptron:

def __init__(self, filename='model.h5'):
def __init__(self, filename: str='model.h5'):
'''MultilayerPerceptron object: fits neural network to supplied inputs
and targets

@@ -54,7 +55,8 @@ def __init__(self, filename='model.h5'):
clear_session()
self._model = Sequential(name=filename.lower().replace('.h5', ''))

def add_layer(self, num_neurons, activation, input_dim=None):
def add_layer(self, num_neurons: int, activation: str,
input_dim: int=None):
'''Adds a fully-connected layer to the model
Args:

@@ -71,8 +73,10 @@ def add_layer(self, num_neurons, activation, input_dim=None):
input_shape=(input_dim,)
))

def fit(self, l_x, l_y, v_x=None, v_y=None, epochs=1500, lr=0.001,
beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, v=0):
def fit(self, l_x: array, l_y: array, v_x: array=None, v_y: array=None,
epochs: int=1500, lr: float=0.001, beta_1: float=0.9,
beta_2: float=0.999, epsilon: float=0.0000001, decay: float=0.0,
v: int=0):
'''Fits neural network to supplied inputs and targets
Args:

@@ -133,7 +137,7 @@ def fit(self, l_x, l_y, v_x=None, v_y=None, epochs=1500, lr=0.001,
logger.log('debug', 'Training complete after {} epochs'.format(epochs),
call_loc='MLP')

def use(self, x):
def use(self, x: array) -> array:
'''Uses neural network to predict values for supplied data
Args:

@@ -146,7 +150,7 @@ def use(self, x):
with get_default_graph().as_default():
return self._model.predict(x)

def save(self, filename=None):
def save(self, filename: str=None):
'''Saves neural network to .h5 file
filename (str): if None, uses MultilayerPerceptron._filename;

@@ -165,7 +169,7 @@ def save(self, filename=None):
logger.log('debug', 'Model saved to {}'.format(filename),
call_loc='MLP')

def load(self, filename=None):
def load(self, filename: str=None):
'''Loads neural network from .h5 file
Args:
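For reference, a minimal usage sketch of the annotated MultilayerPerceptron API above. The import path is assumed from the ecnet/models/mlp.py header, and the filename, layer sizes, and toy data are illustrative placeholders, not part of this commit.

# minimal sketch; signatures taken from the diff above, values are placeholders
from numpy import array
from ecnet.models.mlp import MultilayerPerceptron  # path assumed from ecnet/models/mlp.py

model = MultilayerPerceptron(filename='example.h5')      # placeholder filename
model.add_layer(32, 'relu', input_dim=4)                 # first layer supplies input_dim
model.add_layer(32, 'relu')
model.add_layer(1, 'linear')

x = array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]])  # toy inputs
y = array([[1.0], [0.0]])                                 # toy targets
model.fit(x, y, epochs=10, v=0)                           # epsilon now defaults to 1e-7
preds = model.use(x)                                      # returns a numpy array
model.save()                                              # writes example.h5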
ecnet/server.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/server.py
# v.3.1.1
# v.3.1.2
# Developed in 2019 by Travis Kessler <[email protected]>
#
# Contains the "Server" class, which handles ECNet project creation, neural

@@ -32,8 +32,8 @@
class Server:

def __init__(self, model_config='config.yml', prj_file=None,
num_processes=1):
def __init__(self, model_config: str='config.yml', prj_file: str=None,
num_processes: int=1):
'''Server object: handles data loading, model creation, data-to-model
hand-off, data input parameter selection, hyperparameter tuning

@@ -72,7 +72,8 @@ def __init__(self, model_config='config.yml', prj_file=None,
self._vars = default_config()
save_config(self._vars, self._cf_file)

def load_data(self, filename, random=False, split=None, normalize=False):
def load_data(self, filename: str, random: bool=False, split: list=None,
normalize: bool=False):
'''Loads data from an ECNet-formatted CSV database
Args:

@@ -92,7 +93,8 @@ def load_data(self, filename, random=False, split=None, normalize=False):
self._df.create_sets(random, split)
self._sets = self._df.package_sets()

def create_project(self, project_name, num_pools=1, num_candidates=1):
def create_project(self, project_name: str, num_pools: int=1,
num_candidates: int=1):
'''Creates folder hierarchy for a new project
Args:

@@ -116,7 +118,7 @@ def create_project(self, project_name, num_pools=1, num_candidates=1):
logger.log('debug', 'Number of candidates/pool: {}'.format(
num_candidates), call_loc='PROJECT')

def remove_outliers(self, leaf_size=30, output_filename=None):
def remove_outliers(self, leaf_size: int=30, output_filename: str=None):
'''Removes any outliers from the currently-loaded data using
unsupervised outlier detection using local outlier factor

@@ -136,8 +138,8 @@ def remove_outliers(self, leaf_size=30, output_filename=None):
logger.log('info', 'Resulting database saved to {}'.format(
output_filename), call_loc='OUTLIERS')

def limit_inputs(self, limit_num, num_estimators=1000,
output_filename=None):
def limit_inputs(self, limit_num: int, num_estimators: int=1000,
output_filename: str=None):
'''Selects `limit_num` influential input parameters using random
forest regression

@@ -163,9 +165,10 @@ def limit_inputs(self, limit_num, num_estimators=1000,
logger.log('info', 'Resulting database saved to {}'.format(
output_filename), call_loc='LIMIT')

def tune_hyperparameters(self, num_employers, num_iterations,
shuffle=None, split=None, validate=True,
eval_set=None, eval_fn='rmse'):
def tune_hyperparameters(self, num_employers: int, num_iterations: int,
shuffle: bool=None, split: list=None,
validate: bool=True, eval_set: str=None,
eval_fn: str='rmse'):
'''Tunes neural network learning hyperparameters using an artificial
bee colony algorithm; tuned hyperparameters are saved to Server's
model configuration file

@@ -206,8 +209,9 @@ def tune_hyperparameters(self, num_employers, num_iterations,
)
save_config(self._vars, self._cf_file)

def train(self, shuffle=None, split=None, retrain=False,
validate=False, selection_set=None, selection_fn='rmse'):
def train(self, shuffle: str=None, split: list=None, retrain: bool=False,
validate: bool=False, selection_set: str=None,
selection_fn: str='rmse'):
'''Trains neural network(s) using currently-loaded data; single NN if
no project is created, all candidates if created

@@ -307,7 +311,7 @@ def train(self, shuffle=None, split=None, retrain=False,
pool_fp.replace('model.h5', 'data.d')
)

def use(self, dset=None, output_filename=None):
def use(self, dset: str=None, output_filename: str=None):
'''Uses trained neural network(s) to predict for specified set; single
NN if no project created, best pool candidates if created

@@ -341,7 +345,7 @@ def use(self, dset=None, output_filename=None):
call_loc='USE')
return results

def errors(self, *args, dset=None):
def errors(self, *args, dset: str=None):
'''Obtains various errors for specified set
Args:

@@ -365,7 +369,7 @@ def errors(self, *args, dset=None):
logger.log('debug', 'Errors: {}'.format(errors), call_loc='ERRORS')
return errors

def save_project(self, filename=None, clean_up=True, del_candidates=False):
def save_project(self, filename: str=None, clean_up: bool=True,
del_candidates: bool=False):
'''Saves current state of project to a .prj file
Args:

@@ -400,7 +405,7 @@ def save_project(self, filename=None, clean_up=True, del_candidates=False):
logger.log('info', 'Project saved to {}'.format(save_path),
call_loc='PROJECT')

def _open_project(self, prj_file):
def _open_project(self, prj_file: str):
'''Private method: if project file specified on Server.__init__, loads
the project
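A minimal sketch of the Server workflow with the annotated signatures above. The import of Server from ecnet.server is shown in this commit; the CSV path, project name, split values, and set names are illustrative placeholders.

# minimal sketch; file names and set names are placeholders
from ecnet.server import Server

sv = Server(model_config='config.yml', num_processes=1)
sv.load_data('my_data.csv', random=True, split=[0.7, 0.2, 0.1])   # ECNet-formatted CSV (placeholder)
sv.create_project('example_project', num_pools=2, num_candidates=2)
sv.train(validate=True, selection_set='valid')                    # 'valid' assumed as a selection set name
preds = sv.use(dset='test', output_filename='test_results.csv')   # 'test' assumed as a dset value
errors = sv.errors('rmse', dset='test')                           # 'rmse' matches the default eval_fn
sv.save_project(del_candidates=True)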
ecnet/tasks/limit_inputs.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/tasks/limit_inputs.py
# v.3.1.1
# v.3.1.2
# Developed in 2019 by Travis Kessler <[email protected]>
#
# Contains functions for selecting influential input parameters

@@ -17,10 +17,12 @@
from ditto_lib.utils.dataframe import Attribute

# ECNet imports
from ecnet.utils.data_utils import DataFrame
from ecnet.utils.logging import logger

def limit_rforest(df, limit_num, num_estimators=1000, num_processes=1):
def limit_rforest(df: DataFrame, limit_num: int, num_estimators: int=1000,
num_processes: int=1) -> DataFrame:
'''Uses random forest regression to select input parameters
Args:
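A sketch of calling the task-level function directly with the new annotations. Constructing a DataFrame from a CSV path is an assumption here, and the file name and limit_num are placeholders.

# sketch; DataFrame('my_data.csv') constructor usage is assumed
from ecnet.utils.data_utils import DataFrame
from ecnet.tasks.limit_inputs import limit_rforest

df = DataFrame('my_data.csv')                               # placeholder ECNet-formatted database
df_limited = limit_rforest(df, limit_num=15, num_estimators=1000, num_processes=1)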
ecnet/tasks/remove_outliers.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/tasks/remove_outliers.py
# v.3.1.1
# v.3.1.2
# Developed in 2019 by Travis Kessler <[email protected]>
#
# Contains function for removing outliers from ECNet DataFrame

@@ -18,10 +18,12 @@
from ditto_lib.utils.dataframe import Attribute

# ECNet imports
from ecnet.utils.data_utils import DataFrame
from ecnet.utils.logging import logger

def remove_outliers(df, leaf_size=40, num_processes=1):
def remove_outliers(df: DataFrame, leaf_size: int=40,
num_processes: int=1) -> DataFrame:
'''Unsupervised outlier detection using local outlier factor
Args:
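A similar sketch for the outlier-removal task, under the same assumption about loading a DataFrame from a placeholder CSV path.

# sketch; DataFrame constructor usage assumed, file name is a placeholder
from ecnet.utils.data_utils import DataFrame
from ecnet.tasks.remove_outliers import remove_outliers

df = DataFrame('my_data.csv')
df_clean = remove_outliers(df, leaf_size=40, num_processes=1)   # returns a DataFrame per the new annotation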
ecnet/tasks/tuning.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/tasks/tuning.py
# v.3.1.1
# v.3.1.2
# Developed in 2019 by Travis Kessler <[email protected]>
#
# Contains functions/fitness functions for tuning hyperparameters

@@ -12,13 +12,16 @@
from ecabc.abc import ABC

# ECNet imports
from ecnet.utils.data_utils import DataFrame
from ecnet.utils.logging import logger
from ecnet.utils.server_utils import default_config, train_model

def tune_hyperparameters(df, vars, num_employers, num_iterations,
num_processes=1, shuffle=None, split=None,
validate=True, eval_set=None, eval_fn='rmse'):
def tune_hyperparameters(df: DataFrame, vars: dict, num_employers: int,
num_iterations: int, num_processes: int=1,
shuffle: str=None, split: list=None,
validate: bool=True, eval_set: str=None,
eval_fn: str='rmse') -> dict:
'''Tunes neural network learning/architecture hyperparameters
Args:

@@ -27,7 +30,7 @@ def tune_hyperparameters(df, vars, num_employers, num_iterations,
num_employers (int): number of employer bees
num_iterations (int): number of search cycles for the colony
num_processes (int): number of parallel processes to utilize
shuffle (bool): if True, shuffles L/V/T data for all evals
shuffle (str): shuffles `train` or `all` sets if not None
split (list): if shuffle is True, [learn%, valid%, test%]
validate (bool): if True, uses periodic validation; otherwise, no
eval_set (str): set used to evaluate bee performance; `learn`, `valid`,

@@ -99,7 +102,7 @@ def tune_hyperparameters(df, vars, num_employers, num_iterations,
return vars

def tune_fitness_function(params, **kwargs):
def tune_fitness_function(params: dict, **kwargs):
'''Fitness function used by ABC
Args:
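A sketch of the updated tuning call; note that `shuffle` is now a string ('train' or 'all') rather than a bool. The DataFrame constructor and CSV path are assumptions; default_config and create_sets appear elsewhere in this diff.

# sketch; 'my_data.csv' and the colony sizes are placeholders
from ecnet.utils.data_utils import DataFrame
from ecnet.utils.server_utils import default_config
from ecnet.tasks.tuning import tune_hyperparameters

df = DataFrame('my_data.csv')                        # assumed constructor usage
df.create_sets(random=True, split=[0.7, 0.2, 0.1])   # mirrors Server.load_data above
vars = default_config()
vars = tune_hyperparameters(df, vars, num_employers=10, num_iterations=5,
                            shuffle='train', split=[0.7, 0.2, 0.1], eval_fn='rmse')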
ecnet/tools/conversions.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# ecnet/tools/conversions.py
# v.3.1.0
# v.3.1.2
# Developed in 2019 by Travis Kessler <[email protected]>
#
# Contains functions for converting various chemical file formats

@@ -26,21 +26,21 @@
)

def get_smiles(name):
def get_smiles(name: str) -> list:
'''Queries PubChemPy for SMILES string for supplied molecule
Args:
name (str): name of the molecule (IUPAC, CAS, etc.)
Returns:
str or None: if molecule found, returns first idenitifying SMILES,
else None
list: list of all SMILES strings representing the given molecule
'''

return [m.isomeric_smiles for m in get_compounds(name, 'name')]

def smiles_to_descriptors(smiles_file, descriptors_csv, fingerprints=False):
def smiles_to_descriptors(smiles_file: str, descriptors_csv: str,
fingerprints: bool=False) -> list:
'''Generates QSPR descriptors from supplied SMILES file using
PaDEL-Descriptor

@@ -111,7 +111,7 @@ def smiles_to_descriptors(smiles_file, descriptors_csv, fingerprints=False):
return [row for row in reader]

def smiles_to_mdl(smiles_file, mdl_file):
def smiles_to_mdl(smiles_file: str, mdl_file: str):
'''Invoke Open Babel to generate an MDL file containing all supplied
molecules; requires Open Babel to be installed externally

@@ -151,7 +151,8 @@ def smiles_to_mdl(smiles_file, mdl_file):
continue

def mdl_to_descriptors(mdl_file, descriptors_csv, fingerprints=False):
def mdl_to_descriptors(mdl_file: str, descriptors_csv: str,
fingerprints: bool=False) -> list:
'''Generates QSPR descriptors from supplied MDL file using
PaDEL-Descriptor
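A sketch of the revised get_smiles, which now returns every SMILES string PubChemPy finds rather than a single string or None. The molecule name and file names are placeholders; the descriptor call requires PaDEL-Descriptor as the docstring notes.

# sketch; 'propane', 'mols.smi', and 'descriptors.csv' are placeholders
from ecnet.tools.conversions import get_smiles, smiles_to_descriptors

smiles = get_smiles('propane')   # e.g. ['CCC'] (a list, even for a single match)
# with a SMILES file prepared externally, descriptors can then be generated:
rows = smiles_to_descriptors('mols.smi', 'descriptors.csv', fingerprints=False)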