-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #30 from ECRL/dev
Documentation Additions
- Loading branch information
Showing
7 changed files
with
236 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,4 @@ ECNet Documentation | |
usage/installation | ||
usage/quickstart | ||
usage/tools | ||
usage/examples |
Binary file modified
BIN
+695 Bytes
(100%)
docs/tutorials/Getting Started/Getting Started with Predicting Fuel Properties.pdf
Binary file not shown.
47 changes: 47 additions & 0 deletions
47
docs/tutorials/Getting Started/scripts/create_parity_plot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from ecnet import Server | ||
from ecnet.utils.logging import logger | ||
from ecnet.tools.plotting import ParityPlot | ||
|
||
|
||
def main():
    """Create and save a parity plot for the ``kinetic_viscosity.prj`` project.

    Opens the project with ``Server``, collects experimental values and
    predictions for the training and test sets, computes RMSE and R-squared
    for both, and writes the resulting plot to ``../kv_parity_plot.png``.
    """
    logger.stream_level = 'info'
    sv = Server(prj_file='kinetic_viscosity.prj')

    # Experimental training values are the learning targets followed by the
    # validation targets.
    # NOTE(review): presumably this matches the ordering of
    # sv.use(dset='train') -- confirm against the Server implementation.
    train_exp = [*sv._sets.learn_y, *sv._sets.valid_y]
    train_pred = sv.use(dset='train')
    train_errors = sv.errors('rmse', 'r2', dset='train')

    test_exp = sv._sets.test_y
    test_pred = sv.use(dset='test')
    test_errors = sv.errors('rmse', 'r2', dset='test')

    plot = ParityPlot(
        title='Predicted vs. Experimental Kinematic Viscosity',
        x_label='Experimental KV',
        y_label='Predicted KV'
    )
    plot.add_series(train_exp, train_pred, name='Training Set', color='blue')
    plot.add_series(test_exp, test_pred, name='Test Set', color='red')

    # The test RMSE is drawn as error bars; the remaining metrics are added
    # as plain labels, in the order listed here.
    plot.add_error_bars(test_errors['rmse'], label='Test RMSE')
    extra_labels = (
        ('Test R-Squared', test_errors, 'r2'),
        ('Train RMSE', train_errors, 'rmse'),
        ('Train R-Squared', train_errors, 'r2'),
    )
    for text, errors, metric in extra_labels:
        plot._add_label(text, errors[metric])

    plot.save('../kv_parity_plot.png')
|
||
|
||
# Build the parity plot only when this file is executed as a script.
if __name__ == '__main__':

    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# Example Scripts | ||
|
||
## SMILES String Validation | ||
|
||
SMILES strings are the basis for QSPR descriptor generation, and therefore play an immense role in what neural networks learn (and how they correlate QSPR descriptors to given fuel properties). It is paramount that SMILES strings for molecules are correct to ensure neural networks learn from correct molecule representations. | ||
|
||
To validate SMILES strings for molecules stored in an ECNet-formatted database, we can use the script below, which queries PubChem using each molecule's name. The `validate_smiles` function accepts two arguments: the database you wish to validate and the filename of the resulting validated database. Note that QSPR descriptors in the resulting database do not reflect changes made to SMILES strings; you will need to create a new database using our [database construction tool](https://ecnet.readthedocs.io/en/latest/usage/tools.html#database-creation) to generate new descriptors. | ||
|
||
```python | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Developed in 2019 by Travis Kessler <[email protected]> | ||
# | ||
# Example script for validating ECNet-formatted database SMILES strings | ||
# | ||
|
||
from ecnet.utils.data_utils import DataFrame | ||
from ecnet.tools.database import get_smiles | ||
from ecnet.utils.logging import logger | ||
|
||
|
||
def _find_string_index(string_names, target):
    """Return the position of ``target`` in ``string_names``, or ``None``."""
    return next(
        (idx for idx, name in enumerate(string_names) if name == target),
        None
    )


def validate_smiles(db_name, new_db):
    """Validate a database's SMILES strings against PubChem and save it.

    Each data point's ``Compound Name`` string is passed to ``get_smiles``.
    If the returned SMILES differs from the one stored in the database, the
    stored value is replaced. Molecules for which ``get_smiles`` returns an
    empty string are reported and left unchanged.

    Args:
        db_name: filename of the ECNet-formatted database to validate
        new_db: filename to which the validated database is saved

    Returns:
        None. If the database has no ``Compound Name`` or ``SMILES`` string
        column, an error is logged and nothing is saved.
    """

    # load the database
    logger.log('info', 'Loading data from {}'.format(db_name))
    df = DataFrame(db_name)

    # find index of `Compound Name` string
    name_idx = _find_string_index(df.string_names, 'Compound Name')
    if name_idx is None:
        logger.log('error', '`Compound Name` string not found in database')
        return

    # find index of `SMILES` string
    smiles_idx = _find_string_index(df.string_names, 'SMILES')
    if smiles_idx is None:
        logger.log('error', '`SMILES` string not found in database')
        # Bail out here: continuing with a missing column would compare
        # against (and possibly overwrite) an unrelated string column.
        return

    # check each molecule's SMILES, replace if incorrect
    for pt in df.data_points:
        compound = pt.strings[name_idx]
        smiles = get_smiles(compound)
        if smiles == '':
            logger.log('warn', '{} not found on PubChem'.format(compound))
            continue

        db_smiles = pt.strings[smiles_idx]
        if smiles != db_smiles:
            logger.log(
                'crit',
                'Incorrect SMILES for {}:\n\tDatabase SMILES: {}'
                '\n\tPubChem SMILES: {}'.format(compound, db_smiles, smiles)
            )
            pt.strings[smiles_idx] = smiles
        else:
            logger.log('info', 'Correct SMILES for {}'.format(compound))

    # save the validated database
    logger.log('info', 'Saving validated data to {}'.format(new_db))
    df.save(new_db)
|
||
|
||
# Script entry point: configure logging, then validate the example database.
if __name__ == '__main__':

    # initialize logging (stream output at the 'info' level)
    logger.stream_level = 'info'
    # un-comment this for file logging
    # logger.file_level = 'info'

    # read 'unvalidated_db.csv' and write the checked copy to
    # 'validated_db.csv'
    validate_smiles('unvalidated_db.csv', 'validated_db.csv')
|
||
``` |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters