Commit fb000fa

Merge pull request #162 from NREL/bnb/h5_with_era_training

Bnb/h5 with era training

bnb32 authored Aug 29, 2023
2 parents e1ae1b5 + a2f5399
Showing 37 changed files with 8,435 additions and 5,278 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -43,6 +43,7 @@ nosetests.xml
coverage.xml
*.cover
.hypothesis/
+*.png

# Translations
*.mo
3 changes: 3 additions & 0 deletions requirements.txt
@@ -2,7 +2,9 @@ matplotlib>=3.1
NREL-rex>=0.2.82
NREL-phygnn>=0.0.23
NREL-rev<0.8.0
+NREL-gaps>=0.4.0
NREL-farms>=1.0.4
+google-auth-oauthlib==0.5.3
pytest>=5.2
pillow
tensorflow>2.4
@@ -11,3 +13,4 @@ netCDF4==1.5.8
dask
sphinx
pandas
+numpy==1.22
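Note: the new NREL-gaps requirement supplies the gaps.legacy module used throughout this changeset. A minimal sanity check of an environment built from this file (the version attributes are assumed, not shown in the diff):

    # Quick check that the newly pinned dependencies resolve; assumes a
    # fresh environment installed from this requirements.txt.
    import gaps
    import numpy

    print(gaps.__version__)   # expected >= 0.4.0 per the new pin
    print(numpy.__version__)  # expected 1.22.x per the new pin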
5 changes: 3 additions & 2 deletions sup3r/batch/batch.py
@@ -1,11 +1,12 @@
# -*- coding: utf-8 -*-
"""sup3r batch utilities based on reV's batch module"""
+from gaps.legacy import BatchJob as GapsBatchJob
+
from sup3r.pipeline.pipeline import Sup3rPipeline
from sup3r.pipeline.pipeline_cli import pipeline_monitor_background
-from reV.batch.batch import BatchJob as RevBatchJob


-class BatchJob(RevBatchJob):
+class BatchJob(GapsBatchJob):
    """Framework for building a batched job suite."""

    # Class attributes to set the software's pipeline class and run+monitor in
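With this change, sup3r's batch framework inherits from gaps' maintained legacy layer rather than from reV directly. A usage sketch, assuming gaps.legacy.BatchJob keeps the reV-style construct-then-run interface (the config path and run kwargs here are illustrative, not from this commit):

    # Hedged sketch: assumes BatchJob still takes a batch config file and
    # exposes a run() entry point, as reV's BatchJob did; verify against
    # the installed NREL-gaps version.
    from sup3r.batch.batch import BatchJob

    batch = BatchJob('./config_batch.json')  # hypothetical config path
    batch.run(dry_run=True)  # report the jobs without submitting them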
202 changes: 131 additions & 71 deletions sup3r/bias/bias_calc.py

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion sup3r/pipeline/__init__.py
@@ -2,5 +2,6 @@
"""
Sup3r data pipeline architecture.
"""
+from gaps.legacy import Status
+
from .pipeline import Sup3rPipeline
-from reV.pipeline.status import Status
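Call sites are unaffected by the swap: the package re-exports the same names, so the snippet below resolves exactly as before; only the upstream provider of Status moved from reV to gaps.legacy.

    # Both names import from sup3r.pipeline as before this change.
    from sup3r.pipeline import Status, Sup3rPipeline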
154 changes: 136 additions & 18 deletions sup3r/pipeline/config.py
@@ -6,12 +6,14 @@
@author: bnb32
"""
-from reV.config.base_config import BaseConfig as RevBaseConfig
+import os
+from typing import ClassVar
+
+from reV.config.base_analysis_config import AnalysisConfig
-from reV.config.pipeline import PipelineConfig
-from reV.utilities.exceptions import ConfigError
+from reV.config.base_config import BaseConfig as RevBaseConfig
+from reV.utilities.exceptions import ConfigError, PipelineError

-from sup3r import SUP3R_DIR, TEST_DATA_DIR, CONFIG_DIR
+from sup3r import CONFIG_DIR, SUP3R_DIR, TEST_DATA_DIR


class BaseConfig(RevBaseConfig):
@@ -20,9 +22,11 @@ class BaseConfig(RevBaseConfig):
    REQUIREMENTS = ()
    """Required keys for config"""

-    STR_REP = {'SUP3R_DIR': SUP3R_DIR,
-               'CONFIG_DIR': CONFIG_DIR,
-               'TEST_DATA_DIR': TEST_DATA_DIR}
+    STR_REP: ClassVar[dict] = {
+        'SUP3R_DIR': SUP3R_DIR,
+        'CONFIG_DIR': CONFIG_DIR,
+        'TEST_DATA_DIR': TEST_DATA_DIR,
+    }
    """Mapping of config inputs (keys) to desired replacements (values) in
    addition to relative file paths as demarcated by ./ and ../"""

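For context on the hunk above: annotating STR_REP with typing.ClassVar marks the mutable dict as a deliberate class-level constant rather than an instance field. A minimal illustration (names are hypothetical, not from this diff):

    from typing import ClassVar

    class Example:
        # ClassVar tells type checkers (and linters that flag mutable
        # class attributes) that this dict is intentionally shared by
        # all instances rather than being per-instance state.
        STR_REP: ClassVar[dict] = {'SUP3R_DIR': '/path/to/sup3r'}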
@@ -40,13 +44,121 @@ class properties.
        perform_str_rep : bool
            Flag to perform string replacement for REVDIR, TESTDATADIR, and ./
        """
-        super().__init__(config, check_keys=check_keys,
-                         perform_str_rep=perform_str_rep)
+        super().__init__(
+            config, check_keys=check_keys, perform_str_rep=perform_str_rep
+        )


-class Sup3rPipelineConfig(PipelineConfig):
+class Sup3rPipelineConfig(AnalysisConfig):
    """Sup3r pipeline configuration based on reV pipeline"""

+    def __init__(self, config):
+        """
+        Parameters
+        ----------
+        config : str | dict
+            File path to config json (str), serialized json object (str),
+            or dictionary with pre-extracted config.
+        """
+
+        super().__init__(config, run_preflight=False)
+        self._check_pipeline()
+        self._parse_dirout()
+        self._check_dirout_status()
+
+    def _check_pipeline(self):
+        """Check pipeline steps input. ConfigError if bad input."""
+
+        if 'pipeline' not in self:
+            raise ConfigError(
+                'Could not find required key "pipeline" in the '
+                'pipeline config.'
+            )
+
+        if not isinstance(self.pipeline, list):
+            raise ConfigError(
+                'Config arg "pipeline" must be a list of '
+                '(command, f_config) pairs, but received "{}".'.format(
+                    type(self.pipeline)
+                )
+            )
+
+        for di in self.pipeline:
+            for f_config in di.values():
+                if not os.path.exists(f_config):
+                    raise ConfigError(
+                        'Pipeline step depends on non-existent '
+                        'file: {}'.format(f_config)
+                    )
+
+    def _check_dirout_status(self):
+        """Check unique status file in dirout."""
+
+        if os.path.exists(self.dirout):
+            for fname in os.listdir(self.dirout):
+                if fname.endswith(
+                    '_status.json'
+                ) and fname != '{}_status.json'.format(self.name):
+                    msg = (
+                        'Cannot run pipeline "{}" in directory '
+                        '{}. Another pipeline appears to have '
+                        'been run here with status json: {}'.format(
+                            self.name, self.dirout, fname
+                        )
+                    )
+                    raise PipelineError(msg)
+
+    @property
+    def pipeline(self):
+        """Get the pipeline steps.
+
+        Returns
+        -------
+        pipeline : list
+            reV pipeline run steps. Should be a list of (command, config)
+            pairs.
+        """
+
+        return self['pipeline']
+
+    @property
+    def logging(self):
+        """Get logging kwargs for the pipeline.
+
+        Returns
+        -------
+        dict
+        """
+        return self.get('logging', {"log_file": None, "log_level": "INFO"})
+
+    @property
+    def hardware(self):
+        """Get argument specifying which hardware the pipeline is being run on.
+        Defaults to "eagle" (most common use of the reV pipeline)
+
+        Returns
+        -------
+        hardware : str
+            Name of hardware that this pipeline is being run on.
+            Defaults to "eagle".
+        """
+        return self.get('hardware', 'eagle')
+
+    @property
+    def status_file(self):
+        """Get status file path.
+
+        Returns
+        -------
+        _status_file : str
+            reV status file path.
+        """
+        if self.dirout is None:
+            raise ConfigError('Pipeline has not yet been initialized.')
+
+        return os.path.join(self.dirout, '{}_status.json'.format(self.name))
+
    # pylint: disable=W0201
    def _parse_dirout(self):
        """Parse pipeline steps for common dirout and unique job names."""
@@ -55,21 +167,27 @@ def _parse_dirout(self):
        names = []
        for di in self.pipeline:
            for f_config in di.values():
-                config = AnalysisConfig(f_config, check_keys=False,
-                                        run_preflight=False)
+                config = AnalysisConfig(
+                    f_config, check_keys=False, run_preflight=False
+                )
                dirouts.append(config.dirout)

                if 'name' in config:
                    names.append(config.name)

        if len(set(dirouts)) != 1:
-            raise ConfigError('Pipeline steps must have a common output '
-                              'directory but received {} different '
-                              'directories.'.format(len(set(dirouts))))
+            raise ConfigError(
+                'Pipeline steps must have a common output '
+                'directory but received {} different '
+                'directories.'.format(len(set(dirouts)))
+            )
        else:
            self._dirout = dirouts[0]

        if len(set(names)) != len(names):
-            raise ConfigError('Pipeline steps must have a unique job names '
-                              'directory but received {} duplicate names.'
-                              .format(len(names) - len(set(names))))
+            raise ConfigError(
+                'Pipeline steps must have a unique job names '
+                'directory but received {} duplicate names.'.format(
+                    len(names) - len(set(names))
+                )
+            )
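Taken together, the new checks define what a valid input looks like: a "pipeline" key holding a list of one-entry {command: config_path} dicts, where every referenced file exists, all steps share one output directory, and job names are unique. A hypothetical config that would pass these checks (step names and paths are illustrative):

    # Hypothetical dict input to Sup3rPipelineConfig; each entry maps one
    # pipeline command to its config file. 'logging' and 'hardware' fall
    # back to the defaults shown in the properties above if omitted.
    config = {
        "logging": {"log_file": None, "log_level": "INFO"},
        "pipeline": [
            {"forward-pass": "./config_fwp.json"},
            {"data-collect": "./config_collect.json"},
        ],
    }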
Diffs for the remaining changed files are not rendered.
