Skip to content

Commit

Permalink
Revert to last working state before I broke it
Browse files Browse the repository at this point in the history
  • Loading branch information
ilaflott committed Jul 22, 2024
1 parent 59e72b1 commit a82f589
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 70 deletions.
29 changes: 6 additions & 23 deletions .github/workflows/create-gfdl-catalog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,27 @@ on:
jobs:
catalog-upload:
runs-on: ubuntu-latest
container:
image: continuumio/miniconda3:latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: '3.10'

- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies
run: |
conda env create -f environment.yml --name catalogbuilder
- name: Make sample data
run: python tests/make_sample_data.py
- name: 'Generate catalog'
run: |
python tests/make_sample_data.py
- name: Generate catalog
with:
activate-environment: catalogbuilder
run: |
pip install .
python catalogbuilder/scripts/gen_intake_gfdl.py archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp gfdl_autotest
- name: Generate catalog with yaml
$CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/gen_intake_gfdl.py archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp gfdl_autotest
- name: 'Generate catalog with yaml'
run: |
pip install .
$CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/gen_intake_gfdl.py --config tests/test_config.yaml
$CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/gen_intake_gfdl.py --config tests/test_config.yaml
- name: upload-artifacts1
uses: actions/upload-artifact@v4
with:
Expand All @@ -55,17 +42,13 @@ jobs:
gfdl_autotest.json
cats/gfdl_autotest_from_yaml.csv
cats/gfdl_autotest_from_yaml.json
- name: Download all workflow run artifacts
uses: actions/download-artifact@v4

- name: Test with pytest
run: |
conda install pytest
$CONDA/envs/catalogbuilder/bin/pytest -v --runxfail
- name: Test for completeness
run: |
pip install .
run: |
$CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/test_catalog.py -tf gfdl_autotest.json catalogbuilder/cats/gfdl_template.json
$CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/test_catalog.py -tf catalogbuilder/cats/gfdl_autotest_from_yaml.json
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/CSVwriter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os.path
import csv
from csv import writer
from . import builderconfig, configparser
from intakebuilder import builderconfig, configparser

def getHeader(configyaml):
'''
Expand Down
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/getinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from csv import writer
import os
import xarray as xr
from . import builderconfig, configparser
from intakebuilder import builderconfig, configparser


'''
Expand Down
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/gfdlcrawler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from . import getinfo, builderconfig
from intakebuilder import getinfo, builderconfig
import sys
import re
import operator as op
Expand Down
24 changes: 15 additions & 9 deletions catalogbuilder/scripts/gen_intake_gfdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,23 @@
logger = logging.getLogger('local')
logger.setLevel(logging.INFO)

import catalogbuilder
try:
from catalogbuilder.intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
except ModuleNotFoundError as exc:
raise Exception(f"import problems!!!") from exc
from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
except ModuleNotFoundError:
print("The module intakebuilder is not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
print("Attempting again with adjusted sys.path ")
try:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
except:
print("Unable to adjust sys.path")
#print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
except ModuleNotFoundError:
sys.exit("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")

package_dir = os.path.dirname(os.path.abspath(__file__))

import catalogbuilder.cats
template_path = catalogbuilder.cats.__path__[0] + '/gfdl_template.json'
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')

#Setting up argument parsing/flags
@click.command()
Expand All @@ -28,8 +35,7 @@
#,help='The directory path with the datasets to be cataloged. E.g a GFDL PP path till /pp')
@click.argument('output_path',required=False,nargs=1)
#,help='Specify output filename suffix only. e.g. catalog')
@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,
help='Path to your yaml config, Use the config_template in intakebuilder repo')
@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,help='Path to your yaml config, Use the config_template in intakebuilder repo')
@click.option('--filter_realm', nargs=1)
@click.option('--filter_freq', nargs=1)
@click.option('--filter_chunk', nargs=1)
Expand Down
16 changes: 5 additions & 11 deletions catalogbuilder/scripts/gen_intake_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,17 @@

def main():
#######INPUT HERE OR USE FROM A CONFIG FILE LATER######
#project_dir = "/Users/ar46/data_cmip6/CMIP6/" # DRS COMPLIANT PROJECT DIR
# project_dir = "/Users/ar46/data_cmip6/CMIP6/" # DRS COMPLIANT PROJECT DIR
project_dir = "/uda/CMIP6/"#
#CMIP/NOAA-GFDL/GFDL-ESM4/"
##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv"
csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv"
csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv" ##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv"
#######################################################

######### SEARCH FILTERS ###########################
dictFilter = {}
dictFilter["source_prefix"]= 'CMIP6/'
#CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/'
#NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
dictFilter["source_prefix"]= 'CMIP6/' #CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/' #NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
#########################################################

dictInfo = {}
project_dir = project_dir.rstrip("/")
logger.info("Calling localcrawler.crawlLocal")
Expand All @@ -37,6 +32,5 @@ def main():
CSVwriter.listdict_to_csv(list_files, headers, csvfile)
print("CSV generated at:", os.path.abspath(csvfile))
logger.info("CSV generated at"+ os.path.abspath(csvfile))

if __name__ == '__main__':
main()
4 changes: 2 additions & 2 deletions catalogbuilder/scripts/gen_intake_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def main():
######### SEARCH FILTERS ###########################
dictFilter = {}
dictFilter["source_prefix"]= 'CMIP6/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
#######################################################
project_bucket = project_root.split("/")[1].lstrip("/")
project_name = project_root.split("/")[2]
Expand Down
21 changes: 5 additions & 16 deletions catalogbuilder/scripts/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,14 @@
@click.command()
@click.argument('json_path', nargs = 1 , required = True)
@click.argument('json_template_path', nargs = 1 , required = False)
@click.option('-tf', '--test-failure', is_flag=True, default = False,
help="Errors are only printed. Program will not exit.")
@click.option('-tf', '--test-failure', is_flag=True, default = False, help="Errors are only printed. Program will not exit.")
def main(json_path,json_template_path,test_failure):

""" This test ensures catalogs generated by the Catalog Builder tool are minimally valid.
This means a few things: the generated catalog JSON file reflects the template it was
generated with, the catalog CSV has atleast one row of values (not headers), and each
required column exists without any empty values. If a test case is broken or expected to
fail, the --test-failure/-tf flag can be used. This flag will simply print errors
instead of doing a sys.exit.
""" This test ensures catalogs generated by the Catalog Builder tool are minimally valid. This means a few things: the generated catalog JSON file reflects the template it was generated with, the catalog CSV has atleast one row of values (not headers), and each required column exists without any empty values. If a test case is broken or expected to fail, the --test-failure/-tf flag can be used. This flag will simply print errors instead of doing a sys.exit.
JSON_PATH: Path to generated schema to be tested
JSON_PATH: Path to generated schema to be tested
JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json
will be used for comparison
"""
JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json will be used for comparison """

#Open JSON
j = json.load(open(json_path))
Expand Down Expand Up @@ -59,10 +51,7 @@ def main(json_path,json_template_path,test_failure):
errors = 0
for column in req:
if column not in catalog.columns:
print(f"The required column '{column}' does not exist in the csv. In other words, "
"there is some inconsistency between the json and the csv file. Please check "
"out info listed under aggregation_control and groupby_attrs in your json file"
" and verify if those columns show up in the csv as well." )
print(f"The required column '{column}' does not exist in the csv. In other words, there is some inconsistency between the json and the csv file. Please check out info listed under aggregation_control and groupby_attrs in your json file and verify if those columns show up in the csv as well.")
errors += 1

if column in catalog.columns:
Expand Down
24 changes: 18 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,23 @@ channels:
- conda-forge
- default
dependencies:
- python
- pytest
- click
- xarray
- conda
- python=3.7
- conda-env
- conda-build
- conda-verify
- _ipyw_jlab_nb_ext_conf
- anaconda==2020.02=py37_0
- anaconda-navigator
- navigator-updater
- gcsfs
- zarr
- cftime
- cartopy
- xgcm
- pandas
- xarray
- click
- intake-esm
- pyyaml
- jsondiff
- intake-esm
- boto3

0 comments on commit a82f589

Please sign in to comment.