Revert to last working state before I broke it

NOAA-GFDL · Jul 22, 2024 · a82f589 · a82f589
1 parent 59e72b1
commit a82f589
Show file tree

Hide file tree

Showing 9 changed files with 54 additions and 70 deletions.
diff --git a/.github/workflows/create-gfdl-catalog.yml b/.github/workflows/create-gfdl-catalog.yml
@@ -12,40 +12,27 @@ on:
 jobs:
   catalog-upload:
     runs-on: ubuntu-latest
-    container:
-      image: continuumio/miniconda3:latest    
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python 3.10
       uses: actions/setup-python@v3
       with:
         python-version: '3.10'
-
     - name: Add conda to system path
       run: |
         # $CONDA is an environment variable pointing to the root of the miniconda directory
         echo $CONDA/bin >> $GITHUB_PATH
-        
     - name: Install dependencies
       run: |
         conda env create -f environment.yml --name catalogbuilder
-        
     - name: Make sample data
+      run: python tests/make_sample_data.py
+    - name: 'Generate catalog'
       run: |
-        python tests/make_sample_data.py
-      
-    - name: Generate catalog
-      with:
-        activate-environment: catalogbuilder
-      run: |
-        pip install .
-        python catalogbuilder/scripts/gen_intake_gfdl.py archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp gfdl_autotest
-        
-    - name: Generate catalog with yaml
+        $CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/gen_intake_gfdl.py archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp gfdl_autotest
+    - name: 'Generate catalog with yaml'
       run: |
-        pip install .
-        $CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/gen_intake_gfdl.py --config tests/test_config.yaml
-        
+         $CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/gen_intake_gfdl.py --config tests/test_config.yaml
     - name: upload-artifacts1
       uses: actions/upload-artifact@v4
       with:
@@ -55,17 +42,13 @@ jobs:
             gfdl_autotest.json
             cats/gfdl_autotest_from_yaml.csv
             cats/gfdl_autotest_from_yaml.json
-            
     - name: Download all workflow run artifacts
       uses: actions/download-artifact@v4
-
     - name: Test with pytest
       run: |
         conda install pytest
         $CONDA/envs/catalogbuilder/bin/pytest -v --runxfail
-        
     - name: Test for completeness
-      run: |
-        pip install .
+      run: | 
         $CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/test_catalog.py -tf gfdl_autotest.json catalogbuilder/cats/gfdl_template.json
         $CONDA/envs/catalogbuilder/bin/python catalogbuilder/scripts/test_catalog.py -tf catalogbuilder/cats/gfdl_autotest_from_yaml.json 
diff --git a/catalogbuilder/intakebuilder/CSVwriter.py b/catalogbuilder/intakebuilder/CSVwriter.py
@@ -1,7 +1,7 @@
 import os.path
 import csv
 from csv import writer
-from . import builderconfig, configparser 
+from intakebuilder import builderconfig, configparser 
 
 def getHeader(configyaml):
     '''

diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py
@@ -4,7 +4,7 @@
 from csv import writer
 import os
 import xarray as xr
-from . import builderconfig, configparser 
+from intakebuilder import builderconfig, configparser 
 
 
 '''

diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py
@@ -1,5 +1,5 @@
 import os
-from . import getinfo, builderconfig
+from intakebuilder import getinfo, builderconfig
 import sys
 import re
 import operator as op

diff --git a/catalogbuilder/scripts/gen_intake_gfdl.py b/catalogbuilder/scripts/gen_intake_gfdl.py
@@ -10,16 +10,23 @@
 logger = logging.getLogger('local')
 logger.setLevel(logging.INFO)
 
-import catalogbuilder
 try:
-   from catalogbuilder.intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
-except ModuleNotFoundError as exc:
-   raise Exception(f"import problems!!!") from exc
+   from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
+except ModuleNotFoundError:
+    print("The module intakebuilder is not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
+    print("Attempting again with adjusted sys.path ")
+    try:
+       sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    except:
+       print("Unable to adjust sys.path")
+    #print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    try:
+        from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
+    except ModuleNotFoundError:
+        sys.exit("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
 
 package_dir = os.path.dirname(os.path.abspath(__file__))
-
-import catalogbuilder.cats
-template_path = catalogbuilder.cats.__path__[0] + '/gfdl_template.json'
+template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
 
 #Setting up argument parsing/flags
 @click.command()
@@ -28,8 +35,7 @@
 #,help='The directory path with the datasets to be cataloged. E.g a GFDL PP path till /pp')
 @click.argument('output_path',required=False,nargs=1)
 #,help='Specify output filename suffix only. e.g. catalog')
-@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,
-              help='Path to your yaml config, Use the config_template in intakebuilder repo')
+@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,help='Path to your yaml config, Use the config_template in intakebuilder repo')
 @click.option('--filter_realm', nargs=1)
 @click.option('--filter_freq', nargs=1)
 @click.option('--filter_chunk', nargs=1)

diff --git a/catalogbuilder/scripts/gen_intake_local.py b/catalogbuilder/scripts/gen_intake_local.py
@@ -10,22 +10,17 @@
 
 def main():
     #######INPUT HERE OR USE FROM A CONFIG FILE LATER######
-    #project_dir = "/Users/ar46/data_cmip6/CMIP6/"  # DRS COMPLIANT PROJECT DIR
+#   project_dir = "/Users/ar46/data_cmip6/CMIP6/"  # DRS COMPLIANT PROJECT DIR
     project_dir = "/uda/CMIP6/"#
     #CMIP/NOAA-GFDL/GFDL-ESM4/"
-    ##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv"
-    csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv" 
+    csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv" ##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv"
     #######################################################
-
     ######### SEARCH FILTERS ###########################
     dictFilter = {}
-    dictFilter["source_prefix"]= 'CMIP6/'
-    #CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/'
-    #NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
-    #COMMENT  dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
-    #COMMENT dictFilter["varname"] = "tas"   #Remove this if you don't want to filter by variable name
+    dictFilter["source_prefix"]= 'CMIP6/' #CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/' #NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
+   #COMMENT  dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
+   #COMMENT dictFilter["varname"] = "tas"   #Remove this if you don't want to filter by variable name
     #########################################################
-
     dictInfo = {}
     project_dir = project_dir.rstrip("/")
     logger.info("Calling localcrawler.crawlLocal") 
@@ -37,6 +32,5 @@ def main():
     CSVwriter.listdict_to_csv(list_files, headers, csvfile)
     print("CSV generated at:", os.path.abspath(csvfile))
     logger.info("CSV generated at"+ os.path.abspath(csvfile))
-
 if __name__ == '__main__':
     main()
diff --git a/catalogbuilder/scripts/gen_intake_s3.py b/catalogbuilder/scripts/gen_intake_s3.py
@@ -15,8 +15,8 @@ def main():
     ######### SEARCH FILTERS ###########################
     dictFilter = {}
     dictFilter["source_prefix"]= 'CMIP6/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
-    #COMMENT  dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
-    #COMMENT dictFilter["varname"] = "tas"   #Remove this if you don't want to filter by variable name
+   #COMMENT  dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
+   #COMMENT dictFilter["varname"] = "tas"   #Remove this if you don't want to filter by variable name
     #######################################################
     project_bucket = project_root.split("/")[1].lstrip("/")
     project_name = project_root.split("/")[2]

diff --git a/catalogbuilder/scripts/test_catalog.py b/catalogbuilder/scripts/test_catalog.py
@@ -9,22 +9,14 @@
 @click.command()
 @click.argument('json_path', nargs = 1 , required = True)
 @click.argument('json_template_path', nargs = 1 , required = False)
-@click.option('-tf', '--test-failure', is_flag=True, default = False,
-              help="Errors are only printed. Program will not exit.")
+@click.option('-tf', '--test-failure', is_flag=True, default = False, help="Errors are only printed. Program will not exit.")
 def main(json_path,json_template_path,test_failure):
 
-    """ This test ensures catalogs generated by the Catalog Builder tool are minimally valid. 
-    This means a few things: the generated catalog JSON file reflects the template it was 
-    generated with, the catalog CSV has atleast one row of values (not headers), and each 
-    required column exists without any empty values. If a test case is broken or expected to 
-    fail, the --test-failure/-tf flag can be used. This flag will simply print errors 
-    instead of doing a sys.exit. 
+    """ This test ensures catalogs generated by the Catalog Builder tool are minimally valid. This means a few things: the generated catalog JSON file reflects the template it was generated with, the catalog CSV has atleast one row of values (not headers), and each required column exists without any empty values. If a test case is broken or expected to fail, the --test-failure/-tf flag can be used. This flag will simply print errors instead of doing a sys.exit. 
 
-      JSON_PATH: Path to generated schema to be tested 
+     JSON_PATH: Path to generated schema to be tested 
     
-      JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json 
-      will be used for comparison 
-    """
+     JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json will be used for comparison """
 
     #Open JSON
     j = json.load(open(json_path))
@@ -59,10 +51,7 @@ def main(json_path,json_template_path,test_failure):
     errors = 0
     for column in req:
         if column not in catalog.columns:
-            print(f"The required column '{column}' does not exist in the csv. In other words, "
-                   "there is some inconsistency between the json and the csv file. Please check "
-                   "out info listed under aggregation_control and groupby_attrs in your json file"
-                   " and verify if those columns show up in the csv as well." )
+            print(f"The required column '{column}' does not exist in the csv. In other words, there is some inconsistency between the json and the csv file. Please check out info listed under aggregation_control and groupby_attrs in your json file and verify if those columns show up in the csv as well.")
             errors += 1
 
         if column in catalog.columns:

diff --git a/environment.yml b/environment.yml
@@ -3,11 +3,23 @@ channels:
   - conda-forge 
   - default
 dependencies:
-  - python
-  - pytest
-  - click
-  - xarray
+  - conda
+  - python=3.7
+  - conda-env
+  - conda-build
+  - conda-verify
+  - _ipyw_jlab_nb_ext_conf
+  - anaconda==2020.02=py37_0
+  - anaconda-navigator
+  - navigator-updater
+  - gcsfs
+  - zarr
+  - cftime
+  - cartopy
+  - xgcm
   - pandas
+  - xarray
+  - click
+  - intake-esm 
+  - pyyaml
   - jsondiff
-  - intake-esm
-  - boto3