replaced catalog CSV w/ parquet file, added dap/vrt_summary() functions which print out data summaries when verbose is True, and updated Netrc and Dodsrc files for accessing earth data sources
anguswg-ucsb · Aug 11, 2023 · 652571b · 652571b
1 parent 3dfa6ef
commit 652571b
Show file tree

Hide file tree

Showing 9 changed files with 799 additions and 108,042 deletions.
diff --git a/climatePy/__init__.py b/climatePy/__init__.py
@@ -1,5 +1,9 @@
 # __init__.py
 import pandas as pd
+import pyarrow
+# import requests
+# from io import BytesIO
+
 import pkg_resources
 
 # warnings lib
@@ -9,11 +13,64 @@
 warnings.filterwarnings('ignore', category=Warning)
 
 def params():
-    data_file = pkg_resources.resource_filename('climatePy', 'data/catalog.csv')
+    # data_file = pkg_resources.resource_filename('climatePy', 'data/catalog.csv')
     # data_file = pkg_resources.resource_filename('src', 'data/catalog.csv')
-    data = pd.read_csv(data_file, low_memory=False)
+    # data = pd.read_csv(data_file, low_memory=False)
+
+    data_file = pkg_resources.resource_filename('climatePy', 'data/catalog.parquet')
+    data = pd.read_parquet(data_file)
+
     return data
 
+# # try and get up to date catalog from GitHub, otherwise use local catalog file
+# def params():
+#     url = 'https://github.com/mikejohnson51/climateR-catalogs/releases/latest/download/catalog.parquet'
+#     cat = None
+#     try:
+#         cat = pd.read_parquet(url)
+#     except Exception:
+#         url = pkg_resources.resource_filename('climatePy', 'data/catalog.parquet')
+#         cat = pd.read_parquet(url)
+#     return cat
+
+# try and get up to date catalog from GitHub
+# def params():
+#     def read_live_catalog(url='https://github.com/mikejohnson51/climateR-catalogs/releases/latest/download/catalog.parquet'):
+
+#         try:
+#             # try to fetch the live catalog
+#             response = requests.get(url)
+
+#             # raise exceptions for 4xx and 5xx status codes
+#             response.raise_for_status()
+
+#             # wrap the downloaded bytes in an in-memory buffer
+#             cat = BytesIO(response.content)
+
+#             # read the parquet data
+#             cat = pd.read_parquet(cat)
+
+#             return cat
+
+#         except requests.exceptions.RequestException as e:
+#             print("Error fetching the live catalog:\n", e)
+
+#             return None
+
+#     # try to fetch the live catalog, but use the local dataset if error happens (cat returns None if error is thrown)
+#     cat = read_live_catalog()
+
+#     # if cat returns None
+#     if cat is None:
+#         print("Falling back to local catalog...")
+
+#         cat = pkg_resources.resource_filename('climatePy', 'data/catalog.csv')
+#         cat = pd.read_csv(cat, low_memory=False)
+#         # data_file = pkg_resources.resource_filename('climatePy', 'data/catalog.csv')
+#         # cat = pd.read_csv(data_file, low_memory=False)
+
+#     return cat
+
 from ._climatepy_filter import climatepy_filter
 from ._dap import dap, dap_crop, dap_get
 from ._shortcuts import getTerraClim, getTerraClimNormals, getGridMET, getMACA, \