From c81d6bfeed0a1887d35cc818b61fc8b5e5ad690c Mon Sep 17 00:00:00 2001
From: Jonathan Lin <jonathanlinnj@gmail.com>
Date: Tue, 14 May 2024 10:16:27 -0400
Subject: [PATCH 1/4] git attributes

---
 .gitattributes | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9fb85ec
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,14 @@
+# Set the default behavior, in case people don't have core.autocrlf set.
+* text=auto
+
+# Explicitly declare text files you want to always be normalized and converted
+# to native line endings on checkout.
+*.c text
+*.h text
+
+# Declare files that will always have CRLF line endings on checkout.
+*.sln text eol=crlf
+
+# Denote all files that are truly binary and should not be modified.
+*.png binary
+*.jpg binary
\ No newline at end of file

From 07369c44e155c1758eb01dc4a398f5322998be1a Mon Sep 17 00:00:00 2001
From: Jonathan Lin <jonathanlinnj@gmail.com>
Date: Tue, 14 May 2024 10:23:04 -0400
Subject: [PATCH 2/4] include daily averaging of winds

---
 track/env_wind.py | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/track/env_wind.py b/track/env_wind.py
index bab82c4..695138a 100644
--- a/track/env_wind.py
+++ b/track/env_wind.py
@@ -5,7 +5,7 @@
 import xarray as xr
 
 import namelist
-from util import input
+from util import constants, input, sphere
 
 """
 Returns the name of the file containing environmental wind statistics.
@@ -51,18 +51,21 @@ def deep_layer_winds(env_wnds):
     u250 = env_wnds[:, var_names.index('ua250_Mean')]
     v250 = env_wnds[:, var_names.index('va250_Mean')]
     u850 = env_wnds[:, var_names.index('ua850_Mean')]
-    v850 = env_wnds[:, var_names.index('va850_Mean')]   
+    v850 = env_wnds[:, var_names.index('va850_Mean')]
     return (u250, v250, u850, v850)
 
 """
 Read the mean and covariance of the upper/lower level zonal and meridional winds.
 """
-def read_env_wnd_fn(fn_wnd_stat):
+def read_env_wnd_fn(fn_wnd_stat, dt_s = None, dt_e = None):
     var_Mean = wind_mean_vector_names()
     var_Var = wind_cov_matrix_names()
 
-    ds = xr.open_dataset(fn_wnd_stat)
-    wnd_Mean = [ds[x] for x in var_Mean] 
+    if dt_s is None:
+        ds = xr.open_dataset(fn_wnd_stat)
+    else:
+        ds = xr.open_dataset(fn_wnd_stat).sel(time = slice(dt_s, dt_e))
+    wnd_Mean = [ds[x] for x in var_Mean]
     wnd_Cov = [['' for i in range(len(var_Mean))] for j in range(len(var_Mean))]
     for i in range(len(var_Mean)):
         for j in range(len(var_Mean)):
@@ -88,7 +91,7 @@ def gen_wind_mean_cov():
     fns_va = input._glob_prefix(input.get_v_key())
 
     lazy_results = []
-    for i in range(len(fns_ua)):
+    for i in range(min(len(fns_ua), len(fns_va))):
         lazy_result = dask.delayed(wnd_stat_wrapper)((fns_ua[i], fns_va[i]))
         lazy_results.append(lazy_result)
     out = dask.compute(*lazy_results)
@@ -180,23 +183,34 @@ def calc_wnd_stat(ua, va, dt):
     else:
         p_upper = 25000; p_lower = 85000;
 
-    # TODO: Average over daily timescales.
-    ua250_month = ua.sel({input.get_lvl_key(): p_upper}).sel(time = month_mask)
-    va250_month = va.sel({input.get_lvl_key(): p_upper}).sel(time = month_mask)
-    ua850_month = ua.sel({input.get_lvl_key(): p_lower}).sel(time = month_mask)
-    va850_month = va.sel({input.get_lvl_key(): p_lower}).sel(time = month_mask)
+    # If time step is less than one day, group by day.
+    dt_step = (np.timedelta64(1, 'D') - (ua['time'][1] - ua['time'][0]).data) / np.timedelta64(1, 's')
+    if dt_step < 0:
+        ua_month = ua.sel(time = month_mask).groupby("time.day").mean(dim = 'time')
+        va_month = va.sel(time = month_mask).groupby("time.day").mean(dim = 'time')
+        t_unit = 'day'
+    else:
+        ua_month = ua.sel(time = month_mask)
+        va_month = va.sel(time = month_mask)
+        t_unit = 'time'
+
+    # Compute the daily averages
+    ua250_month = ua_month.sel({input.get_lvl_key(): p_upper})
+    va250_month = va_month.sel({input.get_lvl_key(): p_upper})
+    ua850_month = ua_month.sel({input.get_lvl_key(): p_lower})
+    va850_month = va_month.sel({input.get_lvl_key(): p_lower})
 
     month_wnds = [ua250_month, va250_month,
                   ua850_month, va850_month]
     month_mean_wnds = [0] * len(month_wnds)
     month_var_wnds = [[np.empty(0) for i in range(len(month_wnds))] for j in range(len(month_wnds))]
     for i in range(len(month_wnds)):
-        month_mean_wnds[i] = month_wnds[i].mean(dim = "time")
+        month_mean_wnds[i] = month_wnds[i].mean(dim = t_unit)
         for j in range(0, i+1):
             if i == j:
-                month_var_wnds[i][j] = month_wnds[i].var(dim = "time")
+                month_var_wnds[i][j] = month_wnds[i].var(dim = t_unit)
             else:
-                month_var_wnds[i][j] = xr.cov(month_wnds[i], month_wnds[j], dim = "time")
+                month_var_wnds[i][j] = xr.cov(month_wnds[i], month_wnds[j], dim = t_unit)
 
     wnd_vars = [[x for x in y if len(x) > 0] for y in month_var_wnds]
     stats = sum(wnd_vars, month_mean_wnds)

From b96c485d43ebc50970606a4cceadfb0e040a9395 Mon Sep 17 00:00:00 2001
From: Jonathan Lin <jonathanlinnj@gmail.com>
Date: Tue, 14 May 2024 10:24:20 -0400
Subject: [PATCH 3/4] midlevel now takes closest level (in pressure) to
 namelist.p_midlevel

---
 thermo/calc_thermo.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/thermo/calc_thermo.py b/thermo/calc_thermo.py
index e79b29d..33fa6c0 100644
--- a/thermo/calc_thermo.py
+++ b/thermo/calc_thermo.py
@@ -46,7 +46,7 @@ def compute_thermo(dt_start, dt_end):
         ta = ds_ta[input.get_temp_key()][i, :, :, :]
         hus = ds_hus[input.get_sp_hum_key()][i, :, :, :]
         lvl = ds_ta[input.get_lvl_key()]
-        lvl_d = ds_ta[input.get_lvl_key()].data
+        lvl_d = np.copy(ds_ta[input.get_lvl_key()].data)
 
         # Ensure lowest model level is first.
         # Here we assume the model levels are in pressure.
@@ -55,24 +55,31 @@ def compute_thermo(dt_start, dt_end):
             hus = hus.reindex({input.get_lvl_key(): lvl[::-1]})
             lvl_d = lvl_d[::-1]
     
-        p_midlevel = namelist.p_midlevel                # Pa
+        p_midlevel = namelist.p_midlevel                    # Pa
         if lvl.units in ['millibars', 'hPa']:
-            lvl_d *= 100
+            lvl_d *= 100                                    # needs to be in Pa
             p_midlevel = namelist.p_midlevel / 100          # hPa
+            lvl_mid = lvl.sel({input.get_lvl_key(): p_midlevel}, method = 'nearest')
 
         # TODO: Check units of psl, ta, and hus
         vmax_args = (sst_interp, psl.data, lvl_d, ta.data, hus.data)
         vmax[i, :, :] = thermo.CAPE_PI_vectorized(*vmax_args)
-        ta_midlevel = ta.sel({input.get_lvl_key(): p_midlevel}).data
-        hus_midlevel = hus.sel({input.get_lvl_key(): p_midlevel}).data
+        ta_midlevel = ta.sel({input.get_lvl_key(): p_midlevel}, method = 'nearest').data
+        hus_midlevel = hus.sel({input.get_lvl_key(): p_midlevel}, method = 'nearest').data
+
+        p_midlevel_Pa = float(lvl_mid) * 100 if lvl_mid.units in ['millibars', 'hPa'] else float(lvl_mid)
         chi_args = (sst_interp, psl.data, ta_midlevel,
-                    namelist.p_midlevel, hus_midlevel)
+                    p_midlevel_Pa, hus_midlevel)
         chi[i, :, :] = np.minimum(np.maximum(thermo.sat_deficit(*chi_args), 0), 10)
-        rh_mid[i, :, :] = thermo.conv_q_to_rh(ta_midlevel, hus_midlevel, namelist.p_midlevel)
+        rh_mid[i, :, :] = thermo.conv_q_to_rh(ta_midlevel, hus_midlevel, p_midlevel_Pa)
 
     return (vmax, chi, rh_mid)
 
 def gen_thermo():
+    # TODO: Assert all of the datasets have the same length in time.
+    if os.path.exists(get_fn_thermo()):
+        return
+
     # Load datasets metadata. Since SST is split into multiple files and can
     # cause parallel reads with open_mfdataset to hang, save as a single file.
     dt_start, dt_end = input.get_bounding_times()
@@ -83,14 +90,10 @@ def gen_thermo():
                    np.array([x for x in input.convert_to_datetime(ds, ds['time'].values)
                              if x >= ct_bounds[0] and x <= ct_bounds[1]]))
 
-    # TODO: Assert all of the datasets have the same length in time.
-    if os.path.exists(get_fn_thermo()):
-        return
-
     n_chunks = namelist.n_procs
-    chunks = np.array_split(ds_times, n_chunks)
+    chunks = np.array_split(ds_times, np.minimum(n_chunks, np.floor(len(ds_times) / 2)))
     lazy_results = []
-    for i in range(n_chunks):
+    for i in range(len(chunks)):
         lazy_result = dask.delayed(compute_thermo)(chunks[i][0], chunks[i][-1])
         lazy_results.append(lazy_result)
     out = dask.compute(*lazy_results, scheduler = 'processes', num_workers = n_chunks)

From dc471b045534a83c44b570b5be4e2bf5e2789d24 Mon Sep 17 00:00:00 2001
From: Jonathan Lin <jonathanlinnj@gmail.com>
Date: Tue, 14 May 2024 12:16:02 -0400
Subject: [PATCH 4/4] revert extra import

---
 track/env_wind.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/track/env_wind.py b/track/env_wind.py
index 695138a..3526945 100644
--- a/track/env_wind.py
+++ b/track/env_wind.py
@@ -5,7 +5,7 @@
 import xarray as xr
 
 import namelist
-from util import constants, input, sphere
+from util import input
 
 """
 Returns the name of the file containing environmental wind statistics.