Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include the Marine heatwave calculation feature #40

Merged
merged 15 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 12 additions & 19 deletions mom6/mom6_module/deprecated/mom6_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,19 @@
import xarray as xr
from scipy.stats import norm as normal
from mom6 import DATA_PATH
from mom6.mom6_module.mom6_types import RegionalOptions,GridOptions,DataTypeOptions,DataSourceOptions

warnings.simplefilter("ignore")
xr.set_options(keep_attrs=True)


# typing
RegionalOptions = Literal[
'MAB','GOM','SS','GB','SS_LME','NEUS_LME','SEUS_LME',
'GOMEX','GSL','NGOMEX','SGOMEX','Antilles','Floridian'
]



class OpenDapStore:
"""class to handle the OPeNDAP request
"""
def __init__(
self,
grid : Literal['raw','regrid'] = 'raw',
data_type : Literal['forecast','historical'] = 'historical'
grid : GridOptions = 'raw',
data_type : DataTypeOptions = 'historical'
) -> None:
"""
input for the class to get the opendap data
Expand Down Expand Up @@ -136,8 +129,8 @@ def __init__(
iyear : int,
imonth : int,
var : str,
grid : Literal['raw','regrid'] = 'raw',
source : Literal['local','opendap'] = 'local'
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local'
) -> None:
"""
input for the class to get the individual forecast
Expand Down Expand Up @@ -222,8 +215,8 @@ def get_mom6(self) -> xr.Dataset:
@staticmethod
def get_mom6_all(
var : str,
grid : Literal['raw','regrid'] = 'raw',
source : Literal['local','opendap'] = 'local'
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local'
) -> xr.Dataset:
"""
Return the mom6 all rawgrid/regridded hindcast/forecast field
Expand Down Expand Up @@ -586,8 +579,8 @@ def __init__(
year : int,
month : int,
day : int = 1,
grid : Literal['raw','regrid'] = 'raw',
source : Literal['local','opendap'] = 'local'
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local'
) -> None:
"""
input for getting the historical run data
Expand Down Expand Up @@ -672,8 +665,8 @@ def get_mom6(self) -> xr.Dataset:
@staticmethod
def get_mom6_all(
var : str,
grid : Literal['raw','regrid'] = 'raw',
source : Literal['local','opendap'] = 'local'
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local'
) -> xr.Dataset:
"""
Return the mom6 all rawgrid/regridded historical run field
Expand Down Expand Up @@ -774,7 +767,7 @@ def get_mom6_grid() -> xr.Dataset:
@staticmethod
def get_mom6_mask(
mask : Literal['wet','wet_c','wet_u','wet_v'] = 'wet',
grid : Literal['raw','regrid'] = 'raw'
grid : GridOptions = 'raw'
) -> xr.DataArray:
"""
The function is designed to export the various mask provided
Expand Down
22 changes: 8 additions & 14 deletions mom6/mom6_module/mom6_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,21 @@
import numpy as np
import xarray as xr
from mom6 import DATA_PATH
from mom6.mom6_module.mom6_types import ModelRegionOptions,GridOptions,DataTypeOptions,DataSourceOptions

warnings.simplefilter("ignore")
xr.set_options(keep_attrs=True)


# typing
RegionalOptions = Literal[
'MAB','GOM','SS','GB','SS_LME','NEUS_LME','SEUS_LME',
'GOMEX','GSL','NGOMEX','SGOMEX','Antilles','Floridian'
]



class OpenDapStore:
"""class to handle the OPeNDAP request
"""
def __init__(
self,
region : Literal['northwest_atlantic'] = 'northwest_atlantic',
grid : Literal['raw','regrid'] = 'raw',
data_type : Literal['forecast','historical'] = 'historical'
region : ModelRegionOptions = 'northwest_atlantic',
grid : GridOptions = 'raw',
data_type : DataTypeOptions = 'historical'
) -> None:
"""
input for the class to get the opendap data
Expand Down Expand Up @@ -136,8 +130,8 @@ def __init__(
data_relative_dir : str = None,
static_relative_dir : str = None,
tercile_relative_dir : str = None,
grid : Literal['raw','regrid'] = 'raw',
source : Literal['local','opendap'] = 'local',
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local',
) -> None:
"""
input for the class to get the forecast data
Expand Down Expand Up @@ -497,8 +491,8 @@ def __init__(
var : str,
data_relative_dir : str = None,
static_relative_dir : str = None,
grid : Literal['raw','regrid'] = 'raw',
source : Literal['local','opendap'] = 'local',
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local',
) -> None:
"""
input for getting the historical run data
Expand Down
242 changes: 242 additions & 0 deletions mom6/mom6_module/mom6_mhw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#!/usr/bin/env python

"""

This is the script to generate the marine heatwave analysis
including the statistic of climatology and percentile threshold
based on the forecast(reforecast) and historical run
that is generated by Andrew Ross at GFDL.

"""
import warnings
import xarray as xr
from mom6.mom6_module.mom6_statistics import (
ForecastClimatology,
ForecastQuantile,
CoordinateWrangle
)
from mom6.mom6_module.mom6_types import (
TimeGroupByOptions
)

warnings.simplefilter("ignore")
xr.set_options(keep_attrs=True)


class MarineHeatwaveForecast:
"""
Class for calculating the marine heatwave related
statistics and the resulted identified event
and the probability with the use of prob forecast
provided by the forecast/reforecast

The method should be able to accomadate the
raw and regridded format
"""

def __init__(
self,
ds_data : xr.Dataset,
sst_name : str = 'tos',
initialization_name : str = 'init',
member_name : str = 'member',
time_frequency : TimeGroupByOptions = 'month'
) -> None:
"""
Parameters
----------
ds_data : xr.Dataset
The sea surface temperature dataset one want to use to
derived the marine heatwave. The coordinate
must have the name "lon" and "lat" exactly (raw grid
geolon and geolat need to be renamed).
sst_name : str
The sea surface temperature variable name in the dataset
initialization_name : str, optional
initialization dimension name, by default 'init'
member_name : str, optional
ensemble member dimension name, by default 'member'
time_frequency : TimeGroupByOptions, optional
name in time frequency to do the time group, by default 'month'
'year', 'month', 'dayofyear' are the available options.
"""

self.dataset = CoordinateWrangle(ds_data).to_360()
self.varname = sst_name
self.init = initialization_name
self.mem = member_name
self.tfreq = time_frequency

def generate_forecast_batch(
self,
climo_start_year : int = 1993,
climo_end_year : int = 2020,
anom_start_year : int = 1993,
anom_end_year : int = 2020,
quantile_threshold : float = 90.
) -> xr.Dataset:
"""generate the MHW statistics and identify MHW

Parameters
----------
climo_start_year : int, optional
start year of climatology and threshold period, by default 1993
climo_end_year : int, optional
end year of climatology and threshold period, by default 2020
anom_start_year : int, optional
start year of anomaly that need to identify MHW, by default 1993
anom_end_year : int, optional
end year of anomaly that need to identify MHW, by default 2020
quantile_threshold : float, optional
quantile value that define the threshold, by default 90.

Returns
-------
xr.Dataset
The dataset including MHW probability, Magnitude, threshold.
"""

# calculate anomaly based on climatology
class_forecast_climo = ForecastClimatology(self.dataset,self.varname)
dict_anom = class_forecast_climo.generate_anom_batch(
climo_start_year,
climo_end_year,
climo_start_year, # force the anom start year for threshold be the same as climo period
climo_end_year, # force the anom end year for threshold be the same as climo period
'persist'
)

# anomaly used for the threshold
ds_anom = xr.Dataset()
ds_anom[f'{self.varname}_anom'] = dict_anom['anomaly']
ds_anom['lon'] = self.dataset['lon']
ds_anom['lat'] = self.dataset['lat']

# calculate threshold
class_forecast_quantile = ForecastQuantile(ds_anom,f'{self.varname}_anom')
### in memery result not lazy-loaded (same as climo period)
da_threshold = class_forecast_quantile.generate_quantile(
climo_start_year,
climo_end_year,
quantile_threshold
)

# anomaly that need to find MHW
dict_anom = class_forecast_climo.generate_anom_batch(
climo_start_year,
climo_end_year,
anom_start_year,
anom_end_year,
'persist',
precompute_climo = True,
da_climo = dict_anom['climatology']
)
da_anom = dict_anom['anomaly']

# calculate average mhw magnitude
da_mhw_mag = da_anom.where(da_anom.groupby(f'{self.init}.{self.tfreq}')>=da_threshold)
da_mhw_mag_ave = da_anom.mean(dim=f'{self.mem}').compute()
da_mhw_mag_ave.attrs['mhw_magnitude_definition'] = 'ensemble mean of all sst anomaly'

# calculate mhw event
da_mhw = (
da_mhw_mag
.where(da_mhw_mag.isnull(),other=1)
.sum(dim=f'{self.mem}',skipna=True)
)

# calculate probability
da_mask = da_anom.where(da_anom.isnull(),other=1.)
da_event = da_mask.sum(dim=f'{self.mem}')
da_prob = (da_mhw/da_event).compute()

# output dataset
ds_mhw = xr.Dataset()
ds_mhw[f'{self.varname}_threshold{quantile_threshold:02d}'] = da_threshold
ds_mhw[f'mhw_prob{quantile_threshold:02d}'] = da_prob
ds_mhw['ssta_avg'] = da_mhw_mag_ave
ds_mhw['mhw_magnitude_indentified_ens'] = da_mhw_mag
ds_mhw.attrs['period_of_quantile'] = da_threshold.attrs['period_of_quantile']
ds_mhw.attrs['period_of_climatology'] = da_threshold.attrs['period_of_climatology']

return ds_mhw

def generate_forecast_single(
self,
init_time : str = '2022-03',
da_climo : xr.DataArray = None,
da_threshold : xr.DataArray = None
) -> xr.Dataset:
"""generate MHW forecast for single initialization
based on existing stats (climatology and threshold)

Parameters
----------
init_time : str, optional
the signal initialization time in the format of YYYY-MM, by default '2022-03'
da_climo : xr.DataArray, optional
the dataarray containing the climatology data, by default None
da_threshold : xr.DataArray, optional
the dataarray containing the threshold data, by default None

Returns
-------
xr.Dataset
The dataset including MHW probability, Magnitude, threshold.
"""

# crop data
da_data = self.dataset[self.varname].sel(
{self.init :init_time}
).load()

# test if the da_data crop period exist
if len(da_data[self.init].data) == 0:
raise ValueError(
"The data array is empty based on the kwarg "+
"init_time"
)

# calculate anomaly based on climatology
da_anom = (
da_data.groupby(f'{self.init}.{self.tfreq}')
- da_climo
).compute()

# calculate average mhw magnitude
da_mhw_mag = da_anom.where(da_anom.groupby(f'{self.init}.{self.tfreq}')>=da_threshold)
da_mhw_mag_ave = da_anom.mean(dim=f'{self.mem}').compute()
da_mhw_mag_ave.attrs['mhw_magnitude_definition'] = 'ensemble mean of all sst anomaly'

# calculate mhw event
da_mhw = (
da_mhw_mag
.where(da_mhw_mag.isnull(),other=1)
.sum(dim=f'{self.mem}',skipna=True)
)

# calculate probability
da_mask = da_anom.where(da_anom.isnull(),other=1.)
da_event = da_mask.sum(dim=f'{self.mem}')
da_prob = (da_mhw/da_event).compute()

# output dataset
ds_mhw = xr.Dataset()
try:
ds_mhw.attrs['period_of_quantile'] = da_threshold.attrs['period_of_quantile']
ds_mhw.attrs['period_of_climatology'] = da_climo.attrs['period_of_climatology']
quantile_threshold = int(da_threshold.attrs['period_of_quantile'].split()[1])
except KeyError as e:
raise AttributeError(
'quantile file is not standard file that provide quantile number'
) from e
except ValueError as e:
raise ValueError(
'The "quantile_threshold" dataarray.... does not have period_of_quantile attrs'
) from e

ds_mhw[f'mhw_prob{quantile_threshold:02d}'] = da_prob
ds_mhw['ssta_avg'] = da_mhw_mag_ave
ds_mhw['mhw_mag_indentified_ens'] = da_mhw_mag

return ds_mhw
Loading