From 4c7c9d35d8547dae27f309db234fccb793b0cfb7 Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Thu, 21 Jul 2022 04:09:26 -0500 Subject: [PATCH 01/11] WIP ECMWF opendata download and conversion --- modules/data.atmosphere/R/download.ECMWF.R | 129 ++++++++++++++++++ .../inst/ECMWF/download_ecmwf_opendata.py | 44 ++++++ .../inst/ECMWF/ecmwf_grib2nc.py | 22 +++ 3 files changed, 195 insertions(+) create mode 100644 modules/data.atmosphere/R/download.ECMWF.R create mode 100644 modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py create mode 100644 modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py diff --git a/modules/data.atmosphere/R/download.ECMWF.R b/modules/data.atmosphere/R/download.ECMWF.R new file mode 100644 index 00000000000..f1597f399b3 --- /dev/null +++ b/modules/data.atmosphere/R/download.ECMWF.R @@ -0,0 +1,129 @@ +#' Download ECMWF Open Data 15 day forecast data products +#' +#' @author Swarnalee Mazumder +#' + +library(reticulate) +# py_install("ecmwf-opendata",pip = TRUE) + +download.ECMWF <- function(outfolder, + fchour, + stream, + type = "pf", + parameters = "all", + overwrite = FALSE, + reticulate_python = NULL,...) + +{ + if (!file.exists(outfolder)) { + dir.create(outfolder, showWarnings = FALSE, recursive = TRUE) + } + + + tryCatch({ + ecmwfod <- reticulate::import("ecmwf.opendata") + }, error = function(e) { + PEcAn.logger::logger.severe( + "Failed to load `ecmwfod` Python library. ", + "Please make sure it is installed to a location accessible to `reticulate`.", + "You should be able to install it with the following command: ", + "`pip install --user ecmwf-opendata`.", + "The following error was thrown by `reticulate::import(\"ecmwf.opendata\")`: ", + conditionMessage(e) + ) + }) + + hour <- fchour + + all_hours <- c(00, 12) + + if (any(!hour %in% all_hours)) { + bad_hours <- setdiff(hour, all_hours) + PEcAn.logger::logger.severe(sprintf( + "Invalid forecast hours %s. 15 day forecast hours must be one of the following: %s", + paste0("`", bad_hours, "`", collapse = ", "), + paste0("`", all_hours, "`", collapse = ", ") + )) + } + + if (fchour == 00){ + time <- 0 + } else if (fchour == 12){ + time <- 12 + } + + all_parameters <- c("2t", "tp", "10u", "10v", "q", "r", "sp") + + if (tolower(parameters) == "all") { + parameter_types <- all_parameters + } + + if (any(!parameter_types %in% all_parameters)) { + bad_parameters <- setdiff(parameter_types, all_parameters) + PEcAn.logger::logger.severe(sprintf( + "Invalid parameter types %s. Products must be one of the following: %s", + paste0("`", bad_parameters, "`", collapse = ", "), + paste0("`", all_parameters, "`", collapse = ", ") + )) + } + + + if (stream == "enfo"){ + time <- time + step <- 360 + type <- type + } else{ + PEcAn.logger::logger.severe(sprintf( + "Invalid data stream." + )) + } + + # Python script "download_ecmwf.py" to get latest forecast date and download forecast datasets + source_python("download_ecmwf.py") + + date_latestdata <- ecmwflatest(time, step, stream, type, all_parameters) + latest_filedate <- strtoi(gsub("-", "", substr(as.character.Date(date_latestdata), 1, 10))) + + current_date <- strtoi(gsub("-", "", Sys.Date())) + + date <- if (latest_filedate == current_date){ + # today + 0 + } else if ((latest_filedate - current_date) == -1){ + # yesterday + -1 + } else if ((latest_filedate - current_date) == -2){ + # the day before yesterday + -2 + } else { PEcAn.logger::logger.severe(sprintf( + "Invalid file download date." + )) + } + + fname <- paste(latest_filedate, time, step, stream, type, sep = "_") + + grib_fname <- paste0(fname, ".grib2") + data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, grib_fname) + + # Python script "ecmwf_grib2nc.py" to convert grib2 to netCDF + # Converting Grib2 file to ensemble wise NetCDF file + source_python("ecmwf_grib2nc.py") + nc_ecmwf <- grib2nc_ecmwf(grib_fname) + +} + +### code to reproduce +# date <- -1 +# time <- 0 +# step <- 360 +# stream <- "enfo" +# type <- "pf" +# +# all_parameters <- c("tp", "10u", "10v", "q", "r", "sp") +# grib_fname <- "trial_ecmwfod.grib2" +# +# source_python("download_ecmwf.py") +# data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, grib_fname) +# +# source_python("ecmwf_grib2nc.py") +# nc_ecmwf <- grib2nc_ecmwf(grib_fname) diff --git a/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py b/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py new file mode 100644 index 00000000000..0bb1c4ed96d --- /dev/null +++ b/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py @@ -0,0 +1,44 @@ +# !pip install ecmwf-opendata + +def ecmwflatest(time, step, stream, type, params): + from ecmwf.opendata import Client + + client = Client("ecmwf", beta=True) + + if stream == "mmsf": + latest= client.latest( + time = int(time), + step= step, + stream= stream, + type= type, + param= params, + ) + + elif stream == "enfo": + latest= client.latest( + time = int(time), + step= int(step), + stream= stream, + type= type, + param= params, + ) + + return latest + + +def ecmwfdownload(date, time, step, stream, type, params, filename): + from ecmwf.opendata import Client + + client = Client("ecmwf", beta=True) + + file = client.retrieve( + date= int(date), + time= int(time), + step= int(step), + stream= stream, + type= type, + param= params, + target= filename + ) + + return file diff --git a/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py b/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py new file mode 100644 index 00000000000..0976b6a20d0 --- /dev/null +++ b/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py @@ -0,0 +1,22 @@ +# !pip install xarray +# !pip install cfgrib + +def grib2nc_ecmwf(grib_fname): + import xarray + import cfgrib + + grib_fname = str(grib_fname) + + dataset = xarray.open_dataset(grib_fname, engine='cfgrib') + + fname = str(grib_fname[:-6]) + + for i in range(50): + + ens = dataset.isel(number=i) + + fname = fname+"ens_"+str(i+1)+".nc" + + ens.to_netcdf(fname) + + fname = str(grib_fname[:-6]) From 4fcf7e97364c6636803fa4e792923b19f1a25d64 Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Mon, 25 Jul 2022 10:41:04 -0500 Subject: [PATCH 02/11] *added ability to access point data *modified datasets acc to CF standard *updated documentation *resolved PR comments --- .../06_data/01_meteorology.Rmd | 8 + modules/data.atmosphere/DESCRIPTION | 2 +- modules/data.atmosphere/R/download.ECMWF.R | 114 +++++++-- .../inst/ECMWF/download_ecmwf_opendata.py | 118 +++++++--- .../inst/ECMWF/ecmwf_grib2nc.py | 222 ++++++++++++++++-- 5 files changed, 383 insertions(+), 81 deletions(-) diff --git a/book_source/03_topical_pages/06_data/01_meteorology.Rmd b/book_source/03_topical_pages/06_data/01_meteorology.Rmd index e912ac6545f..b7579bfcb80 100644 --- a/book_source/03_topical_pages/06_data/01_meteorology.Rmd +++ b/book_source/03_topical_pages/06_data/01_meteorology.Rmd @@ -132,3 +132,11 @@ Resolution: 30 min Availability: Varies by [site](https://meta.icos-cp.eu/collections/q4V7P1VLZevIrnlsW6SJO1Rz) Notes: To use this option, set `source` as `ICOS` and a `product` tag containing `etc` in `pecan.xml` + +## ECMWF Open Data + +Scale: Global + +Resolution: 6 hour, 0.4 degree + +Availability: Varies by [day](https://data.ecmwf.int/) diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION index 64a4b9d8bf7..546f2ccf7d0 100644 --- a/modules/data.atmosphere/DESCRIPTION +++ b/modules/data.atmosphere/DESCRIPTION @@ -48,6 +48,7 @@ Imports: RCurl, REddyProc, reshape2, + reticulate, rgdal, rlang (>= 0.2.0), sp, @@ -66,7 +67,6 @@ Suggests: foreach, parallel, progress, - reticulate Remotes: github::chuhousen/amerifluxr, github::ropensci/geonames, diff --git a/modules/data.atmosphere/R/download.ECMWF.R b/modules/data.atmosphere/R/download.ECMWF.R index f1597f399b3..91d535ba92a 100644 --- a/modules/data.atmosphere/R/download.ECMWF.R +++ b/modules/data.atmosphere/R/download.ECMWF.R @@ -1,15 +1,47 @@ -#' Download ECMWF Open Data 15 day forecast data products +#' Downloads ECMWF Open Data 15 day forecast data products +#' https://confluence.ecmwf.int/display/DAC/ECMWF+open+data%3A+real-time+forecasts +#' +#' https://github.com/ecmwf/ecmwf-opendata +#' Under the hood, this function uses the Python `ecmwf-opendata` module, +#' which can be installed via `pip` (`pip install --user ecmwf-opendata`). The +#' module is accessed via the `reticulate` package. #' +#' @param outfolder location on disk where outputs will be stored +#' @param lat.in,lon.in site coordinates, decimal degrees (numeric) +#' @param fchour reference time of the forecasts. Values are 00, 12 for 15 day forecast. +#' @param stream forecasting system that produces the data. Current value is `"enfo"` +#' @param type the type of forecast data. Current values: `cf` (controlled forecast), `pf` (perturbed forecast) +#' @param parameters character vector of product types, or `"all"`. +#' @param reticulate_python Path to Python binary for `reticulate` +#' (passed to [reticulate::use_python()]). If `NULL` (default), use +#' the system default. +#' @param overwrite Logical. If `FALSE` (default), skip any files with +#' the same target name (i.e. same variable) that already exist in +#' `outfolder`. If `TRUE`, silently overwrite existing files. +#' @return information about the output file +#' @export +#' @examples +#' +#' \dontrun{ +#' files <- download.ECMWF( +#' "ECMWF_output", +#' lat.in = 0, +#' lon.in = 180, +#' fchour = 00, +#' stream = "enfo" +#' type = c("cf", "pf"), +#' parameters = "all", +#' ) +#' } #' @author Swarnalee Mazumder #' -library(reticulate) -# py_install("ecmwf-opendata",pip = TRUE) - download.ECMWF <- function(outfolder, + lat.in, + lon.in, fchour, - stream, - type = "pf", + stream = "enfo", + type = c("cf", "pf"), parameters = "all", overwrite = FALSE, reticulate_python = NULL,...) @@ -67,19 +99,19 @@ download.ECMWF <- function(outfolder, )) } - if (stream == "enfo"){ time <- time step <- 360 type <- type } else{ PEcAn.logger::logger.severe(sprintf( - "Invalid data stream." + "Invalid data stream. Currently, data stream `enfo` is supported." )) } # Python script "download_ecmwf.py" to get latest forecast date and download forecast datasets - source_python("download_ecmwf.py") + script.path = file.path(system.file("download_ecmwf_opendata.py", package = "PEcAn.data.atmosphere")) + reticulate::source_python(script.path) date_latestdata <- ecmwflatest(time, step, stream, type, all_parameters) latest_filedate <- strtoi(gsub("-", "", substr(as.character.Date(date_latestdata), 1, 10))) @@ -100,30 +132,70 @@ download.ECMWF <- function(outfolder, )) } + # Removes any already existing files with base name same as `latest_filedate`` + if ((length(list.files(path = outfolder, pattern = as.character(latest_filedate)) ) > 0) == TRUE){ + file.remove(list.files(outfolder, pattern = as.character(latest_filedate))) + } + fname <- paste(latest_filedate, time, step, stream, type, sep = "_") - grib_fname <- paste0(fname, ".grib2") - data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, grib_fname) + in_filename <- paste0(fname, ".grib2") + + data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, in_filename) # Python script "ecmwf_grib2nc.py" to convert grib2 to netCDF # Converting Grib2 file to ensemble wise NetCDF file - source_python("ecmwf_grib2nc.py") - nc_ecmwf <- grib2nc_ecmwf(grib_fname) + script.path = file.path(system.file("ecmwf_grib2nc.py", package = "PEcAn.data.atmosphere")) + reticulate::source_python(script.path) + + nc_ecmwf <- grib2nc_ecmwf(in_filename, outfolder, out_filename) + + + rows <- 1 + results <- data.frame( + file = character(rows), + host = character(rows), + mimetype = character(rows), + formatname = character(rows), + startdate = character(rows), + enddate = character(rows), + dbfile.name = "ECMWF", + stringsAsFactors = FALSE + ) + + firstdate_st <- latest_filedate + lastdate_st <- ymd(firstdate_st) + days(15) + + results$file[rows] <- + file.path(outfolder, out_file_names) + results$host[rows] <- PEcAn.remote::fqdn() + results$startdate[rows] <- firstdate_st + results$enddate[rows] <- lastdate_st + results$mimetype[rows] <- "application/x-netcd" + results$formatname[rows] <- "CF Meteorology" + + return(results) } -### code to reproduce + +######### code to reproduce +# library(reticulate) # date <- -1 # time <- 0 # step <- 360 # stream <- "enfo" -# type <- "pf" +# type <- c("cf","pf") # -# all_parameters <- c("tp", "10u", "10v", "q", "r", "sp") -# grib_fname <- "trial_ecmwfod.grib2" +# all_parameters <- c("2t", "tp", "10u", "10v", "q", "r", "sp") +# in_filename <- "trial_ecmwfod.grib2" # -# source_python("download_ecmwf.py") -# data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, grib_fname) +# script.path = file.path(system.file("download_ecmwf_opendata.py", package = "PEcAn.data.atmosphere")) +# reticulate::source_python(script.path) +# data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, in_filename) # -# source_python("ecmwf_grib2nc.py") -# nc_ecmwf <- grib2nc_ecmwf(grib_fname) +# out_filename <- "trial_ecmwfod" +# +# script.path = file.path(system.file("ecmwf_grib2nc.py", package = "PEcAn.data.atmosphere")) +# reticulate::source_python(script.path) +# nc_ecmwf <- grib2nc_ecmwf(in_filename, outfolder, out_filename, lat_in, lon_in) \ No newline at end of file diff --git a/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py b/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py index 0bb1c4ed96d..e1224f47b34 100644 --- a/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py +++ b/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py @@ -1,44 +1,88 @@ -# !pip install ecmwf-opendata - def ecmwflatest(time, step, stream, type, params): - from ecmwf.opendata import Client - - client = Client("ecmwf", beta=True) - if stream == "mmsf": - latest= client.latest( - time = int(time), - step= step, - stream= stream, - type= type, - param= params, - ) + """ + Fetches latest ECMWF Open Data forecast based on specified forecast time, step, stream, parameters + + Parameters + ---------- + time (numeric) -- reference time of the forecasts. Values are 0 (00), 12. + + step (numeric) -- forecast time step. Current value: 360 + + stream (str) -- forecasting system that produces the data. Current value: `enfo` + + lat_in (numeric) -- Site coordinates, decimal degrees + + lon_in (numeric) -- Site coordinates, decimal degrees + + Returns + ------- + Latest date for which data is available. + """ + + from ecmwf.opendata import Client + + client = Client("ecmwf", beta=True) + + if stream == "mmsf": + latest= client.latest( + time = int(time), + step= step, + stream= stream, + type= type, + param= params, + ) + + elif stream == "enfo": + latest= client.latest( + time = int(time), + step= int(step), + stream= stream, + type= type, + param= params, + ) + + return latest + + +def ecmwfdownload(date, time, step, stream, type, params, filename): - elif stream == "enfo": - latest= client.latest( - time = int(time), + """ + Downloads latest ECMWF Open Data forecast based on specified forecast time, step, stream, parameters + + Parameters + ---------- + date (numeric) -- reference date of the forecasts. Values are 0 (today), -1 (yesterday), -2 (day before yesterday) + + time (numeric) -- reference time of the forecasts. Values are 0 (00), 12. + + step (numeric) -- forecast time step. Current value: 360 + + stream (str) -- forecasting system that produces the data. Current value: `enfo` + + type (str) -- the type of data. Current values: `cf`, `pf` + + params (str) -- the meteorological parameters, such as wind, pressure or humidity. Current parameters are "2t", "tp", "10u", "10v", "q", "r", "sp". + + filename (str) -- output filename for the Grib2 file + + Returns + ------- + Downloads ECMWF Open Data forecast for the latest date for which forecast data is available. + """ + + from ecmwf.opendata import Client + + client = Client("ecmwf", beta=True) + + file = client.retrieve( + date= int(date), + time= int(time), step= int(step), stream= stream, type= type, param= params, - ) - - return latest - - -def ecmwfdownload(date, time, step, stream, type, params, filename): - from ecmwf.opendata import Client - - client = Client("ecmwf", beta=True) - - file = client.retrieve( - date= int(date), - time= int(time), - step= int(step), - stream= stream, - type= type, - param= params, - target= filename - ) - - return file + target= filename + ) + + return file diff --git a/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py b/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py index 0976b6a20d0..d01f38b17f3 100644 --- a/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py +++ b/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py @@ -1,22 +1,200 @@ -# !pip install xarray -# !pip install cfgrib - -def grib2nc_ecmwf(grib_fname): - import xarray - import cfgrib - - grib_fname = str(grib_fname) - - dataset = xarray.open_dataset(grib_fname, engine='cfgrib') - - fname = str(grib_fname[:-6]) - - for i in range(50): - - ens = dataset.isel(number=i) - - fname = fname+"ens_"+str(i+1)+".nc" - - ens.to_netcdf(fname) - - fname = str(grib_fname[:-6]) +def grib2nc_ecmwf(in_filename, outfolder, out_filename, lat_in, lon_in): + + """ + Converts ECMWF Open Data 15 day forecast from Grib2 into PEcAn standard NetCDF + + Parameters + ---------- + in_filename (str) -- Grib2 file consisting of forecast data + + outfolder (str) -- path to the directory where the output file is stored. If specified directory does not exists, it is created. + + out_filename (str) -- filename of output NetCDF file + + lat_in (numeric) -- Site coordinates, decimal degrees + + lon_in (numeric) -- Site coordinates, decimal degrees + + Returns + ------- + output netCDF in CF Format which is saved in the specified directory. + + + Example Usage: + /don't run + + grib2nc_ecmwf(in_filename, + outfolder, + out_filename, + 0, + 180) + """ + + import os + import xarray + import cfgrib + + in_filename = str(in_filename) + + out_filename = str(out_filename) + + lat_in = float(lat_in) + lon_in = float(lon_in) + + + # if specified output directory does not exist create it + if not os.path.exists(outfolder): + os.makedirs(outfolder, exist_ok=True) + + + # "dataType" needs to be defined to correctly open the grib file + # https://github.com/ecmwf/cfgrib/issues/285 + + ######### Controlled Forecast + + # 10 metre U wind component + cf_u10 = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'10u'}}) + + # 10 metre V wind component + cf_v10 = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'10v'}}) + + # 2 metre temperature + cf_t2m = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'2t'}}) + + # total precipitation + cf_tp = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'tp'}}) + + # Surface pressure + cf_sp = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'sp'}}) + + # Specific humidity + cf_q = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'q'}}) + + # Relative humidity + cf_r = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'r'}}) + + + # https://docs.xarray.dev/en/stable/generated/xarray.merge.html + # “override”: skip comparing and copy attrs from the first dataset to the result + cf_allvar = xarray.merge([cf_u10, cf_v10, cf_t2m, cf_tp, cf_sp, cf_q, cf_r], + compat='override') + + + # total precipitation unit as per CF standard + # all other variables already in CF standard units + cf_allvar.tp.attrs.update({'GRIB_units': 'kg m**-2 s**-1', 'units': 'kg m**-2 s**-1'}) + attrs_tp_cf = cf_allvar.tp.attrs + + + # 1 m = 1/86.4 kg/m2/s + cf_allvar['tp'] = cf_allvar.tp / 86.4 + cf_allvar.tp.attrs = attrs_tp_cf + + + # variable name as per CF standard + cf_allvar_CF_name_standard = cf_allvar.rename(name_dict={'u10':'eastward_wind', + 'v10':'northward_wind', + 't2m':'air_temperature', + 'tp':'precipitation_flux', + 'sp':'air_pressure', + 'q':'specific_humidity', + 'r':'relative_humidity'}) + + + cf_allvar_CF_lat_lon = cf_allvar_CF_name_standard.sel(latitude=lat_in, longitude=lon_in, method="nearest") + + + cf_save_path = os.path.join( + outfolder, + out_filename + + "_cf" + + ".nc" + ) + + + # Creates netCDF file corresponding to "dataType":"cf" + cf_nc = cf_allvar_CF_lat_lon.to_netcdf(cf_save_path) + + + ######### Perturbed Forecast + + # 10 metre U wind component + pf_u10 = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'10u'}}) + + # 10 metre V wind component + pf_v10 = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'10v'}}) + + # 2 metre temperature + pf_t2m = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'2t'}}) + + # total precipitation + pf_tp = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'tp'}}) + + # Surface pressure + pf_sp = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'sp'}}) + + # Specific humidity + pf_q = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'q'}}) + + # Relative humidity + pf_r = xarray.open_dataset(in_filename, engine='cfgrib', + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'r'}}) + + + # https://docs.xarray.dev/en/stable/generated/xarray.merge.html + # “override”: skip comparing and copy attrs from the first dataset to the result + pf_allvar = xarray.merge([pf_u10, pf_v10, pf_t2m, pf_tp, pf_sp, pf_q, pf_r], + compat='override') + + + # total precipitation unit as per CF standard + # all other variables already in CF standard units + pf_allvar.tp.attrs.update({'GRIB_units': 'kg m**-2 s**-1', 'units': 'kg m**-2 s**-1'}) + attrs_tp_pf = pf_allvar.tp.attrs + + + # 1 m = 1/86.4 kg/m2/s + pf_allvar['tp'] = pf_allvar.tp / 86.4 + pf_allvar.tp.attrs = attrs_tp_pf + + + # variable name as per CF standard + pf_allvar_CF_name_standard = pf_allvar.rename(name_dict={'u10':'eastward_wind', + 'v10':'northward_wind', + 't2m':'air_temperature', + 'tp':'precipitation_flux', + 'sp':'air_pressure', + 'q':'specific_humidity', + 'r':'relative_humidity'}) + + + + # Creates 50 netCDF files corresponding to 50 ensemble members in "dataType":"pf" (perturbed forecast) + for i in range(50): + + pf_allvar_ens = pf_allvar_CF_name_standard.isel(number=i) + + pf_allvar_lat_lon = pf_allvar_ens.sel(latitude=lat_in, longitude=lon_in, method="nearest") + + pf_save_path = os.path.join( + outfolder, + out_filename + + "_pf_ens_" + + str(i+1) + + ".nc" + ) + + pf_allvar_lat_lon.to_netcdf(pf_save_path) From 9d1d77339ff303a0802b0e1edb78fdc446e273ea Mon Sep 17 00:00:00 2001 From: istfer Date: Wed, 27 Jul 2022 16:12:32 +0300 Subject: [PATCH 03/11] Update modules/data.atmosphere/R/download.ECMWF.R --- modules/data.atmosphere/R/download.ECMWF.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data.atmosphere/R/download.ECMWF.R b/modules/data.atmosphere/R/download.ECMWF.R index 91d535ba92a..afd8ff5eeab 100644 --- a/modules/data.atmosphere/R/download.ECMWF.R +++ b/modules/data.atmosphere/R/download.ECMWF.R @@ -110,7 +110,7 @@ download.ECMWF <- function(outfolder, } # Python script "download_ecmwf.py" to get latest forecast date and download forecast datasets - script.path = file.path(system.file("download_ecmwf_opendata.py", package = "PEcAn.data.atmosphere")) + script.path = file.path(system.file("ECMWF/download_ecmwf_opendata.py", package = "PEcAn.data.atmosphere")) reticulate::source_python(script.path) date_latestdata <- ecmwflatest(time, step, stream, type, all_parameters) From ae5547b62835014a1b0c0d7a0aabb324b1692826 Mon Sep 17 00:00:00 2001 From: istfer Date: Wed, 27 Jul 2022 16:13:59 +0300 Subject: [PATCH 04/11] Update modules/data.atmosphere/R/download.ECMWF.R --- modules/data.atmosphere/R/download.ECMWF.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data.atmosphere/R/download.ECMWF.R b/modules/data.atmosphere/R/download.ECMWF.R index afd8ff5eeab..e107a25ed79 100644 --- a/modules/data.atmosphere/R/download.ECMWF.R +++ b/modules/data.atmosphere/R/download.ECMWF.R @@ -145,7 +145,7 @@ download.ECMWF <- function(outfolder, # Python script "ecmwf_grib2nc.py" to convert grib2 to netCDF # Converting Grib2 file to ensemble wise NetCDF file - script.path = file.path(system.file("ecmwf_grib2nc.py", package = "PEcAn.data.atmosphere")) + script.path = file.path(system.file("ECMWF/ecmwf_grib2nc.py", package = "PEcAn.data.atmosphere")) reticulate::source_python(script.path) nc_ecmwf <- grib2nc_ecmwf(in_filename, outfolder, out_filename) From b7a2b923725b28f5f36108716fdbd6d935415c8e Mon Sep 17 00:00:00 2001 From: Swarnalee M <63165395+swarnalee-m@users.noreply.github.com> Date: Thu, 28 Jul 2022 20:42:07 +0530 Subject: [PATCH 05/11] Update modules/data.atmosphere/R/download.ECMWF.R Co-authored-by: istfer --- modules/data.atmosphere/R/download.ECMWF.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/data.atmosphere/R/download.ECMWF.R b/modules/data.atmosphere/R/download.ECMWF.R index e107a25ed79..10ec651e5a0 100644 --- a/modules/data.atmosphere/R/download.ECMWF.R +++ b/modules/data.atmosphere/R/download.ECMWF.R @@ -51,7 +51,11 @@ download.ECMWF <- function(outfolder, dir.create(outfolder, showWarnings = FALSE, recursive = TRUE) } - + if(is.null(product)){ + stream <- "enfo" + } else{ + stream <- product + } tryCatch({ ecmwfod <- reticulate::import("ecmwf.opendata") }, error = function(e) { From 78f5c9a2729d0ce4ba4906b0841d86feceae3efc Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Fri, 29 Jul 2022 03:17:30 -0500 Subject: [PATCH 06/11] * resolved comments * updated documentation --- .../06_data/01_meteorology.Rmd | 2 + modules/data.atmosphere/NAMESPACE | 1 + modules/data.atmosphere/R/download.ECMWF.R | 136 +++++++++++------- modules/data.atmosphere/man/download.ECMWF.Rd | 57 ++++++++ 4 files changed, 145 insertions(+), 51 deletions(-) create mode 100644 modules/data.atmosphere/man/download.ECMWF.Rd diff --git a/book_source/03_topical_pages/06_data/01_meteorology.Rmd b/book_source/03_topical_pages/06_data/01_meteorology.Rmd index b7579bfcb80..f832766be75 100644 --- a/book_source/03_topical_pages/06_data/01_meteorology.Rmd +++ b/book_source/03_topical_pages/06_data/01_meteorology.Rmd @@ -140,3 +140,5 @@ Scale: Global Resolution: 6 hour, 0.4 degree Availability: Varies by [day](https://data.ecmwf.int/) + +Notes: 15 day forecast is provided at two hours 00 and 12, but current implementation only uses forecast hour 00 when download function is called from within the PEcAn workflow diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE index 81df3e94d88..2c0bb2e52e0 100644 --- a/modules/data.atmosphere/NAMESPACE +++ b/modules/data.atmosphere/NAMESPACE @@ -20,6 +20,7 @@ export(debias.met.regression) export(download.Ameriflux) export(download.AmerifluxLBL) export(download.CRUNCEP) +export(download.ECMWF) export(download.ERA5.old) export(download.FACE) export(download.Fluxnet2015) diff --git a/modules/data.atmosphere/R/download.ECMWF.R b/modules/data.atmosphere/R/download.ECMWF.R index 10ec651e5a0..bdc806cfb7c 100644 --- a/modules/data.atmosphere/R/download.ECMWF.R +++ b/modules/data.atmosphere/R/download.ECMWF.R @@ -2,24 +2,39 @@ #' https://confluence.ecmwf.int/display/DAC/ECMWF+open+data%3A+real-time+forecasts #' #' https://github.com/ecmwf/ecmwf-opendata -#' Under the hood, this function uses the Python `ecmwf-opendata` module, -#' which can be installed via `pip` (`pip install --user ecmwf-opendata`). The -#' module is accessed via the `reticulate` package. -#' +#' Under the hood, this function uses the Python `ecmwf-opendata` module. +#' The module and dependencies can be accessed via the `reticulate` package. +#' +#' `reticulate::conda_install(c("scipy", "xarray", "eccodes", "cfgrib", "ecmwf-opendata"), envname = "r-reticulate", pip = TRUE)` +#' `reticulate::use_condaenv("r-reticulate")` +#' +#' #' @param outfolder location on disk where outputs will be stored +#' #' @param lat.in,lon.in site coordinates, decimal degrees (numeric) +#' #' @param fchour reference time of the forecasts. Values are 00, 12 for 15 day forecast. -#' @param stream forecasting system that produces the data. Current value is `"enfo"` +#' `fchour` is not passed via upstream functions and we are using 00 by default when the function is called from within the pecan workflow. +#' #' @param type the type of forecast data. Current values: `cf` (controlled forecast), `pf` (perturbed forecast) +#' #' @param parameters character vector of product types, or `"all"`. +#' #' @param reticulate_python Path to Python binary for `reticulate` #' (passed to [reticulate::use_python()]). If `NULL` (default), use #' the system default. +#' #' @param overwrite Logical. If `FALSE` (default), skip any files with #' the same target name (i.e. same variable) that already exist in #' `outfolder`. If `TRUE`, silently overwrite existing files. +#' +#' Forecast hour is reference time of the forecasts. Values are 00, 12 for 15 day forecast. +# `fchour` is not passed via upstream functions currently as we are using 00 by default (time <- 0) for function calls from within the pecan workflow +#' #' @return information about the output file +#' #' @export +#' #' @examples #' #' \dontrun{ @@ -28,7 +43,7 @@ #' lat.in = 0, #' lon.in = 180, #' fchour = 00, -#' stream = "enfo" +#' product = "NULL, #' type = c("cf", "pf"), #' parameters = "all", #' ) @@ -39,8 +54,8 @@ download.ECMWF <- function(outfolder, lat.in, lon.in, - fchour, - stream = "enfo", + fchour = 00, + product = NULL, type = c("cf", "pf"), parameters = "all", overwrite = FALSE, @@ -51,43 +66,66 @@ download.ECMWF <- function(outfolder, dir.create(outfolder, showWarnings = FALSE, recursive = TRUE) } + ############## Forecast Hour ################ + + # 15 day forecast hours can be 00h or 12h. Here forecast hour is assigned to 00 + # hence the time variable required by the download function becomes 0 + + # all_hours <- c(00, 12) + # if (any(!hour %in% all_hours)) { + # bad_hours <- setdiff(hour, all_hours) + # PEcAn.logger::logger.severe(sprintf( + # "Invalid forecast hours %s. 15 day forecast hours must be one of the following: %s", + # paste0("`", bad_hours, "`", collapse = ", "), + # paste0("`", all_hours, "`", collapse = ", ") + # )) + # } + + # for forecast hour = 00 + if (fchour == 00){ + time <- 0 + } else { + PEcAn.logger::logger.severe(sprintf( + "Invalid forecast hour. Currently, forecast hour 00 is the default." + )) + } + + ############## Product (Forecasting system) ################ + + # For product = NULL,the stream is set to "enfo" + # stream is forecasting system that produces the data. Current value is `"enfo"` + if(is.null(product)){ stream <- "enfo" } else{ - stream <- product - } - tryCatch({ - ecmwfod <- reticulate::import("ecmwf.opendata") - }, error = function(e) { - PEcAn.logger::logger.severe( - "Failed to load `ecmwfod` Python library. ", - "Please make sure it is installed to a location accessible to `reticulate`.", - "You should be able to install it with the following command: ", - "`pip install --user ecmwf-opendata`.", - "The following error was thrown by `reticulate::import(\"ecmwf.opendata\")`: ", - conditionMessage(e) - ) - }) - - hour <- fchour - - all_hours <- c(00, 12) - - if (any(!hour %in% all_hours)) { - bad_hours <- setdiff(hour, all_hours) PEcAn.logger::logger.severe(sprintf( - "Invalid forecast hours %s. 15 day forecast hours must be one of the following: %s", - paste0("`", bad_hours, "`", collapse = ", "), - paste0("`", all_hours, "`", collapse = ", ") + "Invalid data stream. Currently, data stream `enfo` is supported." )) } - if (fchour == 00){ - time <- 0 - } else if (fchour == 12){ - time <- 12 + if (stream == "enfo"){ + time <- time + step <- 360 + type <- type # type of forecast here c("cf", "pf") + } else{ + PEcAn.logger::logger.severe(sprintf( + "Invalid data stream. Currently, data stream `enfo` is supported." + )) } + + ############## Product Types ################ + + # Currently supported parameters are + # "2t" - 2 metre temperature + # "tp" - total precipitation + # "10u" - 10 metre U wind component + # "10v" - 10 metre V wind component + # "q" - Specific humidity + # "r" - Relative humidity + # "sp" - Surface Pressure + + all_parameters <- c("2t", "tp", "10u", "10v", "q", "r", "sp") if (tolower(parameters) == "all") { @@ -103,15 +141,6 @@ download.ECMWF <- function(outfolder, )) } - if (stream == "enfo"){ - time <- time - step <- 360 - type <- type - } else{ - PEcAn.logger::logger.severe(sprintf( - "Invalid data stream. Currently, data stream `enfo` is supported." - )) - } # Python script "download_ecmwf.py" to get latest forecast date and download forecast datasets script.path = file.path(system.file("ECMWF/download_ecmwf_opendata.py", package = "PEcAn.data.atmosphere")) @@ -136,15 +165,17 @@ download.ECMWF <- function(outfolder, )) } - # Removes any already existing files with base name same as `latest_filedate`` + # Skip download if there is any already existing file with base name same as `latest_filedate`` if ((length(list.files(path = outfolder, pattern = as.character(latest_filedate)) ) > 0) == TRUE){ - file.remove(list.files(outfolder, pattern = as.character(latest_filedate))) - } + PEcAn.logger::logger.severe(sprintf( + "Files already exist for %s", + latest_filedate + )) + } else { fname <- paste(latest_filedate, time, step, stream, type, sep = "_") - in_filename <- paste0(fname, ".grib2") - + in_filename <- file.path(outfolder, paste0(fname, ".grib2")) data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, in_filename) # Python script "ecmwf_grib2nc.py" to convert grib2 to netCDF @@ -152,7 +183,8 @@ download.ECMWF <- function(outfolder, script.path = file.path(system.file("ECMWF/ecmwf_grib2nc.py", package = "PEcAn.data.atmosphere")) reticulate::source_python(script.path) - nc_ecmwf <- grib2nc_ecmwf(in_filename, outfolder, out_filename) + out_filename <- file.path(outfolder, fname) + nc_ecmwf <- grib2nc_ecmwf(in_filename, outfolder, out_filename, lat_in, lon_in) rows <- 1 @@ -180,6 +212,8 @@ download.ECMWF <- function(outfolder, return(results) + } + } diff --git a/modules/data.atmosphere/man/download.ECMWF.Rd b/modules/data.atmosphere/man/download.ECMWF.Rd new file mode 100644 index 00000000000..fa999e4b8c2 --- /dev/null +++ b/modules/data.atmosphere/man/download.ECMWF.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download.ECMWF.R +\name{download.ECMWF} +\alias{download.ECMWF} +\title{Downloads ECMWF Open Data 15 day forecast data products +https://confluence.ecmwf.int/display/DAC/ECMWF+open+data%3A+real-time+forecasts} +\usage{ +download.ECMWF( + outfolder, + lat.in, + lon.in, + fchour = 0, + product = NULL, + type = c("cf", "pf"), + parameters = "all", + overwrite = FALSE, + reticulate_python = NULL, + ... +) +} +\arguments{ +\item{outfolder}{location on disk where outputs will be stored} + +\item{lat.in, lon.in}{site coordinates, decimal degrees (numeric)} + +\item{fchour}{reference time of the forecasts. Values are 00, 12 for 15 day forecast. +`fchour` is not passed via upstream functions and we are using 00 by default when the function is called from within the pecan workflow.} + +\item{type}{the type of forecast data. Current values: `cf` (controlled forecast), `pf` (perturbed forecast)} + +\item{parameters}{character vector of product types, or `"all"`.} + +\item{overwrite}{Logical. If `FALSE` (default), skip any files with + the same target name (i.e. same variable) that already exist in + `outfolder`. If `TRUE`, silently overwrite existing files. + +Forecast hour is reference time of the forecasts. Values are 00, 12 for 15 day forecast.} + +\item{reticulate_python}{Path to Python binary for `reticulate` +(passed to [reticulate::use_python()]). If `NULL` (default), use +the system default.} +} +\value{ +information about the output file +} +\description{ +https://github.com/ecmwf/ecmwf-opendata +Under the hood, this function uses the Python `ecmwf-opendata` module. +The module and dependencies can be accessed via the `reticulate` package. +} +\details{ +`reticulate::conda_install(c("scipy", "xarray", "eccodes", "cfgrib", "ecmwf-opendata"), envname = "r-reticulate", pip = TRUE)` +`reticulate::use_condaenv("r-reticulate")` +} +\author{ +Swarnalee Mazumder +} From 0b9de4471118d00ea73b259f517f1bbc07f4f680 Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Sun, 21 Aug 2022 07:13:46 -0500 Subject: [PATCH 07/11] downloads 15day forecast files --- .../inst/ECMWF/ecmwfopendata_download.py | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 modules/data.atmosphere/inst/ECMWF/ecmwfopendata_download.py diff --git a/modules/data.atmosphere/inst/ECMWF/ecmwfopendata_download.py b/modules/data.atmosphere/inst/ECMWF/ecmwfopendata_download.py new file mode 100644 index 00000000000..99f84f71122 --- /dev/null +++ b/modules/data.atmosphere/inst/ECMWF/ecmwfopendata_download.py @@ -0,0 +1,101 @@ +def ecmwflatest(time, step15, stream, type, params): + + from ecmwf.opendata import Client + + """ + Fetches latest ECMWF Open Data forecast based on specified forecast time, step, stream, parameters + + Parameters + ---------- + time (numeric) -- reference time of the forecasts. Values are 0 (00), 12. + + step (numeric) -- forecast time step. Current value: 360 + + stream (str) -- forecasting system that produces the data. Current value: `enfo` + + lat_in (numeric) -- Site coordinates, decimal degrees + + lon_in (numeric) -- Site coordinates, decimal degrees + + Returns + ------- + Latest date for which data is available. + """ + from ecmwf.opendata import Client + + client = Client("ecmwf", beta=True) + + if stream == "mmsf": + latest= client.latest( + time = int(time), + step= int(step15), + stream= stream, + type= type, + param= params, + ) + + elif stream == "enfo": + latest= client.latest( + time = int(time), + step= int(step15), + stream= stream, + type= type, + param= params, + ) + + return latest + + +def ecmwfdownloadopendata(date, time, stream, type, params, filename): + + """ + Downloads latest ECMWF Open Data forecast based on specified forecast time, step, stream, parameters + + Parameters + ---------- + date (numeric) -- reference date of the forecasts. Values are 0 (today), -1 (yesterday), -2 (day before yesterday) + + time (numeric) -- reference time of the forecasts. Values are 0 (00), 12. + + stream (str) -- forecasting system that produces the data. Current value: `enfo` + + type (str) -- the type of data. Current values: `cf`, `pf` + + params (str) -- the meteorological parameters, such as wind, pressure or humidity. Current parameters are "2t", "tp", "10u", "10v", "q", "r", "sp". + + filename (str) -- output filename for the Grib2 file + + Returns + ------- + Downloads ECMWF Open Data forecast for the latest date for which forecast data is available. + """ + + from ecmwf.opendata import Client + + client = Client("ecmwf", beta=True) + + for i in range(0, 142, 3): + file_3h = client.retrieve( + date= int(date), + time= int(time), + step= int(i), + stream= stream, + type= type, + param= params, + target= filename+str(i)+".grib2" + ) + + + for j in range(144, 361, 6): + file_6h = client.retrieve( + date= int(date), + time= int(time), + step= int(j), + stream= stream, + type= type, + param= params, + target= filename+str(j)+".grib2" + ) + + return file_3h, file_6h + From 722b728187db189d871ef8f3728e05c22373c5bf Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Fri, 9 Sep 2022 00:05:03 -0500 Subject: [PATCH 08/11] updated ecmwf download function added met2CF --- .../R/ECMWF_helper_functions.R | 283 ++++++++++++++++++ modules/data.atmosphere/R/download.ECMWF.R | 121 ++++---- modules/data.atmosphere/R/met2CF.ECMWF.R | 32 ++ ...opendata_download.py => download_ecmwf.py} | 5 +- .../inst/ECMWF/download_ecmwf_opendata.py | 88 ------ .../inst/ECMWF/ecmwf_grib2nc.py | 200 ------------- .../data.atmosphere/inst/ECMWF/met2CFutils.py | 175 +++++++++++ 7 files changed, 556 insertions(+), 348 deletions(-) create mode 100644 modules/data.atmosphere/R/ECMWF_helper_functions.R create mode 100644 modules/data.atmosphere/R/met2CF.ECMWF.R rename modules/data.atmosphere/inst/ECMWF/{ecmwfopendata_download.py => download_ecmwf.py} (91%) delete mode 100644 modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py delete mode 100644 modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py create mode 100644 modules/data.atmosphere/inst/ECMWF/met2CFutils.py diff --git a/modules/data.atmosphere/R/ECMWF_helper_functions.R b/modules/data.atmosphere/R/ECMWF_helper_functions.R new file mode 100644 index 00000000000..2b72c321301 --- /dev/null +++ b/modules/data.atmosphere/R/ECMWF_helper_functions.R @@ -0,0 +1,283 @@ +#' Helper functions for met2CF.ECMWF +#' +#' mergenc_ECMWF writes 1 CF and 50 PF netCDF files by extracting data from GRIB2 files and filling NaNs at 3h position wherever data is unavailable. +#' +#' @param lat latitude +#' @param lon longitude +#' @param outfolder Path to directory where nc files need to be saved. +#' @param overwrite Logical if files needs to be overwritten. +#' @param verbose Logical flag defining if output of function be extra verbose. +#' +#' @return list of dataframes +#' @export +#' +#' \dontrun{ +#' results <- mergenc_ECMWF(lat.in, +#' lon.in, +#' outfolder, +#' overwrite = overwrite) +#' +#' @author Swarnalee Mazumder +#' +nanfill <- function(variable) { # nanfill function fills NaN at every 3h period from 144h to 360h (original step 6h) + + len6 <- variable + len3nan <- rep(NA,length(len6)*2-1) + len3nan[seq(1, length(len6)*2-1, 2)] <- len6 + + return(len3nan) +} + +mergenc_ECMWF <- function(lat.in, + lon.in, + outfolder, + overwrite = FALSE, + verbose = TRUE) { + + # Python script "download_ecmwf.py" to get latest forecast date and download forecast datasets + script.path = file.path(system.file("ECMWF/download_ecmwf.py", package = "PEcAn.data.atmosphere")) + reticulate::source_python(script.path) + + time <- 0 + stream <- "enfo" + type <- c("cf", "pf") + step_15day <- 360 + + all_parameters <- c("10u", "10v", "2t", "sp", "tp") + + date_latestdata <- ecmwflatest(time= time, step15= step_15day, stream= stream, type= type, params= all_parameters) + + latest_filedate <- strtoi(gsub("-", "", substr(as.character.Date(date_latestdata), 1, 10))) + + current_date <- strtoi(gsub("-", "", Sys.Date())) + + in_fname <- paste(latest_filedate, time, stream, sep = "_") + + script.path = file.path(system.file("ECMWF/met2CFutils.py", package = "PEcAn.data.atmosphere")) + reticulate::source_python(script.path) + + fnames_3h <- all_filenames_3h(in_fname) + fnames_6h <- all_filenames_6h(in_fname) + + ######### CF ######### + + ##### 3h CF -> 0 - 141h at 3h timestep + cf_3h_out_fname <- paste0(latest_filedate, time, stream, "cf_3h", sep = "_") + cf_3h_nc <- combine_files_cf(fnames_3h, lat_in, lon_in, out_filename= cf_3h_out_fname) + + ##### 3h CF -> 144h - 360h at 6h timestep + cf_6h_out_fname <- paste0(latest_filedate, time, stream, "cf_6h", sep = "_") + cf_6h_nc <- combine_files_cf(fnames_6h, lat_in, lon_in, out_filename= cf_6h_out_fname) + + cf_nc3 <- ncdf4::nc_open(cf_3h_nc) + cf_nc6 <- ncdf4::nc_open(cf_6h_nc) + + cf_nc_fname <- paste0(latest_filedate, time, stream, "cf.nc", sep = "_") + + lat <- ncdf4::ncvar_get(cf_nc6, "latitude") + lon <- ncdf4::ncvar_get(cf_nc6, "longitude") + validtime <- seq(0, 360, 3) + validtimeunits <- ncdf4::ncatt_get(cf_nc3,"valid_time","units")$value + + # create and write the netCDF file -- ncdf4 version + # define dimensions + londim <- ncdf4::ncdim_def("longitude","degrees_east",as.double(lon)) + latdim <- ncdf4::ncdim_def("latitude","degrees_north",as.double(lat)) + timedim <- ncdf4::ncdim_def("valid_time",validtimeunits,as.double(validtime)) + time <- ncdf4::ncdim_def("time", "days since 2022-08-08 00:00:00", as.double(validtime)) + + # define variables + fillvalue <- NA + + cf_u10_3 <- ncdf4::ncvar_get(cf_nc3, "u10") + cf_u10_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "u10")) + cf_u10 <- c(cf_u10_3, cf_u10_6_nan) + + cf_v10_3 <- ncdf4::ncvar_get(cf_nc3, "v10") + cf_v10_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "v10")) + cf_v10 <- c(cf_v10_3, cf_v10_6_nan) + + cf_t2m_3 <- ncdf4::ncvar_get(cf_nc3, "t2m") + cf_t2m_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "t2m")) + cf_t2m <- c(t2m_3, t2m_6_nan) + + sp_3 <- ncdf4::ncvar_get(cf_nc3, "sp") + sp_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "sp")) + cf_sp <- c(sp_3, sp_6_nan) + + tp_3 <- ncdf4::ncvar_get(cf_nc3, "tp") + tp_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "tp")) + cf_tp <- c(tp_3, tp_6_nan) + + dlname <- "Eastward Component of Wind" + u10.def <- ncdf4::ncvar_def("eastward_wind","m s**-1",list(timedim),fillvalue,dlname,prec="double") + + dlname <- "Northward Component of Wind" + v10.def <- ncdf4::ncvar_def("northward_wind","m s**-1",list(timedim),fillvalue,dlname,prec="double") + + dlname <- "Near surface air temperature" + t2m.def <- ncdf4::ncvar_def("air_temperature","K",list(timedim),fillvalue,dlname,prec="double") + + dlname <- "Surface pressure" + sp.def <- ncdf4::ncvar_def("air_pressure","Pa",list(timedim),fillvalue,dlname,prec="double") + + dlname <- "Rainfall rate" + tp.def <- ncdf4::ncvar_def("precipitation_flux","kg m**-2 s**-1",list(timedim),fillvalue,dlname,prec="double") + + if (!file.exists(ncfname_pf) || overwrite) { + + tryCatch({ + + # create netCDF file and put arrays + ncout_cf <- ncdf4::nc_create(cf_nc_fname, list(u10.def, v10.def, t2m.def, sp.def, tp.def),force_v4=TRUE) + + # put variables in netCDF file + ncdf4::ncvar_put(ncout_cf,u10.def, cf_u10, verbose = TRUE) + ncdf4::ncvar_put(ncout_cf,v10.def, cf_v10, verbose = TRUE) + ncdf4::ncvar_put(ncout_cf,t2m.def, cf_t2m, verbose = TRUE) + ncdf4::ncvar_put(ncout_cf,sp.def, cf_sp, verbose = TRUE) + ncdf4::ncvar_put(ncout_cf,tp.def, cf_tp, verbose = TRUE) + + gribed <- ncatt_get(cf_nc6,0,"GRIB_edition") + institution <- ncatt_get(cf_nc6,0,"GRIB_centreDescription") + datasource <- ncatt_get(cf_nc6,0,"GRIB_centre") + history <- ncatt_get(cf_nc6,0,"history") + Conventions <- ncatt_get(cf_nc6,0,"Conventions") + + # add global attributes + ncdf4::ncatt_put(ncout_cf,0,"GRIB_edition",gribed$value) + ncdf4::ncatt_put(ncout_cf,0,"Institution",institution$value) + ncdf4::ncatt_put(ncout_cf,0,"Source",datasource$value) + history <- paste("PEcAn Project", date(Sys.Date()), sep=", ") + ncdf4::ncatt_put(ncout_cf,0,"history",history) + ncdf4::ncatt_put(ncout_cf,0,"Conventions",Conventions$value) + + ncdf4::nc_close(ncout_cf) # writes ensemble wise pf files to disk + + }, + + error = function(e) { + PEcAn.logger::logger.severe("Something went wrong during the writing of the nc file.", + conditionMessage(e)) + }) + + } else { + PEcAn.logger::logger.info(paste0( + "The file ", + flname, + " already exists. It was not overwritten." + )) + } + + rows <- 51 + results <- data.frame( + file = character(rows), + host = character(rows), + mimetype = character(rows), + formatname = character(rows), + startdate = character(rows), + enddate = character(rows), + dbfile.name = "ECMWF", + stringsAsFactors = FALSE + ) + + start_date <- latest_filedate + end_date <- strtoi(gsub("-", "", lubridate::ymd(start_date) + days(14))) + + results$file[1] <- + file.path(outfolder, cf_nc_fname) + results$host[1] <- PEcAn.remote::fqdn() + results$startdate[1] <- start_date + results$enddate[1] <- end_date + results$mimetype[1] <- "application/x-netcdf" + results$formatname[1] <- "CF Meteorology" + + ######### Perturbed Forecast ######### + + ##### 3h PF -> 0 - 141h at 3h timestep + pf_3h_out_fname <- paste(latest_filedate, time, stream, "pf_3h", sep = "_") + pf_3h_nc <- combine_files_cf(fnames_3h, lat_in, lon_in, out_filename= pf_3h_out_fname) + + ##### 6h PF -> 144h - 360h at 6h timestep + pf_6h_out_fname <- paste(latest_filedate, time, stream, "pf_6h", sep = "_") + pf_6h_nc <- combine_files_cf(fnames_6h, lat_in, lon_in, out_filename= pf_6h_out_fname) + + pf_nc3 <- ncdf4::nc_open(pf_3h_nc) + pf_nc6 <- ncdf4::nc_open(pf_6h_nc) + + # Creating ensemble wise netCDF files + pf_u10_3 <- t(ncdf4::ncvar_get(pf_nc3, "u10")) + pf_u10_6 <- t(ncdf4::ncvar_get(pf_nc6, "u10")) + + pf_v10_3 <- t(ncdf4::ncvar_get(pf_nc3, "v10")) + pf_v10_6 <- t(ncdf4::ncvar_get(pf_nc6, "v10")) + + pf_t2m_3 <- t(ncdf4::ncvar_get(pf_nc3, "t2m")) + pf_t2m_6 <- t(ncdf4::ncvar_get(pf_nc6, "t2m")) + + pf_sp_3 <- t(ncdf4::ncvar_get(pf_nc3, "sp")) + pf_sp_6 <- t(ncdf4::ncvar_get(pf_nc6, "sp")) + + pf_tp_3 <- t(ncdf4::ncvar_get(pf_nc3, "tp")) + pf_tp_6 <- t(ncdf4::ncvar_get(pf_nc6, "tp")) + + for (ens in 1:50){ + + ncfname_pf <- paste0(latest_filedate, time, stream, "pf_ens", ens, sep = "_") + + if (!file.exists(ncfname_pf) || overwrite) { + + tryCatch({ + + ncfname_pf <- paste0(latest_filedate, time, stream, "pf_ens", ens, sep = "_") + + pf_u10 <- c(pf_u10_3[, ens], nanfill(u10_6[, ens])) + pf_v10 <- c(pf_v10_3[, ens], nanfill(v10_6[, ens])) + pf_t2m <- c(pf_t2m_3[, ens], nanfill(pf_t2m_6[, ens])) + pf_sp <- c(pf_sp_3[, ens], nanfill(pf_sp_6[, ens])) + pf_tp <- c(pf_tp_3[, ens], nanfill(pf_tp_6[, ens])) + + ncout_pf <- ncdf4::nc_create(ncfname_pf, list(u10.def, v10.def, t2m.def, sp.def, tp.def),force_v4=TRUE) + + ncdf4::ncvar_put(ncout_pf,u10.def, pf_u10, verbose = TRUE) + ncdf4::ncvar_put(ncout_pf,v10.def, pf_v10, verbose = TRUE) + ncdf4::ncvar_put(ncout_pf,t2m.def, pf_t2m, verbose = TRUE) + ncdf4::ncvar_put(ncout_pf,sp.def, pf_sp, verbose = TRUE) + ncdf4::ncvar_put(ncout_pf,tp.def, pf_tp, verbose = TRUE) + + # add global attributes + ncdf4::ncatt_put(ncout_pf,0,"GRIB_edition",gribed$value) + ncdf4::ncatt_put(ncout_pf,0,"Institution",institution$value) + ncdf4::ncatt_put(ncout_pf,0,"Source",datasource$value) + history <- paste("PEcAn Project", date(Sys.Date()), sep=", ") + ncdf4::ncatt_put(ncout_pf,0,"history",history) + ncdf4::ncatt_put(ncout_pf,0,"Conventions",Conventions$value) + + ncdf4::nc_close(ncout_pf) # writes ensemble wise pf files to disk + + row <- ens # final data frame has 1+50 rows pertaining to 1cf + 50pf files + results$file[row+1] <- file.path(outfolder, ncout_pf) + results$host[row+1] <- "PEcAn.remote::fqdn()" + results$startdate[row+1] <- start_date + results$enddate[row+1] <- end_date + results$mimetype[row+1] <- "application/x-netcdf" + results$formatname[row+1] <- "CF Meteorology" + + }, + + error = function(e) { + PEcAn.logger::logger.severe("Something went wrong during the writing of the nc file.", + conditionMessage(e)) + }) + + } else { + PEcAn.logger::logger.info(paste0( + "The file ", + flname, + " already exists. It was not overwritten." + )) + } + } + + return(results) +} \ No newline at end of file diff --git a/modules/data.atmosphere/R/download.ECMWF.R b/modules/data.atmosphere/R/download.ECMWF.R index bdc806cfb7c..f1078eba62a 100644 --- a/modules/data.atmosphere/R/download.ECMWF.R +++ b/modules/data.atmosphere/R/download.ECMWF.R @@ -5,7 +5,7 @@ #' Under the hood, this function uses the Python `ecmwf-opendata` module. #' The module and dependencies can be accessed via the `reticulate` package. #' -#' `reticulate::conda_install(c("scipy", "xarray", "eccodes", "cfgrib", "ecmwf-opendata"), envname = "r-reticulate", pip = TRUE)` +#' `reticulate::conda_install(c("scipy", "xarray", "eccodes", "cfgrib", "ecmwf-opendata", "dask", "netCDF4"), envname = "r-reticulate", pip = TRUE)` #' `reticulate::use_condaenv("r-reticulate")` #' #' @@ -105,7 +105,7 @@ download.ECMWF <- function(outfolder, if (stream == "enfo"){ time <- time - step <- 360 + step_15day <- 360 type <- type # type of forecast here c("cf", "pf") } else{ PEcAn.logger::logger.severe(sprintf( @@ -121,12 +121,10 @@ download.ECMWF <- function(outfolder, # "tp" - total precipitation # "10u" - 10 metre U wind component # "10v" - 10 metre V wind component - # "q" - Specific humidity - # "r" - Relative humidity # "sp" - Surface Pressure - all_parameters <- c("2t", "tp", "10u", "10v", "q", "r", "sp") + all_parameters <- c("2t", "tp", "10u", "10v", "sp") if (tolower(parameters) == "all") { parameter_types <- all_parameters @@ -143,7 +141,7 @@ download.ECMWF <- function(outfolder, # Python script "download_ecmwf.py" to get latest forecast date and download forecast datasets - script.path = file.path(system.file("ECMWF/download_ecmwf_opendata.py", package = "PEcAn.data.atmosphere")) + script.path = file.path(system.file("ECMWF/download_ecmwf.py", package = "PEcAn.data.atmosphere")) reticulate::source_python(script.path) date_latestdata <- ecmwflatest(time, step, stream, type, all_parameters) @@ -172,68 +170,73 @@ download.ECMWF <- function(outfolder, latest_filedate )) } else { - - fname <- paste(latest_filedate, time, step, stream, type, sep = "_") - - in_filename <- file.path(outfolder, paste0(fname, ".grib2")) - data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, in_filename) - - # Python script "ecmwf_grib2nc.py" to convert grib2 to netCDF - # Converting Grib2 file to ensemble wise NetCDF file - script.path = file.path(system.file("ECMWF/ecmwf_grib2nc.py", package = "PEcAn.data.atmosphere")) - reticulate::source_python(script.path) - - out_filename <- file.path(outfolder, fname) - nc_ecmwf <- grib2nc_ecmwf(in_filename, outfolder, out_filename, lat_in, lon_in) - - - rows <- 1 - results <- data.frame( - file = character(rows), - host = character(rows), - mimetype = character(rows), - formatname = character(rows), - startdate = character(rows), - enddate = character(rows), - dbfile.name = "ECMWF", - stringsAsFactors = FALSE - ) - - firstdate_st <- latest_filedate - lastdate_st <- ymd(firstdate_st) + days(15) - - results$file[rows] <- - file.path(outfolder, out_file_names) - results$host[rows] <- PEcAn.remote::fqdn() - results$startdate[rows] <- firstdate_st - results$enddate[rows] <- lastdate_st - results$mimetype[rows] <- "application/x-netcd" - results$formatname[rows] <- "CF Meteorology" - - return(results) - + + in_fname <- paste(latest_filedate, time, stream, sep = "_") + + data_download <- ecmwfdownload(date, time, stream, type, all_parameters, in_fname) + + rows <- 85 # 48 files at 3h timestep and 36 files at 6h timestep + results <- data.frame( + file = character(rows), + host = character(rows), + mimetype = character(rows), + formatname = character(rows), + startdate = character(rows), + enddate = character(rows), + dbfile.name = "ECMWF", + stringsAsFactors = FALSE + ) + + out_3h <- list() + for (h3 in seq(0, 141, 3)){ + out_3h <- append(out_3h, paste0(in_fname,"_",h3,".grib2")) + } + + out_6h <- list() + for (h6 in seq(144, 360, 6)){ + out_6h <- append(out_6h, paste0(in_fname,"_",h6,".grib2")) + } + + for (row in 1:48){ + results$file[row] <- + file.path(outfolder, out_3h[[row]]) + results$host[row] <- PEcAn.remote::fqdn() + results$startdate[row] <- "start_date" + results$enddate[row] <- "end_date" + results$mimetype[row] <- "application/x-netcd" + results$formatname[row] <- "CF Meteorology" + } + + for (row in 1:37){ + results$file[row+48] <- + file.path(outfolder, out_6h[[row]]) + results$host[row+48] <- PEcAn.remote::fqdn() + results$startdate[row+48] <- "start_date" + results$enddate[row+48] <- "end_date" + results$mimetype[row+48] <- "application/x-netcd" + results$formatname[row+48] <- "CF Meteorology" + } + return(results) } } - ######### code to reproduce # library(reticulate) +# reticulate::conda_install(c("scipy", "xarray", "eccodes", "cfgrib", "ecmwf-opendata", "dask", "netCDF4"), envname = "r-reticulate", pip = TRUE) +# reticulate::use_condaenv("r-reticulate") +# # date <- -1 # time <- 0 -# step <- 360 # stream <- "enfo" -# type <- c("cf","pf") +# type <- c("cf", "pf") +# step_15day <- 360 +# +# all_parameters <- c("10u", "10v", "2t", "sp", "tp") +# fname <- "15day_ecmwfxxx" # -# all_parameters <- c("2t", "tp", "10u", "10v", "q", "r", "sp") -# in_filename <- "trial_ecmwfod.grib2" +# reticulate::source_python("downloadecmwf.py") # -# script.path = file.path(system.file("download_ecmwf_opendata.py", package = "PEcAn.data.atmosphere")) -# reticulate::source_python(script.path) -# data_download <- ecmwfdownload(date, time, step, stream, type, all_parameters, in_filename) +# latest <- ecmwflatest(time, step_15day, stream, type, all_parameters) # -# out_filename <- "trial_ecmwfod" -# -# script.path = file.path(system.file("ecmwf_grib2nc.py", package = "PEcAn.data.atmosphere")) -# reticulate::source_python(script.path) -# nc_ecmwf <- grib2nc_ecmwf(in_filename, outfolder, out_filename, lat_in, lon_in) \ No newline at end of file +# data_download <- ecmwfdownload(date= date, time= time, stream= stream, type= type, params= all_parameters, filename= fname) \ No newline at end of file diff --git a/modules/data.atmosphere/R/met2CF.ECMWF.R b/modules/data.atmosphere/R/met2CF.ECMWF.R new file mode 100644 index 00000000000..ef2cb5e3a11 --- /dev/null +++ b/modules/data.atmosphere/R/met2CF.ECMWF.R @@ -0,0 +1,32 @@ +#' met2CF.ECMWF +#' +#' Converts ECMWF Open Data variables to CF format. +#' +#' Variables present in the output netCDF files: +#' air_temperature, air_temperature, air_pressure, +#' precipitation_flux, eastward_wind, northward_wind +#' +#' @param lat.in latitude +#' @param lon.in longitude +#' @param outfolder Path to directory where nc files need to be saved. +#' @param overwrite Logical if files needs to be overwritten. +#' @param verbose Logical flag defining if ouput of function be extra verbose. +#' +#' @return list of files in a dataframe +#' @export +#' +#' @author Swarnalee Mazumder +#' +met2CF.ECMWF <- function(lat.in, + lon.in, + outfolder, + overwrite = FALSE, + verbose = TRUE) { + + results <- + PEcAn.data.atmosphere::mergenc_ECMWF(lat.in, + lon.in, + outfolder, + overwrite = overwrite) + return(results) +} \ No newline at end of file diff --git a/modules/data.atmosphere/inst/ECMWF/ecmwfopendata_download.py b/modules/data.atmosphere/inst/ECMWF/download_ecmwf.py similarity index 91% rename from modules/data.atmosphere/inst/ECMWF/ecmwfopendata_download.py rename to modules/data.atmosphere/inst/ECMWF/download_ecmwf.py index 99f84f71122..18f6b4c6c01 100644 --- a/modules/data.atmosphere/inst/ECMWF/ecmwfopendata_download.py +++ b/modules/data.atmosphere/inst/ECMWF/download_ecmwf.py @@ -1,3 +1,6 @@ +#This script helps in viewing the latest date for which 15 day forecast data is available on https://data.ecmwf.int/ and helps in downloading the same +#Author: Swarnalee Mazumder + def ecmwflatest(time, step15, stream, type, params): from ecmwf.opendata import Client @@ -46,7 +49,7 @@ def ecmwflatest(time, step15, stream, type, params): return latest -def ecmwfdownloadopendata(date, time, stream, type, params, filename): +def ecmwfdownload(date, time, stream, type, params, filename): """ Downloads latest ECMWF Open Data forecast based on specified forecast time, step, stream, parameters diff --git a/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py b/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py deleted file mode 100644 index e1224f47b34..00000000000 --- a/modules/data.atmosphere/inst/ECMWF/download_ecmwf_opendata.py +++ /dev/null @@ -1,88 +0,0 @@ -def ecmwflatest(time, step, stream, type, params): - - """ - Fetches latest ECMWF Open Data forecast based on specified forecast time, step, stream, parameters - - Parameters - ---------- - time (numeric) -- reference time of the forecasts. Values are 0 (00), 12. - - step (numeric) -- forecast time step. Current value: 360 - - stream (str) -- forecasting system that produces the data. Current value: `enfo` - - lat_in (numeric) -- Site coordinates, decimal degrees - - lon_in (numeric) -- Site coordinates, decimal degrees - - Returns - ------- - Latest date for which data is available. - """ - - from ecmwf.opendata import Client - - client = Client("ecmwf", beta=True) - - if stream == "mmsf": - latest= client.latest( - time = int(time), - step= step, - stream= stream, - type= type, - param= params, - ) - - elif stream == "enfo": - latest= client.latest( - time = int(time), - step= int(step), - stream= stream, - type= type, - param= params, - ) - - return latest - - -def ecmwfdownload(date, time, step, stream, type, params, filename): - - """ - Downloads latest ECMWF Open Data forecast based on specified forecast time, step, stream, parameters - - Parameters - ---------- - date (numeric) -- reference date of the forecasts. Values are 0 (today), -1 (yesterday), -2 (day before yesterday) - - time (numeric) -- reference time of the forecasts. Values are 0 (00), 12. - - step (numeric) -- forecast time step. Current value: 360 - - stream (str) -- forecasting system that produces the data. Current value: `enfo` - - type (str) -- the type of data. Current values: `cf`, `pf` - - params (str) -- the meteorological parameters, such as wind, pressure or humidity. Current parameters are "2t", "tp", "10u", "10v", "q", "r", "sp". - - filename (str) -- output filename for the Grib2 file - - Returns - ------- - Downloads ECMWF Open Data forecast for the latest date for which forecast data is available. - """ - - from ecmwf.opendata import Client - - client = Client("ecmwf", beta=True) - - file = client.retrieve( - date= int(date), - time= int(time), - step= int(step), - stream= stream, - type= type, - param= params, - target= filename - ) - - return file diff --git a/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py b/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py deleted file mode 100644 index d01f38b17f3..00000000000 --- a/modules/data.atmosphere/inst/ECMWF/ecmwf_grib2nc.py +++ /dev/null @@ -1,200 +0,0 @@ -def grib2nc_ecmwf(in_filename, outfolder, out_filename, lat_in, lon_in): - - """ - Converts ECMWF Open Data 15 day forecast from Grib2 into PEcAn standard NetCDF - - Parameters - ---------- - in_filename (str) -- Grib2 file consisting of forecast data - - outfolder (str) -- path to the directory where the output file is stored. If specified directory does not exists, it is created. - - out_filename (str) -- filename of output NetCDF file - - lat_in (numeric) -- Site coordinates, decimal degrees - - lon_in (numeric) -- Site coordinates, decimal degrees - - Returns - ------- - output netCDF in CF Format which is saved in the specified directory. - - - Example Usage: - /don't run - - grib2nc_ecmwf(in_filename, - outfolder, - out_filename, - 0, - 180) - """ - - import os - import xarray - import cfgrib - - in_filename = str(in_filename) - - out_filename = str(out_filename) - - lat_in = float(lat_in) - lon_in = float(lon_in) - - - # if specified output directory does not exist create it - if not os.path.exists(outfolder): - os.makedirs(outfolder, exist_ok=True) - - - # "dataType" needs to be defined to correctly open the grib file - # https://github.com/ecmwf/cfgrib/issues/285 - - ######### Controlled Forecast - - # 10 metre U wind component - cf_u10 = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'10u'}}) - - # 10 metre V wind component - cf_v10 = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'10v'}}) - - # 2 metre temperature - cf_t2m = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'2t'}}) - - # total precipitation - cf_tp = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'tp'}}) - - # Surface pressure - cf_sp = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'sp'}}) - - # Specific humidity - cf_q = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'q'}}) - - # Relative humidity - cf_r = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'r'}}) - - - # https://docs.xarray.dev/en/stable/generated/xarray.merge.html - # “override”: skip comparing and copy attrs from the first dataset to the result - cf_allvar = xarray.merge([cf_u10, cf_v10, cf_t2m, cf_tp, cf_sp, cf_q, cf_r], - compat='override') - - - # total precipitation unit as per CF standard - # all other variables already in CF standard units - cf_allvar.tp.attrs.update({'GRIB_units': 'kg m**-2 s**-1', 'units': 'kg m**-2 s**-1'}) - attrs_tp_cf = cf_allvar.tp.attrs - - - # 1 m = 1/86.4 kg/m2/s - cf_allvar['tp'] = cf_allvar.tp / 86.4 - cf_allvar.tp.attrs = attrs_tp_cf - - - # variable name as per CF standard - cf_allvar_CF_name_standard = cf_allvar.rename(name_dict={'u10':'eastward_wind', - 'v10':'northward_wind', - 't2m':'air_temperature', - 'tp':'precipitation_flux', - 'sp':'air_pressure', - 'q':'specific_humidity', - 'r':'relative_humidity'}) - - - cf_allvar_CF_lat_lon = cf_allvar_CF_name_standard.sel(latitude=lat_in, longitude=lon_in, method="nearest") - - - cf_save_path = os.path.join( - outfolder, - out_filename - + "_cf" - + ".nc" - ) - - - # Creates netCDF file corresponding to "dataType":"cf" - cf_nc = cf_allvar_CF_lat_lon.to_netcdf(cf_save_path) - - - ######### Perturbed Forecast - - # 10 metre U wind component - pf_u10 = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'10u'}}) - - # 10 metre V wind component - pf_v10 = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'10v'}}) - - # 2 metre temperature - pf_t2m = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'2t'}}) - - # total precipitation - pf_tp = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'tp'}}) - - # Surface pressure - pf_sp = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'sp'}}) - - # Specific humidity - pf_q = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'q'}}) - - # Relative humidity - pf_r = xarray.open_dataset(in_filename, engine='cfgrib', - backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'r'}}) - - - # https://docs.xarray.dev/en/stable/generated/xarray.merge.html - # “override”: skip comparing and copy attrs from the first dataset to the result - pf_allvar = xarray.merge([pf_u10, pf_v10, pf_t2m, pf_tp, pf_sp, pf_q, pf_r], - compat='override') - - - # total precipitation unit as per CF standard - # all other variables already in CF standard units - pf_allvar.tp.attrs.update({'GRIB_units': 'kg m**-2 s**-1', 'units': 'kg m**-2 s**-1'}) - attrs_tp_pf = pf_allvar.tp.attrs - - - # 1 m = 1/86.4 kg/m2/s - pf_allvar['tp'] = pf_allvar.tp / 86.4 - pf_allvar.tp.attrs = attrs_tp_pf - - - # variable name as per CF standard - pf_allvar_CF_name_standard = pf_allvar.rename(name_dict={'u10':'eastward_wind', - 'v10':'northward_wind', - 't2m':'air_temperature', - 'tp':'precipitation_flux', - 'sp':'air_pressure', - 'q':'specific_humidity', - 'r':'relative_humidity'}) - - - - # Creates 50 netCDF files corresponding to 50 ensemble members in "dataType":"pf" (perturbed forecast) - for i in range(50): - - pf_allvar_ens = pf_allvar_CF_name_standard.isel(number=i) - - pf_allvar_lat_lon = pf_allvar_ens.sel(latitude=lat_in, longitude=lon_in, method="nearest") - - pf_save_path = os.path.join( - outfolder, - out_filename - + "_pf_ens_" - + str(i+1) - + ".nc" - ) - - pf_allvar_lat_lon.to_netcdf(pf_save_path) diff --git a/modules/data.atmosphere/inst/ECMWF/met2CFutils.py b/modules/data.atmosphere/inst/ECMWF/met2CFutils.py new file mode 100644 index 00000000000..a1528fc6ac3 --- /dev/null +++ b/modules/data.atmosphere/inst/ECMWF/met2CFutils.py @@ -0,0 +1,175 @@ +#The functions contained in this script help in extraction of CF and PF data from GRIB2 files and conversion to CF standard +#Author: Swarnalee Mazumder + +def all_filenames_3h(filename): + files_3 = [] + + for i in range(0, 142,3): + files_3.append(filename+"_"+str(i)+".grib2") + + return files_3 + +def all_filenames_6h(filename): + files_6 = [] + for j in range(144, 366, 6): + files_6.append(filename+"_"+str(j)+".grib2") + + return files_6 + +def combine_files_cf(files, lat_in, lon_in, out_filename): + + """ + Converts ECMWF Open Data 15 day controlled forecast data from Grib2 into PEcAn standard NetCDF + + Parameters + ---------- + in_filename (str) -- Grib2 file consisting of forecast data + + outfolder (str) -- path to the directory where the output file is stored. If specified directory does not exists, it is created. + + out_filename (str) -- filename of output NetCDF file + + lat_in (numeric) -- Site coordinates, decimal degrees + + lon_in (numeric) -- Site coordinates, decimal degrees + + Returns + ------- + output netCDF files are saved in the specified directory. + """ + + import os + import xarray + import cfgrib + + out_filename = str(out_filename) + + lat_in = float(lat_in) + lon_in = float(lon_in) + + if not os.path.exists(outfolder): + os.makedirs(outfolder, exist_ok=True) + + cf_u10 = xarray.open_mfdataset(files, engine = 'cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'10u'}}) + + cf_v10 = xarray.open_mfdataset(files, engine = 'cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'10v'}}) + + cf_t2m = xarray.open_mfdataset(files, engine = 'cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'2t'}}) + + cf_sp = xarray.open_mfdataset(files, engine = 'cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'sp'}}) + + cf_tp = xarray.open_mfdataset(files, engine = 'cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'tp'}}) + + cf = xarray.merge([cf_u10, cf_v10, cf_t2m, cf_sp, cf_tp], + compat = 'override') + + # total precipitation unit as per CF standard + # all other variables already in CF standard units + cf.tp.attrs.update({'GRIB_units': 'kg m**-2 s**-1', 'units': 'kg m**-2 s**-1'}) + attrs_tp_cf = cf.tp.attrs + + + # 1 m = 1/86.4 kg/m2/s + cf['tp'] = cf.tp / 86.4 + cf.tp.attrs = attrs_tp_cf + + + cf_lat_lon = cf.sel(latitude = lat_in, longitude = lon_in, method = "nearest") + + + cf_save_path = os.path.join( + outfolder, + out_filename + + "_cf" + + ".nc" + ) + + cf_lat_lon.to_netcdf(cf_save_path) + + return cf_save_path + +def combine_files_pf(files, lat_in, lon_in, out_filename): + + """ + Converts ECMWF Open Data 15 day perturbed forecast from Grib2 into PEcAn standard NetCDF + + Parameters + ---------- + in_filename (str) -- Grib2 file consisting of forecast data + + outfolder (str) -- path to the directory where the output file is stored. If specified directory does not exists, it is created. + + out_filename (str) -- filename of output NetCDF file + + lat_in (numeric) -- Site coordinates, decimal degrees + + lon_in (numeric) -- Site coordinates, decimal degrees + + Returns + ------- + output netCDF files are saved in the specified directory. + """ + import os + import xarray + import cfgrib + + out_filename = str(out_filename) + + lat_in = float(lat_in) + lon_in = float(lon_in) + + if not os.path.exists(outfolder): + os.makedirs(outfolder, exist_ok=True) + + ######### Perturbed Forecast + # 10 metre U wind component + pf_u10 = xarray.open_mfdataset(files, engine='cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'10u'}}) + + # 10 metre V wind component + pf_v10 = xarray.open_mfdataset(files, engine='cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'10v'}}) + + # 2 metre temperature + pf_t2m = xarray.open_mfdataset(files, engine='cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'2t'}}) + + # Surface pressure + pf_sp = xarray.open_mfdataset(files, engine='cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'sp'}}) + + # total precipitation + pf_tp = xarray.open_mfdataset(files, engine='cfgrib', concat_dim="valid_time",combine="nested", + backend_kwargs={'filter_by_keys': {'dataType': 'pf', 'shortName':'tp'}}) + + + pf = xarray.merge([pf_u10, pf_v10, pf_t2m, pf_sp, pf_tp], + compat = 'override') + + + pf.tp.attrs.update({'GRIB_units': 'kg m**-2 s**-1', 'units': 'kg m**-2 s**-1'}) + attrs_tp_pf = pf.tp.attrs + + # 1 m = 1/86.4 kg/m2/s + pf['tp'] = pf.tp / 86.4 + pf.tp.attrs = attrs_tp_pf + + + pf_lat_lon = pf.sel(latitude = lat_in, longitude = lon_in, method = "nearest") + + + pf_save_path = os.path.join( + outfolder, + out_filename + + "_cf" + + ".nc" + ) + + pf_lat_lon.to_netcdf(out_filename) + + return pf_save_path From 312ecd4a711c54e1bc5924038c01f9bf7df12bbf Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Thu, 15 Sep 2022 01:57:31 -0500 Subject: [PATCH 09/11] removed creation of intermediate cf pf netCDF files --- .../R/ECMWF_helper_functions.R | 123 +++++++++--------- modules/data.atmosphere/R/met2CF.ECMWF.R | 4 + .../data.atmosphere/inst/ECMWF/met2CFutils.py | 49 +------ 3 files changed, 76 insertions(+), 100 deletions(-) diff --git a/modules/data.atmosphere/R/ECMWF_helper_functions.R b/modules/data.atmosphere/R/ECMWF_helper_functions.R index 2b72c321301..4c77639d320 100644 --- a/modules/data.atmosphere/R/ECMWF_helper_functions.R +++ b/modules/data.atmosphere/R/ECMWF_helper_functions.R @@ -34,6 +34,11 @@ mergenc_ECMWF <- function(lat.in, overwrite = FALSE, verbose = TRUE) { + if (!file.exists(outfolder)) { + dir.create(outfolder, showWarnings = FALSE, recursive = TRUE) + } + + # Python script "download_ecmwf.py" to get latest forecast date and download forecast datasets script.path = file.path(system.file("ECMWF/download_ecmwf.py", package = "PEcAn.data.atmosphere")) reticulate::source_python(script.path) @@ -62,52 +67,56 @@ mergenc_ECMWF <- function(lat.in, ######### CF ######### ##### 3h CF -> 0 - 141h at 3h timestep - cf_3h_out_fname <- paste0(latest_filedate, time, stream, "cf_3h", sep = "_") - cf_3h_nc <- combine_files_cf(fnames_3h, lat_in, lon_in, out_filename= cf_3h_out_fname) + cf_3h <- combine_files_cf(fnames_3h, lat_in, lon_in) + names(cf_3h) <- c("valid_time", "lon", "lat", "u10", "v10", "t2m", "sp", "tp") + + days_since <- paste("days since", cf_3h$valid_time$time$data, "00:00:00") + cf_3h$valid_time <- cf_3h$valid_time$values ##### 3h CF -> 144h - 360h at 6h timestep - cf_6h_out_fname <- paste0(latest_filedate, time, stream, "cf_6h", sep = "_") - cf_6h_nc <- combine_files_cf(fnames_6h, lat_in, lon_in, out_filename= cf_6h_out_fname) + cf_6h_nc <- combine_files_cf(fnames_6h, lat_in, lon_in) + names(cf_6h) <- c("valid_time", "lon", "lat", "u10", "v10", "t2m", "sp", "tp") - cf_nc3 <- ncdf4::nc_open(cf_3h_nc) - cf_nc6 <- ncdf4::nc_open(cf_6h_nc) + cf_6h$valid_time <- cf_6h$valid_time$values - cf_nc_fname <- paste0(latest_filedate, time, stream, "cf.nc", sep = "_") + ### Final CF netCDF filename + cf_nc_fname <- paste(latest_filedate, time, stream, "cf.nc", sep = "_") - lat <- ncdf4::ncvar_get(cf_nc6, "latitude") - lon <- ncdf4::ncvar_get(cf_nc6, "longitude") + # values from cf_3h + lat <- cf_3h$lat + lon <- cf_3h$lon validtime <- seq(0, 360, 3) - validtimeunits <- ncdf4::ncatt_get(cf_nc3,"valid_time","units")$value + validtimeunits <- cf_3h$valid_time # create and write the netCDF file -- ncdf4 version # define dimensions londim <- ncdf4::ncdim_def("longitude","degrees_east",as.double(lon)) latdim <- ncdf4::ncdim_def("latitude","degrees_north",as.double(lat)) timedim <- ncdf4::ncdim_def("valid_time",validtimeunits,as.double(validtime)) - time <- ncdf4::ncdim_def("time", "days since 2022-08-08 00:00:00", as.double(validtime)) - + time <- ncdf4::ncdim_def("time", days_since, as.double(validtime)) + # define variables fillvalue <- NA - cf_u10_3 <- ncdf4::ncvar_get(cf_nc3, "u10") - cf_u10_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "u10")) + cf_u10_3 <- cf_3h$u10 + cf_u10_6_nan <- nanfill(cf_6h$u10) cf_u10 <- c(cf_u10_3, cf_u10_6_nan) - cf_v10_3 <- ncdf4::ncvar_get(cf_nc3, "v10") - cf_v10_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "v10")) + cf_v10_3 <- cf_3h$v10 + cf_v10_6_nan <- nanfill(cf_6h$v10) cf_v10 <- c(cf_v10_3, cf_v10_6_nan) - cf_t2m_3 <- ncdf4::ncvar_get(cf_nc3, "t2m") - cf_t2m_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "t2m")) - cf_t2m <- c(t2m_3, t2m_6_nan) + cf_t2m_3 <- cf_3h$t2m + cf_t2m_6_nan <- nanfill(cf_6h$t2m) + cf_t2m <- c(cf_t2m_3, cf_t2m_6_nan) - sp_3 <- ncdf4::ncvar_get(cf_nc3, "sp") - sp_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "sp")) - cf_sp <- c(sp_3, sp_6_nan) + cf_sp_3 <- cf_3h$sp + cf_sp_6_nan <- nanfill(cf_6h$tp) + cf_sp <- c(cf_sp_3, cf_sp_6_nan) - tp_3 <- ncdf4::ncvar_get(cf_nc3, "tp") - tp_6_nan <- nanfill(ncdf4::ncvar_get(cf_nc6, "tp")) - cf_tp <- c(tp_3, tp_6_nan) + cf_tp_3 <- cf_3h$tp + cf_tp_6_nan <- nanfill(cf_6h$tp) + cf_tp <- c(cf_tp_3, cf_tp_6_nan) dlname <- "Eastward Component of Wind" u10.def <- ncdf4::ncvar_def("eastward_wind","m s**-1",list(timedim),fillvalue,dlname,prec="double") @@ -124,7 +133,7 @@ mergenc_ECMWF <- function(lat.in, dlname <- "Rainfall rate" tp.def <- ncdf4::ncvar_def("precipitation_flux","kg m**-2 s**-1",list(timedim),fillvalue,dlname,prec="double") - if (!file.exists(ncfname_pf) || overwrite) { + if (!file.exists(cf_nc_fname) || overwrite) { tryCatch({ @@ -138,19 +147,12 @@ mergenc_ECMWF <- function(lat.in, ncdf4::ncvar_put(ncout_cf,sp.def, cf_sp, verbose = TRUE) ncdf4::ncvar_put(ncout_cf,tp.def, cf_tp, verbose = TRUE) - gribed <- ncatt_get(cf_nc6,0,"GRIB_edition") - institution <- ncatt_get(cf_nc6,0,"GRIB_centreDescription") - datasource <- ncatt_get(cf_nc6,0,"GRIB_centre") - history <- ncatt_get(cf_nc6,0,"history") - Conventions <- ncatt_get(cf_nc6,0,"Conventions") - # add global attributes - ncdf4::ncatt_put(ncout_cf,0,"GRIB_edition",gribed$value) - ncdf4::ncatt_put(ncout_cf,0,"Institution",institution$value) - ncdf4::ncatt_put(ncout_cf,0,"Source",datasource$value) + ncdf4::ncatt_put(ncout_cf,0,"GRIB_edition",2) + ncdf4::ncatt_put(ncout_cf,0,"Institution","ecmwf") + ncdf4::ncatt_put(ncout_cf,0,"Source","European Centre for Medium-Range Weather Forecasts") history <- paste("PEcAn Project", date(Sys.Date()), sep=", ") - ncdf4::ncatt_put(ncout_cf,0,"history",history) - ncdf4::ncatt_put(ncout_cf,0,"Conventions",Conventions$value) + ncdf4::ncatt_put(ncout_cf,0,"Conventions","CF-1.7") ncdf4::nc_close(ncout_cf) # writes ensemble wise pf files to disk @@ -196,34 +198,40 @@ mergenc_ECMWF <- function(lat.in, ##### 3h PF -> 0 - 141h at 3h timestep pf_3h_out_fname <- paste(latest_filedate, time, stream, "pf_3h", sep = "_") - pf_3h_nc <- combine_files_cf(fnames_3h, lat_in, lon_in, out_filename= pf_3h_out_fname) + pf_3h <- combine_files_pf(fnames_3h, lat_in, lon_in) + names(pf_3h) <- c("valid_time", "lon", "lat", "u10", "v10", "t2m", "sp", "tp") + + days_since <- paste("days since", pf_3h$valid_time$time$data, "00:00:00") + pf_3h$valid_time <- pf_3h$valid_time$values ##### 6h PF -> 144h - 360h at 6h timestep pf_6h_out_fname <- paste(latest_filedate, time, stream, "pf_6h", sep = "_") - pf_6h_nc <- combine_files_cf(fnames_6h, lat_in, lon_in, out_filename= pf_6h_out_fname) + pf_6h <- combine_files_pf(fnames_6h, lat_in, lon_in) + names(pf_6h) <- c("valid_time", "lon", "lat", "u10", "v10", "t2m", "sp", "tp") + + pf_6h$valid_time <- pf_6h$valid_time$values - pf_nc3 <- ncdf4::nc_open(pf_3h_nc) - pf_nc6 <- ncdf4::nc_open(pf_6h_nc) + + ##### Creating ensemble wise netCDF files - # Creating ensemble wise netCDF files - pf_u10_3 <- t(ncdf4::ncvar_get(pf_nc3, "u10")) - pf_u10_6 <- t(ncdf4::ncvar_get(pf_nc6, "u10")) + pf_u10_3 <- t(pf_3h$u10) + pf_u10_6 <- t(pf_6h$u10) - pf_v10_3 <- t(ncdf4::ncvar_get(pf_nc3, "v10")) - pf_v10_6 <- t(ncdf4::ncvar_get(pf_nc6, "v10")) + pf_v10_3 <- t(pf_3h$v10) + pf_v10_6 <- t(pf_6h$v10) - pf_t2m_3 <- t(ncdf4::ncvar_get(pf_nc3, "t2m")) - pf_t2m_6 <- t(ncdf4::ncvar_get(pf_nc6, "t2m")) + pf_t2m_3 <- t(pf_3h$t2m) + pf_t2m_6 <- t(pf_6h$t2m) - pf_sp_3 <- t(ncdf4::ncvar_get(pf_nc3, "sp")) - pf_sp_6 <- t(ncdf4::ncvar_get(pf_nc6, "sp")) + pf_sp_3 <- t(pf_3h$sp) + pf_sp_6 <- t(pf_6h$sp) - pf_tp_3 <- t(ncdf4::ncvar_get(pf_nc3, "tp")) - pf_tp_6 <- t(ncdf4::ncvar_get(pf_nc6, "tp")) + pf_tp_3 <- t(pf_3h$tp) + pf_tp_6 <- t(pf_6h$tp) for (ens in 1:50){ - ncfname_pf <- paste0(latest_filedate, time, stream, "pf_ens", ens, sep = "_") + ncfname_pf <- paste(latest_filedate, time, stream, "pf_ens", ens, sep = "_") if (!file.exists(ncfname_pf) || overwrite) { @@ -246,12 +254,11 @@ mergenc_ECMWF <- function(lat.in, ncdf4::ncvar_put(ncout_pf,tp.def, pf_tp, verbose = TRUE) # add global attributes - ncdf4::ncatt_put(ncout_pf,0,"GRIB_edition",gribed$value) - ncdf4::ncatt_put(ncout_pf,0,"Institution",institution$value) - ncdf4::ncatt_put(ncout_pf,0,"Source",datasource$value) + ncdf4::ncatt_put(ncout_pf,0,"GRIB_edition",2) + ncdf4::ncatt_put(ncout_pf,0,"Institution","ecmwf") + ncdf4::ncatt_put(ncout_pf,0,"Source","European Centre for Medium-Range Weather Forecasts") history <- paste("PEcAn Project", date(Sys.Date()), sep=", ") - ncdf4::ncatt_put(ncout_pf,0,"history",history) - ncdf4::ncatt_put(ncout_pf,0,"Conventions",Conventions$value) + ncdf4::ncatt_put(ncout_pf,0,"Conventions","CF-1.7") ncdf4::nc_close(ncout_pf) # writes ensemble wise pf files to disk diff --git a/modules/data.atmosphere/R/met2CF.ECMWF.R b/modules/data.atmosphere/R/met2CF.ECMWF.R index ef2cb5e3a11..fa9504f3f83 100644 --- a/modules/data.atmosphere/R/met2CF.ECMWF.R +++ b/modules/data.atmosphere/R/met2CF.ECMWF.R @@ -23,6 +23,10 @@ met2CF.ECMWF <- function(lat.in, overwrite = FALSE, verbose = TRUE) { + if (!file.exists(outfolder)) { + dir.create(outfolder, showWarnings = FALSE, recursive = TRUE) + } + results <- PEcAn.data.atmosphere::mergenc_ECMWF(lat.in, lon.in, diff --git a/modules/data.atmosphere/inst/ECMWF/met2CFutils.py b/modules/data.atmosphere/inst/ECMWF/met2CFutils.py index a1528fc6ac3..8957559dc52 100644 --- a/modules/data.atmosphere/inst/ECMWF/met2CFutils.py +++ b/modules/data.atmosphere/inst/ECMWF/met2CFutils.py @@ -16,7 +16,7 @@ def all_filenames_6h(filename): return files_6 -def combine_files_cf(files, lat_in, lon_in, out_filename): +def combine_files_cf(files, lat_in, lon_in): """ Converts ECMWF Open Data 15 day controlled forecast data from Grib2 into PEcAn standard NetCDF @@ -25,30 +25,22 @@ def combine_files_cf(files, lat_in, lon_in, out_filename): ---------- in_filename (str) -- Grib2 file consisting of forecast data - outfolder (str) -- path to the directory where the output file is stored. If specified directory does not exists, it is created. - - out_filename (str) -- filename of output NetCDF file - lat_in (numeric) -- Site coordinates, decimal degrees lon_in (numeric) -- Site coordinates, decimal degrees Returns ------- - output netCDF files are saved in the specified directory. + Arrays of variables """ import os import xarray import cfgrib - out_filename = str(out_filename) - lat_in = float(lat_in) lon_in = float(lon_in) - if not os.path.exists(outfolder): - os.makedirs(outfolder, exist_ok=True) cf_u10 = xarray.open_mfdataset(files, engine = 'cfgrib', concat_dim="valid_time",combine="nested", backend_kwargs={'filter_by_keys': {'dataType': 'cf', 'shortName':'10u'}}) @@ -81,19 +73,10 @@ def combine_files_cf(files, lat_in, lon_in, out_filename): cf_lat_lon = cf.sel(latitude = lat_in, longitude = lon_in, method = "nearest") - - cf_save_path = os.path.join( - outfolder, - out_filename - + "_cf" - + ".nc" - ) - - cf_lat_lon.to_netcdf(cf_save_path) - - return cf_save_path + return cf_lat_lon["valid_time"], cf_lat_lon["longitude"].values, cf_lat_lon["latitude"].values, cf_lat_lon['u10'].values, cf_lat_lon["v10"].values, cf_lat_lon["t2m"].values, cf_lat_lon["sp"].values, cf_lat_lon["tp"].values + -def combine_files_pf(files, lat_in, lon_in, out_filename): +def combine_files_pf(files, lat_in, lon_in): """ Converts ECMWF Open Data 15 day perturbed forecast from Grib2 into PEcAn standard NetCDF @@ -102,30 +85,21 @@ def combine_files_pf(files, lat_in, lon_in, out_filename): ---------- in_filename (str) -- Grib2 file consisting of forecast data - outfolder (str) -- path to the directory where the output file is stored. If specified directory does not exists, it is created. - - out_filename (str) -- filename of output NetCDF file - lat_in (numeric) -- Site coordinates, decimal degrees lon_in (numeric) -- Site coordinates, decimal degrees Returns ------- - output netCDF files are saved in the specified directory. + Arrays of variables """ import os import xarray import cfgrib - out_filename = str(out_filename) - lat_in = float(lat_in) lon_in = float(lon_in) - if not os.path.exists(outfolder): - os.makedirs(outfolder, exist_ok=True) - ######### Perturbed Forecast # 10 metre U wind component pf_u10 = xarray.open_mfdataset(files, engine='cfgrib', concat_dim="valid_time",combine="nested", @@ -162,14 +136,5 @@ def combine_files_pf(files, lat_in, lon_in, out_filename): pf_lat_lon = pf.sel(latitude = lat_in, longitude = lon_in, method = "nearest") - - pf_save_path = os.path.join( - outfolder, - out_filename - + "_cf" - + ".nc" - ) + return pf_lat_lon["valid_time"], pf_lat_lon["longitude"].values, pf_lat_lon["latitude"].values, pf_lat_lon['u10'].values, pf_lat_lon["v10"].values, pf_lat_lon["t2m"].values, pf_lat_lon["sp"].values, pf_lat_lon["tp"].values - pf_lat_lon.to_netcdf(out_filename) - - return pf_save_path From 4646cbce2b32c34b1ee718841869178cc554b02b Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Thu, 15 Sep 2022 08:31:17 -0500 Subject: [PATCH 10/11] changed function name --- modules/data.atmosphere/R/ECMWF_helper_functions.R | 2 +- modules/data.atmosphere/R/met2CF.ECMWF.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data.atmosphere/R/ECMWF_helper_functions.R b/modules/data.atmosphere/R/ECMWF_helper_functions.R index 4c77639d320..6bc1b58a994 100644 --- a/modules/data.atmosphere/R/ECMWF_helper_functions.R +++ b/modules/data.atmosphere/R/ECMWF_helper_functions.R @@ -28,7 +28,7 @@ nanfill <- function(variable) { # nanfill function fills NaN at every 3h period return(len3nan) } -mergenc_ECMWF <- function(lat.in, +combine2nc_ECMWF <- function(lat.in, lon.in, outfolder, overwrite = FALSE, diff --git a/modules/data.atmosphere/R/met2CF.ECMWF.R b/modules/data.atmosphere/R/met2CF.ECMWF.R index fa9504f3f83..ecaa7932aa5 100644 --- a/modules/data.atmosphere/R/met2CF.ECMWF.R +++ b/modules/data.atmosphere/R/met2CF.ECMWF.R @@ -28,7 +28,7 @@ met2CF.ECMWF <- function(lat.in, } results <- - PEcAn.data.atmosphere::mergenc_ECMWF(lat.in, + PEcAn.data.atmosphere::combine2nc_ECMWF(lat.in, lon.in, outfolder, overwrite = overwrite) From fff01ec860b46529db46baf3ed1b1485a67c8c75 Mon Sep 17 00:00:00 2001 From: Swarnalee Mazumder Date: Thu, 15 Sep 2022 09:25:29 -0500 Subject: [PATCH 11/11] registration file --- .../inst/registration/register.ECMWF.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 modules/data.atmosphere/inst/registration/register.ECMWF.xml diff --git a/modules/data.atmosphere/inst/registration/register.ECMWF.xml b/modules/data.atmosphere/inst/registration/register.ECMWF.xml new file mode 100644 index 00000000000..726f4dcdb9e --- /dev/null +++ b/modules/data.atmosphere/inst/registration/register.ECMWF.xml @@ -0,0 +1,12 @@ + + + regional + 51 + TRUE + + 33 + CF Meteorology + application/x-netcdf + nc + + \ No newline at end of file