From 6647ca633dd7124117fb4de4334b15c75569cde8 Mon Sep 17 00:00:00 2001
From: "Zhao, Xin"
Date: Sun, 6 Aug 2023 21:17:21 -0400
Subject: [PATCH] package processing updates

Moving preprocessing into the package structure to ensure traceability.
However, vector size could be a concern, requiring larger memory.
---
 NAMESPACE | 3 +-
 R/utils-data.R | 55 +
 R/xfaostat_L100_constants.R | 6 +
 R/xfaostat_L101_RawDataPreProcessing1.R | 196 +++
 R/xfaostat_L101_RawDataPreProcessing2.R | 178 +++
 R/xfaostat_L101_RawDataPreProcessing3.R | 142 +++
 R/xfaostat_helper_funcs.R | 521 ++++++++
 R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R | 1108 +++++++++++++++++
 R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R | 1034 ---------------
 ...nerate_package_data_faostat_helper_funcs.R | 8 +-
 inst/extdata/aglu/FAO/SUA_item_code_map.csv | 540 ++++++++
 ..._AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd | 4 +
 man/get_data_list.Rd | 26 +
 13 files changed, 2784 insertions(+), 1037 deletions(-)
 create mode 100644 R/xfaostat_L100_constants.R
 create mode 100644 R/xfaostat_L101_RawDataPreProcessing1.R
 create mode 100644 R/xfaostat_L101_RawDataPreProcessing2.R
 create mode 100644 R/xfaostat_L101_RawDataPreProcessing3.R
 create mode 100644 R/xfaostat_helper_funcs.R
 create mode 100644 R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R
 delete mode 100644 R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R
 create mode 100644 inst/extdata/aglu/FAO/SUA_item_code_map.csv
 create mode 100644 man/get_data_list.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 87694965..9c2cb966 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -28,11 +28,11 @@ export(standardize_iso)
 export(unprotect_integer_cols)
 importFrom(assertthat,assert_that)
 importFrom(data.table,data.table)
-importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
 importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
+importFrom(dplyr,first)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_at)
@@ -48,6 +48,7 @@ importFrom(dplyr,right_join)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(dplyr,summarise_all)
+importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
 importFrom(grDevices,rainbow)
diff --git a/R/utils-data.R b/R/utils-data.R
index 74b1f970..443de588 100644
--- a/R/utils-data.R
+++ b/R/utils-data.R
@@ -233,6 +233,61 @@ get_data <- function(all_data, name, strip_attributes = FALSE) {
   }
 }
 
+#' get_data_list
+#'
+#' This function calls \code{get_data} for each data name in \code{data_list} and assigns
+#' the resulting data into the given \code{environ} with the data name being used as the
+#' variable name. Note: for values in data_list named "FILE" (or "OPTIONAL_FILE") the
+#' "basename" of the string will be used as the variable name.
+#'
+#' @param all_data Data structure
+#' @param data_list A character vector of data to load into the given environment
+#' @param strip_attributes A logical vector which will be passed on to \code{get_data}. The length
+#' must be 1 (use the same logical for all values of data_list) or match the length of
+#' \code{data_list} (one logical value for each data_list item).
+#' @param environ The environment into which the data should be loaded. If NULL (the default)
+#' the caller's environment will be used.
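+#'
+#' A minimal usage sketch (the data names are illustrative, not objects shipped
+#' with the package):
+#' @examples
+#' \dontrun{
+#' inputs <- c(FILE = "common/GCAM_region_names", "L100.example_data")
+#' # loads `GCAM_region_names` and `L100.example_data` into the caller's environment
+#' get_data_list(all_data, inputs, strip_attributes = TRUE)
+#' }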
+get_data_list <- function(all_data, data_list, strip_attributes = FALSE, environ = NULL) {
+  # expecting a (potentially named) character vector for data_list
+  assertthat::assert_that(is.character(data_list))
+  data_list_names <- names(data_list)
+
+  # strip_attributes must be logical and either be length 1 (in which case the same value
+  # will be used for all calls to get_data) or match the length of data_list (values will be
+  # matched up per index)
+  assertthat::assert_that(is.logical(strip_attributes))
+  assertthat::assert_that(length(strip_attributes) == 1 || length(strip_attributes) == length(data_list))
+  if(length(strip_attributes) == 1) {
+    strip_attributes = rep_len(strip_attributes, length(data_list))
+  }
+
+  # if no environment was explicitly given the default behavior is to load into the caller's
+  # environment
+  if(is.null(environ)) {
+    environ = parent.frame()
+  }
+
+  # loop over each data_list item, call get_data and assign the result to the same data name in
+  # the given environment
+  for(i in seq_along(data_list)) {
+    curr_var_name <- data_list[i]
+    # the variable name to assign for FILE is the "basename" of the file
+    # i.e. `FILE = "common/GCAM_region_names"` will result in `GCAM_region_names` being set
+    if(!is.null(data_list_names) && data_list_names[i] %in% c("FILE", "OPTIONAL_FILE")) {
+      # Note: strsplit returns a list (one per each str to be split) of character vectors
+      # (one for each token split out). Given we split one string at a time
+      # we just grab the first element of the list (`[[1]]`)
+      data_name_split = strsplit(curr_var_name, "/")[[1]]
+      # get the last element of the char vec to use as the var name
+      curr_var_name = tail(data_name_split, n = 1)
+    }
+    # get the data
+    data = get_data(all_data, data_list[i], strip_attributes[i])
+    # assign it into the environment
+    assign(curr_var_name, data, envir = environ)
+  }
+}
+
 #' return_data
 #'
diff --git a/R/xfaostat_L100_constants.R b/R/xfaostat_L100_constants.R
new file mode 100644
index 00000000..86270287
--- /dev/null
+++ b/R/xfaostat_L100_constants.R
@@ -0,0 +1,6 @@
+# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
+
+# General behavior constants ======================================================================
+
+# having issues with package check here
+ DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata")
diff --git a/R/xfaostat_L101_RawDataPreProcessing1.R b/R/xfaostat_L101_RawDataPreProcessing1.R
new file mode 100644
index 00000000..b9b2acaa
--- /dev/null
+++ b/R/xfaostat_L101_RawDataPreProcessing1.R
@@ -0,0 +1,196 @@
+# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
+
+#' module_xfaostat_L101_RawDataPreProcessing1
+#'
+#' Preprocess raw FAOSTAT data
+#'
+#' @param command API command to execute
+#' @param ... other optional parameters, depending on command
+#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
+#' \code{command} is "MAKE") all the generated outputs
+#' @details This chunk preprocesses raw FAOSTAT production and harvested area (QCL), producer price (PP),
+#' supply utilization accounting (SCL), and new food balance sheet (FBS) data.
+#' @importFrom assertthat assert_that
+#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
+#' first case_when vars
+#' @importFrom tibble tibble
+#' @importFrom tidyr complete drop_na gather nesting spread replace_na
+#' @author XZ 2023
+module_xfaostat_L101_RawDataPreProcessing1 <- function(command, ...)
{
+
+  MODULE_INPUTS <-
+    c(FILE = "aglu/AGLU_ctry")
+
+  MODULE_OUTPUTS <-
+    c("QCL",               # Ag production quantity and harvested area
+      "PP",                # Producer prices
+      "SCL",               # Supply utilization accounting
+      "FBS",               # New food balance sheet
+      "QCL_area_code_map"  # Country code
+    )
+
+  if(command == driver.DECLARE_INPUTS) {
+    return(MODULE_INPUTS)
+  } else if(command == driver.DECLARE_OUTPUTS) {
+    return(MODULE_OUTPUTS)
+  } else if(command == driver.MAKE) {
+
+    year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    #source("data-raw/generate_package_data_faostat_helper_funcs.R")
+    #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata.faostat")
+
+    # *[QCL] FAOSTAT Production and area ----
+
+    ## Load raw data
+    FAOSTAT_load_raw_data(DATASETCODE = "QCL", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)
+
+    QCL %>%
+      # Remove aggregated areas and items
+      filter(area_code < 350, item_code < 1700) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      # When dealing with animal/livestock data, units are important
+      # Prod Popultn (5314) for Beeswax and Honey is removed since data only exist before 1990
+      filter(element_code != 5314) %>%
+      # Remove NA for simplicity for now; expand.grid later
+      # All Coir (coconut fiber) is filtered out due to NA
+      filter(!is.na(value)) %>%
+      # remove accents
+      rm_accent("item", "area") -> QCL1
+
+    QCL1 %>%
+      distinct(area_code, area) %>%
+      add_title("FAO primary production country and code") %>%
+      add_units("NA") %>%
+      add_comments("FAO Country and code") ->
+      QCL_area_code_map
+
+    # Other data use the QCL area for consistency
+    QCL_area_code <- QCL1 %>% distinct(area_code) %>% pull()
+
+    QCL1 %>%
+      add_title("FAO primary production") %>%
+      add_units("tonnes (production), ha (area harvested), head (livestock)") %>%
+      add_comments("Preprocessed FAOSTAT primary production") ->
+      QCL
+
+    # *[PP] Producer price ----
+
+    FAOSTAT_load_raw_data(DATASETCODE = "PP", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)
+
+    PP %>% distinct(element, element_code, unit)
+
+    PP %>%
+      filter(area_code < 350,                      # rm aggregated regions
+             item_code < 1700,                     # rm aggregated items
+             area_code %in% QCL_area_code,         # only keep regions with production
+             element_code %in% c(5532, 5539)) %>%  # keep USD/tonne and index
+      rm_accent("item", "area") -> PP1
+
+    # Use the price index to fill in missing prices across years:
+    # price(year) = price(2015) * index(year) / index(2015)
+    PP1 %>%
+      filter(element_code %in% c(5532, 5539)) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      # Not completing year and area here
+      spread(element, value) %>%
+      left_join(
+        PP1 %>% filter(element_code %in% c(5532, 5539)) %>%
+          select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+          spread(element, value) %>%
+          rename(pp_base = `Producer Price (USD/tonne)`,
+                 pp_baseindex = `Producer Price Index (2014-2016 = 100)`) %>%
+          filter(!is.na(pp_base)) %>%
+          group_by(area, area_code, item) %>%
+          filter(year == 2015) %>% within(rm(year)) %>%
+          ungroup(),
+        by = c("area_code", "area", "item_code", "item")
+      ) %>% mutate(
+        `Producer Price (USD/tonne)` = if_else(is.na(`Producer Price (USD/tonne)`),
+                                               pp_base * `Producer Price Index (2014-2016 = 100)` / pp_baseindex,
+                                               `Producer Price (USD/tonne)`)
+      ) %>%
+      select(area_code, area, item_code, item, year, `Producer Price (USD/tonne)`) %>%
+      gather(element, value, `Producer Price (USD/tonne)`) %>%
+      mutate(element_code =
5532) -> PP2
+
+    ### output PP and clean memory ----
+    PP2 %>%
+      add_title("FAO producer prices") %>%
+      add_units("USD/tonne") %>%
+      add_comments("Preprocessed FAOSTAT producer prices") ->
+      PP
+
+    # keep PP (a module output); remove intermediates only
+    rm(PP1, PP2)
+
+    # Food balance and Supply-Utilization-Account
+
+    ## *[FBS] new food balance sheet (2010-) ----
+
+    ## Load raw data
+    FAOSTAT_load_raw_data("FBS") # New FBS 2010+
+    FBS %>% distinct(element, element_code, unit)
+
+    FBS %>% filter(item_code < 2901, item_code != 2501,
+                   !element_code %in% c(511, 5301),
+                   area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> FBS1
+
+    ### output FBS and clean memory ----
+    FBS1 %>%
+      add_title("FAO FBS") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAOSTAT FBS") ->
+      FBS
+    rm(FBS1)
+
+    ## *[SCL] SUA: supply utilization accounting ----
+
+    FAOSTAT_load_raw_data("SCL") # SUA 2010+
+    SCL %>% distinct(element, element_code, unit)
+    # FAOSTAT "accidentally" used CPC codes in SCL
+    if (is.numeric(SCL$item_code)) {
+      SCL %>% filter(item_code <= 1700, item_code != 1) -> SCL
+    }
+
+    SCL %>% filter(!element_code %in% c(664, 665, 674, 684, 511),
+                   # Do not derive cal/g from `Food supply (kcal/capita/day)` / `Food supply quantity (g/capita/day)`;
+                   # the units are too small, so those elements are removed here.
+                   # `Calories/Year` / `Food supply quantity (tonnes)` is more accurate (similarly for protein and fat);
+                   # use the annual values in the SUA to calculate the conversion rates.
+                   area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> SCL1
+
+    ### output SCL and clean memory ----
+    SCL1 %>%
+      add_title("FAO SCL") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAOSTAT SCL") ->
+      SCL
+    rm(SCL1)
+
+    return_data(MODULE_OUTPUTS)
+
+  } else {
+    stop("Unknown command")
+  }
+}
diff --git a/R/xfaostat_L101_RawDataPreProcessing2.R b/R/xfaostat_L101_RawDataPreProcessing2.R
new file mode 100644
index 00000000..13ba10cd
--- /dev/null
+++ b/R/xfaostat_L101_RawDataPreProcessing2.R
@@ -0,0 +1,178 @@
+# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
+
+#' module_xfaostat_L101_RawDataPreProcessing2
+#'
+#' Preprocess raw FAOSTAT data
+#'
+#' @param command API command to execute
+#' @param ... other optional parameters, depending on command
+#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
+#' \code{command} is "MAKE") all the generated outputs
+#' @details This chunk preprocesses the old FAOSTAT food balance sheet (FBSH), the old non-food
+#' commodity balance (CB), their merged balance (FBSH_CB), and population (OA) data.
+#' @importFrom assertthat assert_that
+#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
+#' first case_when vars
+#' @importFrom tibble tibble
+#' @importFrom tidyr complete drop_na gather nesting spread replace_na
+#' @author XZ 2023
+module_xfaostat_L101_RawDataPreProcessing2 <- function(command, ...)
{
+
+  MODULE_INPUTS <-
+    c(FILE = "aglu/AGLU_ctry",
+      "QCL_area_code_map")
+
+  MODULE_OUTPUTS <-
+    c("FBSH",    # Old food balance sheet
+      "CB",      # Old non food utilization accounting
+      "FBSH_CB", # Combined FBSH and CB
+      "OA"       # Population
+    )
+
+  if(command == driver.DECLARE_INPUTS) {
+    return(MODULE_INPUTS)
+  } else if(command == driver.DECLARE_OUTPUTS) {
+    return(MODULE_OUTPUTS)
+  } else if(command == driver.MAKE) {
+
+    year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    # Country codes come from QCL_area_code_map (produced in PreProcessing1);
+    # the raw QCL data set itself is not needed here
+    QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()
+
+    # Food balance and Supply-Utilization-Account
+
+    ## *[FBSH] old food balance sheet (-2013) ----
+
+    FAOSTAT_load_raw_data("FBSH") # Old FBS -2013
+    FBSH %>% distinct(element, element_code, unit)
+    # Keep population (old)
+    FBSH %>% filter(item_code < 2901,
+                    !element_code %in% c(5301),
+                    area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> FBSH1
+
+    ### output FBSH and clean memory ----
+
+    FBSH1 %>%
+      add_title("FAO FBSH") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO FBSH") ->
+      FBSH
+
+    rm(FBSH1)
+
+    ## *[CB] Non-food Balance ----
+
+    FAOSTAT_load_raw_data("CB") # Old FBS-nonfood -2013
+    CB %>% distinct(element, element_code, unit)
+    # Keep population (old)
+    CB %>% filter(item_code < 2901,
+                  !element_code %in% c(5300),
+                  area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> CB1
+
+    ### output CB and clean memory ----
+
+    CB1 %>%
+      add_title("FAO CB") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO CB") ->
+      CB
+
+    rm(CB1)
+
+    ## *[FBSH_CB] merge the two ----
+
+    FBSH %>% distinct(item_code) %>%
+      dplyr::intersect(CB %>% distinct(item_code)) %>%
+      pull ->
+      dup_item_code
+
+    FBSH %>% mutate(value = value * 1000) %>% # convert 1000 tonnes to tonnes
+      bind_rows(CB %>% filter(!item_code %in% dup_item_code)) %>%
+      select(-unit) %>%
+      filter(!element_code %in% c(645, 664, 674, 684, 511)) %>% # remove non-balance items
+      mutate(element = gsub(" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)", "", element)) %>%
+      mutate(element = replace(element, element == "Losses", "Loss"),
+             element = replace(element, element == "Processing", "Processed")) %>%
+      select(-element_code) %>%
+      mutate(unit = "tonnes") ->
+      FBSH_CB
+
+    # Rice and products and Groundnuts related adjustments
+    # to make FBS and FBSH more consistent
+    SHELL_RATE_groundnuts <- 0.7
+    Mill_RATE_rice <- 0.667
+
+    FBSH_CB %>% distinct(element, unit)
+    FBSH_CB %>%
+      filter(!item_code %in% c(2805, 2556)) %>%
+      # Adjust to Rice and products and Groundnuts in FBS and bind
+      bind_rows(
+        FBSH_CB %>% filter(item_code %in% c(2805)) %>%
+          mutate(item = "Rice and products", item_code = 2807,
+                 value = value / Mill_RATE_rice) %>%
+          bind_rows(FBSH_CB %>% filter(item_code %in% c(2556)) %>%
+                      mutate(item = "Groundnuts", item_code = 2552,
+                             value = value / SHELL_RATE_groundnuts))
+      ) ->
+      FBSH_CB1
+
+    ### output FBSH_CB and clean memory ----
+
+    FBSH_CB1 %>%
+      add_title("FAO FBSH_CB", overwrite = T) %>%
+      add_units("tonne") %>%
add_comments("Preprocessed FAO FBSH_CB") -> + FBSH_CB + + rm(SHELL_RATE_groundnuts, Mill_RATE_rice); + rm(FBSH_CB1, FBSH, CB, dup_item_code) + + + # *[OA]: Population ---- + + FAOSTAT_load_raw_data("OA") # Population + OA %>% distinct(element, element_code) + OA %>% distinct(item, item_code) + + OA %>% filter(element_code == 511, item_code == 3010) %>% + select(area_code, area, item_code, item, element_code, element, year, value, unit) %>% + rm_accent("item", "area") -> OA1 + + ### output OA and clean memory ---- + OA1 %>% + add_title("FAO OA") %>% + add_units("tonne") %>% + add_comments("Preprocessed FAO OA") -> + OA + rm(OA, OA1) + rm(QCL_area_code) + + + return_data(MODULE_OUTPUTS) + + } else { + stop("Unknown command") + } +} diff --git a/R/xfaostat_L101_RawDataPreProcessing3.R b/R/xfaostat_L101_RawDataPreProcessing3.R new file mode 100644 index 00000000..80b3c6b4 --- /dev/null +++ b/R/xfaostat_L101_RawDataPreProcessing3.R @@ -0,0 +1,142 @@ +# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. + +#' module_xfaostat_L101_RawDataPreProcessing3 +#' +#' Preprocess raw faostat data +#' +#' @param command API command to execute +#' @param ... other optional parameters, depending on command +#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if +#' \code{command} is "MAKE") all the generated outputs +#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. +#' @importFrom assertthat assert_that +#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at +#' first case_when vars +#' @importFrom tibble tibble +#' @importFrom tidyr complete drop_na gather nesting spread replace_na +#' @author XZ 2023 +module_xfaostat_L101_RawDataPreProcessing3 <- function(command, ...) { + + MODULE_INPUTS <- + c(FILE = "aglu/AGLU_ctry", + "QCL_area_code_map") + + MODULE_OUTPUTS <- + c("TCL", "TM" # Gross and bilateral trade + ) + + if(command == driver.DECLARE_INPUTS) { + return(MODULE_INPUTS) + } else if(command == driver.DECLARE_OUTPUTS) { + return(MODULE_OUTPUTS) + } else if(command == driver.MAKE) { + + year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. 
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    #source("data-raw/generate_package_data_faostat_helper_funcs.R")
+    #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata.faostat")
+
+    QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()
+
+    # *[TCL] Gross trade ----
+    FAOSTAT_load_raw_data("TCL") # Gross trade
+
+    TCL %>% distinct(element, element_code, unit)
+    TCL %>% distinct(item, item_code)
+
+    TCL %>%
+      filter(item_code <= 1700,
+             # only keep quantity
+             !element_code %in% c(5622, 5922),
+             area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> TCL1
+
+    ### output TCL and clean memory ----
+    TCL1 %>%
+      add_title("FAO TCL") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO TCL") ->
+      TCL
+
+    rm(TCL1)
+
+    # *[TM] Bilateral trade ----
+    #*FAO has better quality bilateral data since 1992, covering most SUA items
+    FAOSTAT_load_raw_data("TM") # Bilateral trade
+
+    TM %>%
+      # Only keep quantities for elements with a unit of tonnes
+      filter(element_code %in% c(5910, 5610),
+             item_code < 1700,
+             # Bilateral trade years start in 1986 but quality is higher after 1992
+             # Subset the data also to shrink the size
+             year >= 1992,
+             partner_country_code %in% QCL_area_code,
+             reporter_country_code %in% QCL_area_code) %>%
+      select(reporter_country_code, reporter_countries,
+             partner_country_code, partner_countries,
+             item_code, item, element_code, element, year, value, unit) ->
+      TM1
+    rm(TM)
+
+    ## **Reconcile export and import bilateral flows ----
+    # Full join exports and imports and use available imports to fill in missing and zero exports
+    TM1 %>% filter(element %in% c("Export Quantity")) %>% spread(element, value) %>%
+      select(exporter = reporter_country_code,
+             importer = partner_country_code, item_code, year, expflow = `Export Quantity`) %>%
+      full_join(
+        TM1 %>% filter(element %in% c("Import Quantity")) %>% spread(element, value) %>%
+          select(importer = reporter_country_code,
+                 exporter = partner_country_code, item_code, year, impflow = `Import Quantity`),
+        by = c("exporter", "importer", "item_code", "year")
+      ) %>%
+      # replace NA with zero, but use imports to replace zero exports later
+      replace_na(list(expflow = 0, impflow = 0)) %>%
+      transmute(area_code = importer, year, item_code, source_code = exporter,
+                value = if_else(expflow == 0, impflow, expflow)) %>%
+      mutate(element = "Import Quantity") ->
+      TM2
+
+    TM2 %>%
+      # Remove self-trade (per unaggregated area_code), which existed in the FAO TM
+      # import data and is likely due to data processing mistakes.
+      filter(area_code != source_code) %>%
+      left_join(TM1 %>% distinct(item, item_code), by = c("item_code")) %>%
+      left_join(TM1 %>% distinct(area = partner_countries, area_code = partner_country_code), by = c("area_code")) %>%
+      left_join(TM1 %>% distinct(source = partner_countries, source_code = partner_country_code), by = c("source_code")) %>%
+      rm_accent("item", "area", "source") %>%
+      mutate(unit = "tonnes") ->
+      TM3
+    rm(TM1, TM2)
+
+    TM3 %>% spread(year, value) -> TM4
+
+    ### output TM and clean memory ----
+    TM4 %>%
+      add_title("FAO TM") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO TM") ->
+      TM
+
+    rm(TM3, TM4)
+    rm(QCL_area_code)
+
+    return_data(MODULE_OUTPUTS)
+
+  } else {
+    stop("Unknown command")
+  }
+}
diff --git a/R/xfaostat_helper_funcs.R b/R/xfaostat_helper_funcs.R
new file mode 100644
index 00000000..23d54e99
--- /dev/null
+++ b/R/xfaostat_helper_funcs.R
@@ -0,0 +1,521 @@
+
+
+# Helper functions for downloading FAOSTAT data ----
+FAOSTAT_metadata <- function (code = NULL){
+  FAOxml <- XML::xmlParse(xml2::read_xml("http://fenixservices.fao.org/faostat/static/bulkdownloads/datasets_E.xml"))
+  metadata <- XML::xmlToDataFrame(FAOxml, stringsAsFactors = FALSE)
+  names(metadata) <- tolower(gsub("\\.", "_", names(metadata)))
+
+  # Bug fix for CB; can remove later if FAOSTAT updates the link
+  metadata["CB" == metadata[, "datasetcode"], "filelocation"] <-
+    "https://fenixservices.fao.org/faostat/static/bulkdownloads/CommodityBalances_(non-food)_E_All_Data_(Normalized).zip"
+
+  if (!is.null(code)) {
+    metadata <- metadata[code == metadata[, "datasetcode"], ]
+  }
+
+  return(metadata)
+}
+
+FAOSTAT_download_bulk <- function(DATASETCODE,
+                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){
+
+  assertthat::assert_that(is.character(DATASETCODE))
+  assertthat::assert_that(is.character(DATA_FOLDER))
+
+  lapply(DATASETCODE, function(d){
+    metadata <- FAOSTAT_metadata(code = d)
+    url_bulk = metadata$filelocation
+
+    file_name <- basename(url_bulk)
+    # mode = "wb" so zip archives download correctly on Windows
+    download.file(url_bulk, file.path(DATA_FOLDER, file_name), mode = "wb")
+  })
+
+}
+
+FAOSTAT_load_raw_data <- function(DATASETCODE,
+                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){
+  assertthat::assert_that(is.character(DATASETCODE))
+  assertthat::assert_that(is.character(DATA_FOLDER))
+
+  metadata <- FAOSTAT_metadata()
+
+  # Loop through each code
+  for (CODE in DATASETCODE) {
+
+    metadata %>% filter(datasetcode == CODE) -> metadata1
+
+    zip_file_name <- file.path(DATA_FOLDER, basename(metadata1$filelocation))
+    assertthat::assert_that(file.exists(zip_file_name))
+    # assuming the csv in the zip has the same base name
+    csv_file_name <- gsub(".zip$", ".csv", basename(zip_file_name))
+
+    df <- readr::read_csv(unz(zip_file_name, csv_file_name), col_types = NULL)
+    # Lower-case col names and use _ as the delimiter
+    names(df) <- tolower(gsub("\\.| ", "_", names(df)))
+    # Assigned to parent env
+    assign(CODE, df, envir = parent.env(environment()))
+    # Assigned to current env
+    #assign(CODE, df, envir = environment())
+  }
+
+}
+
+# remove accents and apostrophes for cols in a df
+rm_accent <- function(.df, ...){
+
+  assertthat::assert_that(
+    length(intersect(c(...), names(.df))) == length(c(...)),
+    msg = "Columns listed not included in the data frame")
+
+  # .df %>%
+  #   mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>%
+  #   mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "")
+
+  .df %>%
+    mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>%
+    mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = ""))
+
+}
+
+# Helper functions
for processing FAOSTAT data
+
+FAO_AREA_RM_NONEXIST <- function(.DF,
+                                 SUDAN2012_MERGE = F,
+                                 RM_AREA_CODE = c(69, 87, 127, 135, 145, 182, 299)){
+
+  assertthat::assert_that("area_code" %in% names(.DF),
+                          msg = "A data frame with an area_code col is required")
+
+  # Areas removed mostly due to missing data elsewhere and incomplete years
+  # There are 7:
+  # 69 French Guyana
+  # 87 Guadeloupe
+  # 127 Marshall Islands
+  # 135 Martinique
+  # 145 Micronesia (Federated States of)
+  # 299 Palestine
+  # 182 Reunion
+  # area_code_remove <- c(69, 87, 127, 135, 145, 182, 299)
+
+  # In 1991 USSR (228) collapsed into 15 countries
+  area_code_USSR = c(228, 1, 52, 57, 63, 73, 108, 113, 119, 126, 146, 185, 208, 213, 230, 235)
+
+  # In 1992 Yugoslav SFR dissolved into 5 countries
+  # Yugoslav SFR (248)
+  # Croatia (98)
+  # North Macedonia (154)
+  # Slovenia (198)
+  # Bosnia and Herzegovina (80)
+  # Serbia and Montenegro (186)
+  # In 2006 it further broke into 2:
+  # Montenegro (273)
+  # Serbia (272)
+  # These regions will be merged for all years in the data as most models aggregate them into a single region
+  area_code_Yugoslav <- c(248, 98, 154, 198, 80, 186)
+  area_code_SerbiaandMontenegro <- c(186, 273, 272)
+  # In 1999/2000 Belgium-Luxembourg (15) partitioned into 255 (Belgium) and 256 (Luxembourg)
+  area_code_Belgium_Luxembourg <- c(15, 255, 256)
+  # In 1993 Czechoslovakia (51) split into Czechia (167) and Slovakia (199)
+  area_code_Czechoslovakia <- c(51, 167, 199)
+  # In 2011 Sudan (former) (206) broke into South Sudan (277) and Sudan (276)
+  area_code_Sudan <- c(206, 276, 277)
+  # Ethiopia PDR (62) dissolved into Ethiopia (238) and Eritrea (178) in 1993
+  area_code_Ethiopia <- c(62, 238, 178)
+
+  .DF %>%
+    filter(!area_code %in% RM_AREA_CODE) %>%
+    # remove USSR related regions by their years
+    filter(!(area_code %in% area_code_USSR[1] & year >= 1992),
+           !(area_code %in% area_code_USSR[-1] & year <= 1991)) %>%
+    # remove Yugoslav and Serbia & Montenegro related regions by their years
+    filter(!(area_code %in% area_code_Yugoslav[1] & year >= 1992),
+           !(area_code %in% area_code_Yugoslav[-1] & year <= 1991)) %>%
+    filter(!(area_code %in% area_code_SerbiaandMontenegro[1] & year >= 2006),
+           !(area_code %in% area_code_SerbiaandMontenegro[-1] & year <= 2005)) %>%
+    # remove Belgium-Luxembourg related regions by their years
+    filter(!(area_code %in% area_code_Belgium_Luxembourg[1] & year >= 2000),
+           !(area_code %in% area_code_Belgium_Luxembourg[-1] & year <= 1999)) %>%
+    # remove area_code_Czechoslovakia related regions by their years
+    filter(!(area_code %in% area_code_Czechoslovakia[1] & year >= 1993),
+           !(area_code %in% area_code_Czechoslovakia[-1] & year <= 1992)) %>%
+    # remove area_code_Ethiopia related regions by their years
+    filter(!(area_code %in% area_code_Ethiopia[1] & year >= 1993),
+           !(area_code %in% area_code_Ethiopia[-1] & year <= 1992)) ->
+    .DF1
+
+  if (SUDAN2012_MERGE == F) {
+    .DF1 %>%
+      # remove area_code_Sudan related regions by their years
+      filter(!(area_code %in% area_code_Sudan[1] & year >= 2012),
+             !(area_code %in% area_code_Sudan[-1] & year <= 2011)) ->
+      .DF1
+  } else {
+    .DF1 %>%
+      filter(!(area_code %in% area_code_Sudan[-1])) ->
+      .DF1
+  }
+
+  return(.DF1)
+
+}
+
+assert_FBS_balance <- function(.DF){
+
+  # assert .DF structure
+  assert_that(is.data.frame(.DF))
+  assertthat::assert_that(all(c("element") %in% names(.DF)))
+  assertthat::assert_that(is.grouped_df(.DF) == F)
+
+  # Check data
+  # 1. 
Positive values except stock variation and residuals
+  if (isTRUE(.DF %>%
+             filter(!element %in% c("Stock Variation", "Residuals")) %>%
+             summarise(min = min(value, na.rm = T)) %>% pull(min) >= -0.001)) {
+    message("Good! Signs checked") } else{
+      warning("Negative values in key elements (not including Stock Variation and Residuals)")
+    }
+
+  # 2. Trade balance in all years and items
+  if (isTRUE(.DF %>% filter(element %in% c("Import", "Export")) %>%
+             group_by(year, item_code, element) %>%
+             summarise(value = sum(value, na.rm = T), .groups = "drop") %>%
+             spread(element, value) %>% filter(abs(Import - Export) > 0.0001) %>% nrow() == 0)) {
+    message("Good! Gross trade in balance") } else{
+      warning("Gross trade imbalance")
+    }
+
+  # 3. SUA balance check
+  if (isTRUE(.DF %>%
+             spread(element, value) %>%
+             mutate(`Regional supply` = Production + `Import`,
+                    `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Stock Variation`,
+                    bal = abs(`Regional supply` - `Regional demand` - Residuals)) %>%
+             filter(bal > 0.0001) %>% nrow() == 0)) {
+    message("Good! Regional supply = Regional demand + Residuals") } else{
+      warning("Regional supply != Regional demand + Residuals")
+    }
+
+  # 4. Storage in balance across time
+  if (isTRUE(.DF %>% filter(element %in% c("Opening stocks", "Closing stocks", "Stock Variation")) %>%
+             spread(element, value) %>%
+             filter(`Opening stocks` + `Stock Variation` - `Closing stocks` != 0) %>% nrow() == 0 &
+             .DF %>% filter(element %in% c("Opening stocks", "Closing stocks", "Stock Variation")) %>%
+             spread(element, value) %>% group_by(area_code, item_code) %>%
+             arrange(area_code, item_code, year) %>%
+             mutate(bal = abs(lag(`Closing stocks`) - `Opening stocks`)) %>%
+             filter(is.na(bal) == F, bal > 0.0001) %>% nrow() == 0)){
+    message("Good! Storage in balance across time") } else{
+      warning("Stock imbalance across time or inconsistent stock variation")
+    }
+
+}
+
+# Be careful with the units: value / 1000 converts tonnes to 1000 tonnes here
+# Light adjustments to merge Tourist consumption into Food
+# and maintain balance across dimensions
+SUA_bal_adjust <- function(.df){
+
+  SCL_element_new <-
+    c("Opening stocks", "Production", "Export", "Import", "Stock Variation",
+      "Food", "Feed", "Seed", "Processed", "Other uses",
+      "Tourist consumption", "Loss", "Residuals")
+
+  .df %>%
+    mutate(element = factor(element, levels = SCL_element_new),
+           value = value / 1000) %>% # unit: 1000 tonnes
+    replace_na(list(value = 0)) %>%
+    spread(element, value) %>%
+    # Merge tourist consumption into food
+    mutate(Food = Food + `Tourist consumption`) %>%
+    select(-`Tourist consumption`) %>%
+    # Maintain stock balance across time; SCL data (2010-) quality is high
+    # Calculate closing stocks; when negative, shift up stocks in all years.
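+    # Editorial note on the back-calculation below (hypothetical numbers):
+    # with years arranged descending, first() is the latest year, so with e.g.
+    # `Opening stocks` = 10 in 2019 and `Stock Variation` = (2, -1, 3) for
+    # (2019, 2018, 2017), cumSV = cumsum(SV) - first(SV) = (0, -1, 2) and the
+    # reconstructed openings are 10 - (0, -1, 2) = (10, 11, 8), which satisfies
+    # opening(t) + SV(t) = opening(t+1) in every year.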
+    group_by(area_code, item_code) %>% arrange(-year, item_code) %>%
+    mutate(cumSV = cumsum(`Stock Variation`) - first(`Stock Variation`),
+           `Opening stocks1` = first(`Opening stocks`) - cumSV) %>%
+    select(-cumSV, -`Opening stocks`) %>%
+    rename(`Opening stocks` = `Opening stocks1`) %>%
+    mutate(`Closing stocks` = `Opening stocks` + `Stock Variation`) %>%
+    mutate(Stockshifter = if_else(`Closing stocks` < 0, abs(`Closing stocks`), 0)) %>%
+    mutate(Stockshifter = if_else(`Opening stocks` < 0 & `Opening stocks` < `Closing stocks`,
+                                  abs(`Opening stocks`), Stockshifter)) %>%
+    mutate(Stockshifter = max(Stockshifter),
+           `Opening stocks` = `Opening stocks` + Stockshifter,
+           `Closing stocks` = `Opening stocks` + `Stock Variation`) %>%
+    select(-Stockshifter) %>%
+    ungroup() %>%
+    # Correct negative Processed where applicable
+    # For a few regions (e.g., Congo), move Residuals into Food where SUA food data did not exist
+    mutate(Processed = if_else(Processed < 0, 0, Processed),
+           Food = ifelse(Production > 0 & Food == 0 & Feed == 0 & Processed == 0 & Seed == 0 & `Other uses` == 0,
+                         Production + Import - Export + `Stock Variation`, Food),
+           Food = ifelse(Food < 0, 0, Food)) %>%
+    # Check regional supply, demand, and residuals
+    mutate(`Regional supply` = `Opening stocks` + Production + `Import`,
+           `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Closing stocks`,
+           Residuals = `Regional supply` - `Regional demand`) %>%
+    gather(element, value, -area_code, -item_code, -year) %>%
+    mutate(element = factor(element, levels = Bal_element_new))
+}
+
+# Function to disaggregate dissolved regions in historical years ----
+# (a copy of this function exists in gcamdata)
+
+#' FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION
+#'
+#' @param .DF dataframe to disaggregate
+#' @param AFFECTED_AREA_CODE FAO area codes for the regions affected; the first one should be the pre-dissolution region (e.g., USSR), followed by the post-dissolution regions.
+#' @param YEAR_DISSOLVE_DONE First year after the dissolution
+#' @param YEAR_AFTER_DISSOLVE_ACCOUNT Number of years of data after the dissolution used for sharing out historical data
+#'
+#' @return Disaggregated data for the historical period of the dissolved region
+#'
+FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION <-
+  function(.DF,
+           AFFECTED_AREA_CODE, # first one should be the dissolved area
+           YEAR_DISSOLVE_DONE,
+           # using 3 years of data after the dissolution for sharing
+           YEAR_AFTER_DISSOLVE_ACCOUNT = 3){
+
+    .DF %>%
+      # filter dissolved region related areas by their years
+      filter((area_code %in% AFFECTED_AREA_CODE[-1] & year >= YEAR_DISSOLVE_DONE)|
+               (area_code %in% AFFECTED_AREA_CODE[1] & year <= YEAR_DISSOLVE_DONE)) ->
+      .DF1
+
+    Number_of_Regions_After_Dissolution <- AFFECTED_AREA_CODE %>% length - 1
+
+    .DF1 %>% filter(year < YEAR_DISSOLVE_DONE) %>%
+      select(-area_code) %>%
+      right_join(
+        .DF1 %>% filter(year %in% c(YEAR_DISSOLVE_DONE:(YEAR_DISSOLVE_DONE + YEAR_AFTER_DISSOLVE_ACCOUNT))) %>%
+          dplyr::group_by_at(dplyr::vars(-year, -value)) %>%
+          replace_na(list(value = 0)) %>%
+          summarise(value = sum(value), .groups = "drop") %>%
+          dplyr::group_by_at(dplyr::vars(-value, -area_code)) %>%
+          mutate(Share = value/sum(value)) %>%
+          # use an equal share if data after the dissolution do not exist
+          mutate(NODATA = if_else(sum(value) == 0, T, F)) %>%
+          mutate(Share = if_else(NODATA == T, 1/Number_of_Regions_After_Dissolution, Share)) %>%
+          ungroup() %>% select(-value, -NODATA),
+        by = names(.)
%>% setdiff(c("year", "value", "area_code"))
+      ) %>% mutate(value = value * Share) %>% select(-Share)
+
+  }
+
+#' FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL
+#'
+#' @param .DF Input data frame
+#' @param SUDAN2012_BREAK If TRUE, break Sudan (former) before 2012 based on 2013-2016 data
+#' @param SUDAN2012_MERGE If TRUE, merge South Sudan into Sudan in all years
+#' @param .FAO_AREA_CODE_COL Name of the FAO area code column (default "area_code")
+#' @param .AREA_COL Name of the area name column (default "area")
+#'
+#' @return Data with the historical periods of dissolved regions disaggregated into their successor regions
+
+FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL <- function(.DF,
+                                                       .FAO_AREA_CODE_COL = "area_code",
+                                                       .AREA_COL = "area",
+                                                       SUDAN2012_BREAK = F,
+                                                       SUDAN2012_MERGE = T){
+
+  assertthat::assert_that(all(.FAO_AREA_CODE_COL %in% names(.DF)),
+                          msg = "A data frame with an area_code col is required")
+
+  # Remove the area column if it exists
+  .DF0 <- .DF
+  if (.AREA_COL %in% names(.DF)) {
+    .DF %>% select(-all_of(.AREA_COL)) -> .DF }
+
+  if(.FAO_AREA_CODE_COL != "area_code"){
+    # Check if "area_code" exists; rename it to area_code_TEMP
+    if ("area_code" %in% names(.DF)) {
+      assertthat::assert_that("area_code_TEMP" %in% names(.DF) == F)
+      .DF %>% rename(area_code_TEMP = area_code) -> .DF
+    }
+
+    # rename .FAO_AREA_CODE_COL to area_code
+    .DF %>% rename(area_code = .FAO_AREA_CODE_COL) -> .DF
+
+    # Will need to replace back later
+  }
+
+  # Define area codes based on FAO ----
+  # the first one is the dissolved area
+  # In 1991 USSR (228) collapsed into 15 countries
+  area_code_USSR = c(228, 1, 52, 57, 63, 73, 108, 113, 119, 126, 146, 185, 208, 213, 230, 235)
+  # first one is Russia
+
+  # In 1992 Yugoslav SFR dissolved into 5 countries
+  # Yugoslav SFR (248)
+  # Croatia (98)
+  # North Macedonia (154)
+  # Slovenia (198)
+  # Bosnia and Herzegovina (80)
+  # Serbia and Montenegro (186)
+  # In 2006 it further broke into 2:
+  # Montenegro (273)
+  # Serbia (272)
+  # These regions will be merged for all years in the data as most models aggregate them into a single region
+  area_code_Yugoslav <- c(248, 98, 154, 198, 80, 186)
+  area_code_SerbiaandMontenegro <- c(186, 273, 272)
+  # In 1999/2000 Belgium-Luxembourg (15) partitioned into 255 (Belgium) and 256 (Luxembourg)
+  area_code_Belgium_Luxembourg <- c(15, 255, 256)
+  # In 1993 Czechoslovakia (51) split into Czechia (167) and Slovakia (199)
+  area_code_Czechoslovakia <- c(51, 167, 199)
+  # In 2011 Sudan (former) (206) broke into South Sudan (277) and Sudan (276)
+  area_code_Sudan <- c(206, 276, 277)
+  # Ethiopia PDR (62) dissolved into Ethiopia (238) and Eritrea (178) in 1993
+  area_code_Ethiopia <- c(62, 238, 178)
+
+  .DF %>%
+    # remove pre-dissolution Yugoslav first; area_code_SerbiaandMontenegro is handled later
+    filter(!(area_code %in% area_code_Yugoslav[1])) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF, area_code_Yugoslav, 1992, 3)) ->
+    .DF1
+
+  FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_USSR, 1992, 3) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_SerbiaandMontenegro, 2006, 3)) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Belgium_Luxembourg, 2000, 3)) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Czechoslovakia, 1993, 3)) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Ethiopia, 1993, 3)) ->
+    DF_FAO_AREA_DISAGGREGATE_HIST
+
+  .DF1 %>%
+    # remove pre-dissolution USSR (disaggregated above)
+    filter(!(area_code %in% area_code_USSR[1])) %>%
+    # remove pre-dissolution Serbia & Montenegro
+    filter(!(area_code %in% area_code_SerbiaandMontenegro[1])) %>%
+    # remove pre-dissolution Belgium-Luxembourg
+    filter(!(area_code %in% area_code_Belgium_Luxembourg[1])) %>%
+    # remove pre-dissolution Czechoslovakia
+    filter(!(area_code %in% area_code_Czechoslovakia[1])) %>%
+    # remove pre-dissolution Ethiopia PDR
+    filter(!(area_code %in% area_code_Ethiopia[1])) %>%
+    bind_rows(DF_FAO_AREA_DISAGGREGATE_HIST) ->
+    .DF2
+
+  if (SUDAN2012_BREAK == T) {
+    .DF2 %>%
+      # remove pre-dissolution Sudan (former)
+      filter(!(area_code %in% area_code_Sudan[1])) %>%
+      bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Sudan, 2012, 3)) ->
+      .DF2
+  }
+
+  if (SUDAN2012_MERGE == T) {
+
+    .DF2 %>%
+      mutate(area_code = replace(area_code, area_code %in% area_code_Sudan, area_code_Sudan[1])) %>%
+      dplyr::group_by_at(dplyr::vars(-value)) %>%
+      summarise(value = sum(value, na.rm = T), .groups = "drop") %>%
+      ungroup() -> .DF2
+  }
+
+  if(.FAO_AREA_CODE_COL != "area_code"){
+
+    # rename area_code back to .FAO_AREA_CODE_COL
+    .DF2[[.FAO_AREA_CODE_COL]] <- .DF2[["area_code"]]
+    .DF2 <- .DF2[, !names(.DF2) %in% "area_code"]
+
+    # Check if "area_code_TEMP" exists; rename it back to area_code
+    if ("area_code_TEMP" %in% names(.DF2)) {
+      assertthat::assert_that("area_code" %in% names(.DF2) == F)
+      .DF2 %>% rename(area_code = area_code_TEMP) -> .DF2
+    }
+  }
+
+  if (.AREA_COL %in% names(.DF0)) {
+    .DF2 %>% # Get area back
+      left_join(.DF0 %>% distinct_at(vars(all_of(.AREA_COL), .FAO_AREA_CODE_COL)),
+                by = .FAO_AREA_CODE_COL) -> .DF2
+  }
+
+  return(.DF2)
+
+}
+
+# Count item_code and area_code by year
+FAOSTAT_check_count_plot <- function(.DF, .ELEMENT = c()){
+  if (.ELEMENT %>% length() == 0) {
+    .DF %>% distinct(element) %>% pull -> .ELEMENT
+  }
+  .DF %>% group_by(year, element) %>%
+    summarise(Country = length(unique(area_code)),
+              Item = length(unique(item_code)), .groups = "drop") %>%
+    gather(header, count, -year, -element) %>%
+    filter(element %in% .ELEMENT) %>%
+    ggplot() + facet_wrap(~header, scales = "free") +
+    geom_line(aes(x = year, y = count, color = element)) +
+    theme_bw()
+}
+
+# decimal places in ggplot
+scaleFUN <- function(x) sprintf("%.0f", x)
+
+lookup <- function(.lookupvalue, .lookup_df, .lookup_col, .target_col){
+
+  assert_that(is.character(.lookupvalue))
+  assert_that(is.data.frame(.lookup_df))
+  assert_that(.lookup_col %in% colnames(.lookup_df))
+  assert_that(.target_col %in% colnames(.lookup_df))
+
+  .lookup_df[grep(paste0("^", .lookupvalue, "$"),
+                  .lookup_df[, .lookup_col]), .target_col]
+}
+
+# Note: `data_map` (used to look up the default `code`) is expected to exist in the calling environment
+output_csv_data <- function(gcam_dataset, col_type_nonyear,
+                            title, unit, description = NA,
+                            code,
+                            out_dir,
+                            GZIP = F){
+
+  if (missing(code)) {
+    code = lookup(gcam_dataset, data_map, "name", "FAO_domain_code")
+  }
+
+  col_type = paste0(col_type_nonyear, paste0(rep("n", ncol(get(gcam_dataset)) - nchar(col_type_nonyear)), collapse = ""))
+
+  cmnts <- c(
+    paste0("File: ", gcam_dataset, ifelse(GZIP, ".csv.gz", ".csv")),
+    paste0("Title: ", title),
+    paste0("Units: ", unit),
+    paste0("Description: ", description),
+    paste0("Data source: FAOSTAT (main domain: ", code, " FAO.update: ", FAOSTAT_metadata(code = code)$dateupdate, ")"),
+    paste0("Date of CSV last update: ", Sys.Date()),
+    paste0("Column types: ", col_type),
+    "----------"
+  )
+  fqfn <- file.path(out_dir, paste0(gcam_dataset, ".csv"))
+  suppressWarnings(file.remove(fqfn))
+
+  if (GZIP == F) {
+    cat(paste("#", cmnts), file = fqfn, sep = "\n", append = TRUE)
+    readr::write_csv(get(gcam_dataset), fqfn, append = TRUE, col_names = TRUE, na = "")
+  } else {
+    cat(paste("#", cmnts), file =
gzfile(paste0(fqfn, ".gz")), sep = "\n", append = TRUE) + readr::write_csv(get(gcam_dataset), gzfile(paste0(fqfn, ".gz")), append = TRUE, col_names = TRUE, na = "") + } +} + diff --git a/R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R b/R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R new file mode 100644 index 00000000..5930fbed --- /dev/null +++ b/R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R @@ -0,0 +1,1108 @@ +# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. + +#' module_aglu_L100.FAO_SUA_PrimaryEquivalent +#' +#' Generate supply utilization balance in primary equivalent +#' +#' @param command API command to execute +#' @param ... other optional parameters, depending on command +#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if +#' \code{command} is "MAKE") all the generated outputs: \code{GCAM_AgLU_SUA_APE_1973_2019}, +#' \code{FAO_AgProd_Kt_All},\code{FAO_AgArea_Kha_All},\code{FAO_Food_Macronutrient_All_2010_2019}, +#' \code{FAO_Food_MacronutrientRate_2010_2019_MaxValue} +#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. +#' A method to generate primary equivalent is created for the new FAOSTAT supply utilization data (2010 to 2019). +#' New SUA balance is connected to the old one (before 2010). Production and harvested area data with FAO region and item +#' for primary production are provided. For FAO food items, macronutrient values are calculated at SUA item level. +#' Data processing was consistent across scales. Note that GCAM regions and commodities in aggregation mapping can +#' be changed in corresponding mappings. The output data is not averaged over time. +#' @importFrom assertthat assert_that +#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at +#' first case_when vars +#' @importFrom tibble tibble +#' @importFrom tidyr complete drop_na gather nesting spread replace_na +#' @author XZ 2022 +module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) { + + MODULE_INPUTS <- + c(FILE = "aglu/AGLU_ctry", + FILE = "common/iso_GCAM_regID", + FILE = "common/GCAM_region_names", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020", + FILE = "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + FILE = "aglu/FAO/SUA_item_code_map", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009", + FILE = "aglu/FAO/Mapping_item_FBS_GCAM", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020", + FILE = "aglu/FAO/FAO_ag_items_PRODSTAT", + FILE = "aglu/FAO/FAO_an_items_PRODSTAT", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean" + ) + + MODULE_OUTPUTS <- + c("GCAM_AgLU_SUA_APE_1973_2019", + "FAO_AgProd_Kt_All", + "FAO_AgArea_Kha_All", + "FAO_Food_Macronutrient_All_2010_2019", + "FAO_Food_MacronutrientRate_2010_2019_MaxValue") + + if(command == driver.DECLARE_INPUTS) { + return(MODULE_INPUTS) + } else if(command == driver.DECLARE_OUTPUTS) { + return(MODULE_OUTPUTS) + } else if(command == driver.MAKE) { + + year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. 
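+
+    # Note (editorial sketch): left_join_error_no_match() is gcamdata's strict join;
+    # unlike dplyr::left_join() it fails if any left-hand row fails to match or
+    # matches more than once. Using it below means an FAO area missing from
+    # AGLU_ctry aborts the build instead of silently becoming NA, e.g.
+    #   tibble(area = "Narnia") %>%
+    #     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area")
+    #   # -> error (unmatched rows), rather than iso = NA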
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    # Get Supply-utilization account (SUA) elements and use as factor
+    All_Bal_element <- levels(GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019$element)
+    All_Bal_element <- factor(All_Bal_element, levels = All_Bal_element)
+
+    # Section1: [2010-2019] Region aggregation of supply-utilization-accounting data ----
+
+    # Note: the volume of data in this processing is quite large. Therefore we took
+    # extra care to be cognizant of processing speed and memory usage through sections 1 and 2.
+    # In particular we rely on ID codes and factors as much as possible to speed up joins.
+    # In addition, we have filtered zero rows from the raw data to significantly reduce
+    # the overall volume. Unfortunately, this change makes the processing riddled with
+    # trap doors where we need to be extra careful to complete / refill zeros or risk losing
+    # rows of legitimate data.
+
+    # create a complete area / iso / GCAM region mapping
+    GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020 %>%
+      select(area_code, area) %>%
+      distinct() %>%
+      left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
+      left_join(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
+      left_join(GCAM_region_names, by = "GCAM_region_ID") ->
+      Area_Region_Map
+
+    # 1.1 Helper functions for regional aggregation ----
+
+    # Aggregate SUA to GCAM regions
+    # Intra-regional trade is removed when bilateral trade data are available
+    SUA_Reg_Agg <- function() {
+
+      GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 %>%
+        left_join_error_no_match(Area_Region_Map %>% select(area_code, GCAM_region_ID), by = "area_code") %>%
+        group_by(GCAM_region_ID, item_code, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() ->
+        DF_SUA_Agg
+
+      # Calculate intra-regional trade
+      GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020 %>%
+        left_join_error_no_match(Area_Region_Map %>% select(area_code, GCAM_region_ID), by = "area_code") %>%
+        left_join_error_no_match(Area_Region_Map %>% select(source_code = area_code, source_GCAM_region_ID = GCAM_region_ID), by = "source_code") %>%
+        filter(GCAM_region_ID == source_GCAM_region_ID) %>%
+        group_by(GCAM_region_ID, item_code, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        mutate(value = -value / 1000.0) ->
+        DF_INTRA_REG_TRADE
+
+      # SUA has fewer items and years than the bilateral data set, and in addition
+      # there are some small discrepancies: zero import/export in SUA vs. tiny amounts
+      # of trade in the bilateral data. Doing a left join here will drop these
+      # discrepancies, which is what we want.
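+      # Editorial sketch (hypothetical numbers): DF_INTRA_REG_TRADE holds intra-regional
+      # flows negated and converted to kt (value = -value / 1000), so the `value + TCL`
+      # below subtracts them; e.g., a regional Export of 10 kt that includes 4 kt of
+      # intra-regional trade becomes 10 + (-4) = 6 kt.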
+      bind_rows(DF_INTRA_REG_TRADE %>% mutate(element = All_Bal_element[All_Bal_element == "Export"]),
+                DF_INTRA_REG_TRADE %>% mutate(element = All_Bal_element[All_Bal_element == "Import"])) %>%
+        rename(TCL = value) %>%
+        right_join(DF_SUA_Agg, by = c("GCAM_region_ID", "item_code", "year", "element")) %>%
+        # equivalent to
+        #left_join(DF_SUA_Agg, ., by = c("GCAM_region_ID", "item_code", "year", "element")) %>%
+        mutate(value = if_else(is.na(TCL), value, value + TCL)) %>%
+        select(-TCL) %>%
+        filter(value != 0.0) ->
+        DF_SUA_Agg_TradeAdj
+
+      # Need to remove gross trade when export > production
+      # to maintain the triangle inequality rule
+      # Note that Prod < Export is still possible due to "Residuals"
+      DF_SUA_Agg_TradeAdj %>%
+        filter(element %in% c("Production", "Import", "Export")) %>%
+        spread(element, value, fill = 0.0) %>%
+        mutate(value = pmax(Production - Export, -Import)) %>%
+        filter(value < 0) %>%
+        select(-Production, -Import, -Export) ->
+        GrossTradeRM
+
+      bind_rows(GrossTradeRM %>% mutate(element = All_Bal_element[All_Bal_element == "Export"]),
+                GrossTradeRM %>% mutate(element = All_Bal_element[All_Bal_element == "Import"]),
+                DF_SUA_Agg_TradeAdj) %>%
+        group_by(GCAM_region_ID, item_code, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() ->
+        DF_SUA_Agg_TradeAdj_TriagAdj
+
+      return(DF_SUA_Agg_TradeAdj_TriagAdj)
+    }
+
+    # 1.2. Execution: regional aggregation ----
+    # Get SUA data ready
+    FAO_SUA_Kt_2010to2019_R <- SUA_Reg_Agg()
+
+    Min_SUA_Year <- min(FAO_SUA_Kt_2010to2019_R$year)
+    FAO_SUA_Kt_2010to2019 <- GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019
+    ## Clean up
+    rm(GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019)
+    rm(GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020)
+    ## Done Section1 ----
+    #****************************----
+
+    # Section2: [2010-2019] Primary equivalent aggregation to GCAM commodities ----
+
+    Mapping_SUA_PrimaryEquivalent %>%
+      left_join_error_no_match(SUA_item_code_map %>% rename(sink_item_code = item_code), by = c("sink_item" = "item")) %>%
+      left_join_error_no_match(SUA_item_code_map %>% rename(source_item_code = item_code), by = c("source_item" = "item")) %>%
+      mutate(APE_comm = as.factor(APE_comm)) ->
+      Mapping_SUA_PrimaryEquivalent_ID
+
+    #Mapping_SUA_PrimaryEquivalent_ID[Mapping_SUA_PrimaryEquivalent_ID$sink_item_code == 235, "source_primary"] = FALSE
+
+    Mapping_SUA_PrimaryEquivalent_ID %>%
+      select(item_code = sink_item_code, output_specific_extraction_rate) %>%
+      filter(!is.na(output_specific_extraction_rate)) ->
+      OUTPUT_SPECIFIC_EXTRACTION_RATE
+
+    # 2.1 Helper functions for SUA primary equivalent aggregation ----
+
+    #' Get extraction rate
+    #' @description Gross extraction rate is calculated for domestic, traded, and lagged values.
+    #' By gross, it means sink items are aggregated.
+    #' The function is used in Proc_primarize.
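+    #' For example (hypothetical numbers): if a region processes 100 kt of a source
+    #' item (e.g., wheat) into 75 kt of a sink item (e.g., flour), its regional
+    #' extraction rate is 75 / 100 = 0.75, and 1 kt of the sink item counts as
+    #' 1 / 0.75 ~ 1.33 kt of the source item in primary equivalent.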
+    #' @param DF_CURR_NEST Input supply-utilization accounting data frame with one tier of processing
+    #' @param DF_ALL Input supply-utilization accounting data frame with ALL the data
+    #' @return A data frame including regional, traded, and world extraction rates of a processing
+
+    Get_GROSS_EXTRACTION_RATE <- function(DF_CURR_NEST, DF_ALL) {
+      curr_sink_items = unique(DF_CURR_NEST$item_code)
+      Mapping_SUA_PrimaryEquivalent_ID %>%
+        filter(sink_item_code %in% curr_sink_items) ->
+        Curr_Sink_Mapping
+      curr_source_items = unique(Curr_Sink_Mapping$source_item_code)
+      Mapping_SUA_PrimaryEquivalent_ID %>%
+        filter(source_item_code %in% curr_source_items) ->
+        Curr_Source_Mapping
+      Curr_Source_Mapping %>%
+        group_by(APE_comm) %>%
+        mutate(minimum_extraction_rate = if_else(Q25asMin, extraction_rate_Q25, 0)) %>%
+        select(APE_comm, minimum_extraction_rate) %>%
+        distinct() ->
+        MIN_EXTRACTION_RATE
+
+      DF_ALL %>%
+        # Prepare data to calculate regional, traded, and world average extraction rates
+        tidyr::unnest(c(data)) %>%
+        filter(element == "Processed", item_code %in% curr_source_items) %>%
+        select(-nest_level) %>%
+        bind_rows(DF_CURR_NEST %>% filter(element == "Production" | element == "Export")) %>%
+        dplyr::group_by_at(vars(-item_code, -value)) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        complete(GCAM_region_ID = GCAM_region_names$GCAM_region_ID, nesting(element, year, APE_comm), fill = list(value = 0)) %>%
+        spread(element, value, fill = 0.0) %>%
+        left_join_error_no_match(MIN_EXTRACTION_RATE, by = c("APE_comm")) %>%
+        group_by(APE_comm, year) %>%
+        mutate(extraction_rate_world = sum(Production) / sum(Processed),
+               # in case sum(Processed) or sum(Production) == 0
+               extraction_rate_world = if_else(extraction_rate_world != 0 & is.finite(extraction_rate_world),
+                                               extraction_rate_world, 1),
+               extraction_rate = Production / Processed,
+               # Regional extraction rate = production of an aggregated processed (sink) item /
+               # processed use of an aggregated primary (source) item;
+               # use the world average to fill in NA or zero
+               extraction_rate = if_else(is.na(extraction_rate) | extraction_rate == 0, extraction_rate_world, extraction_rate),
+               # Apply the minimum extraction rate here
+               extraction_rate = pmax(extraction_rate, minimum_extraction_rate),
+               extraction_rate_trade = sum(Export) / sum(Export / extraction_rate),
+               extraction_rate_trade = if_else(is.na(extraction_rate_trade), extraction_rate, extraction_rate_trade),
+               # both processed and production > 0
+               positive_prod = Production > 0 & Processed > 0) %>%
+        ungroup() %>%
+        group_by(APE_comm, GCAM_region_ID) %>%
+        # Calculate the lagged extraction_rate but replace NA with the current rate (first period)
+        mutate(extraction_rate_lag = lag(extraction_rate, default = extraction_rate[1])) %>%
+        ungroup() %>%
+        select(APE_comm, GCAM_region_ID, year, bal_import = extraction_rate_trade, bal_domestic_lag = extraction_rate_lag, bal_domestic_current = extraction_rate) %>%
+        gather(bal_source, extraction_rate, bal_import, bal_domestic_lag, bal_domestic_current, factor_key = TRUE)
+    }
+
+    #' Separate the SUA balance into domestic and imported balances for sink items
+    #' @description The function is used in Proc_primarize.
+    #' @param DF_CURR_NEST Input supply-utilization accounting data frame with one tier of processing
+    #' @return SUA DF
+
+    Get_ARMINGTON_BALANCE <- function(DF_CURR_NEST) {
+      Import_Demand_Item <- factor(c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss"), levels = All_Bal_element)
+
+      DF_CURR_NEST %>%
+        # Calculate the imported consumption share
+        # The assumption is
that a portion of Import_Demand_Items was imported,
+        # so they need to be scaled by an international extraction rate.
+        # Note that stock variation is not included in import consumption to maintain stock balance,
+        # so additional adjustment may be needed
+        filter(element == "Import" | element %in% Import_Demand_Item) %>%
+        mutate(is_import = element == "Import") %>%
+        spread(is_import, value, fill = 0.0) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code) %>%
+        summarize(import = sum(`TRUE`),
+                  import_demand = sum(`FALSE`)) %>%
+        ungroup() %>%
+        mutate(Import_Demand_Share = import / import_demand,
+               # replace NA and Inf
+               Import_Demand_Share = if_else(is.finite(Import_Demand_Share), Import_Demand_Share, 0),
+               # The share should be smaller than 1, though outlier regions may import for storage
+               Import_Demand_Share = pmin(Import_Demand_Share, 1),
+               residual = import - import_demand * Import_Demand_Share) %>%
+        ungroup() %>%
+        select(APE_comm, GCAM_region_ID, item_code, year, Import_Demand_Share, residual) %>%
+        left_join(DF_CURR_NEST, ., by = c("APE_comm", "GCAM_region_ID", "item_code", "year")) %>%
+        # When Import_Demand_Item consumption < Import, those elements are used to share out
+        # import consumption; otherwise, Residuals is used for the adjustment
+        mutate(bal_import = case_when(element == "Import" ~ value,
+                                      element %in% Import_Demand_Item ~ value * Import_Demand_Share,
+                                      element == "Residuals" ~ residual,
+                                      TRUE ~ 0),
+               # Calculate domestic balance
+               bal_domestic = value - bal_import) %>%
+        select(-value, -Import_Demand_Share, -residual) %>%
+        gather(bal_source, value, bal_import, bal_domestic, factor_key = TRUE) ->
+        TradeBal_Data
+
+      Regional_supply_elements <- factor(c("Opening stocks", "Production", "Import"), levels = All_Bal_element)
+      Regional_demand_elements <- factor(c("Export", "Feed", "Food", "Loss", "Processed", "Seed", "Other uses", "Closing stocks"), levels = All_Bal_element)
+      TradeBal_Data %>%
+        mutate(is_supply = element %in% Regional_supply_elements,
+               is_demand = element %in% Regional_demand_elements) %>%
+        filter(is_supply | is_demand) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code, bal_source) %>%
+        # Clean the bal items
+        summarize(`Regional supply` = sum(value[is_supply]),
+                  `Regional demand` = sum(value[is_demand])) %>%
+        ungroup() %>%
+        mutate(`Residuals` = `Regional supply` - `Regional demand`) %>%
+        gather(element, value, `Regional supply`, `Regional demand`, `Residuals`) %>%
+        mutate(element = factor(element, levels = All_Bal_element)) %>%
+        bind_rows(TradeBal_Data %>% filter(!element %in% c("Regional supply", "Regional demand", "Residuals")), .)
+    }
+
+    #' Separate the domestic SUA balance into current and lagged balances for sink items
+    #' @description The function is used in Proc_primarize.
+    #' @param DF_CURR_NEST_TradeAdj Output from Get_ARMINGTON_BALANCE.
+
+
+    #' Separate the domestic SUA balance into current and lagged balances for sink items
+    #' @description The function is used in Proc_primarize.
+    #' @param DF_CURR_NEST_TradeAdj Output from Get_ARMINGTON_BALANCE: an input supply-utilization accounting data frame with one tier of processing
+    #' @return A SUA data frame with the domestic balance split into current and lagged (opening-stock) components
+
+    Get_STOCK_BALANCE <- function(DF_CURR_NEST_TradeAdj) {
+      Opening_Stock_Item <- factor(c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss"), levels = All_Bal_element)
+
+      get_bal_source_data <- function(data, bal_source_key) {
+        data[data$bal_source == bal_source_key, "data", drop = TRUE][[1]]
+      }
+
+      DF_CURR_NEST_TradeAdj %>%
+        tidyr::nest(data = -bal_source) ->
+        StockCalcNested
+
+      StockCalcNested %>%
+        get_bal_source_data("bal_domestic") %>%
+        filter(element == "Opening stocks" | element %in% Opening_Stock_Item) %>%
+        mutate(is_opening = element == "Opening stocks") %>%
+        spread(is_opening, value, fill = 0.0) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code) %>%
+        summarize(Ostock = sum(`TRUE`),
+                  Ostock_demand = sum(`FALSE`)) %>%
+        ungroup() %>%
+        mutate(Ostock_Demand_Share = Ostock / Ostock_demand,
+               # The share should be smaller than 1;
+               # other elements will be adjusted if not
+               Ostock_Demand_Share = if_else(is.finite(Ostock_Demand_Share), Ostock_Demand_Share, 0),
+               Ostock_Demand_Share = pmin(Ostock_Demand_Share, 1),
+               residual = Ostock - Ostock_demand * Ostock_Demand_Share) %>%
+        ungroup() %>%
+        select(APE_comm, GCAM_region_ID, item_code, year, Ostock_Demand_Share, residual) %>%
+        left_join(StockCalcNested %>% get_bal_source_data("bal_domestic"), ., by = c("APE_comm", "GCAM_region_ID", "item_code", "year")) %>%
+        mutate(bal_domestic_lag = case_when(element == "Opening stocks" ~ value,
+                                            element %in% Opening_Stock_Item ~ value * Ostock_Demand_Share,
+                                            element == "Residuals" ~ residual,
+                                            TRUE ~ 0),
+               # Calculate the current domestic balance
+               bal_domestic_current = value - bal_domestic_lag) %>%
+        select(-value, -Ostock_Demand_Share, -residual) %>%
+        gather(bal_source, value, bal_domestic_lag, bal_domestic_current, factor_key = TRUE) ->
+        StockBal_Data
+
+      Regional_supply_elements <- factor(c("Opening stocks", "Production", "Import"), levels = All_Bal_element)
+      Regional_demand_elements <- factor(c("Export", "Feed", "Food", "Loss", "Processed", "Seed", "Other uses", "Closing stocks"), levels = All_Bal_element)
+      Bal_types = c("bal_import", "bal_domestic_lag", "bal_domestic_current")
+      Bal_types = factor(Bal_types, levels = Bal_types)
+      StockBal_Data %>%
+        complete(year = unique(StockBal_Data$year), nesting(GCAM_region_ID, item_code, element, APE_comm, bal_source), fill = list(value = 0)) %>%
+        mutate(is_supply = element %in% Regional_supply_elements,
+               is_demand = element %in% Regional_demand_elements) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code, bal_source) %>%
+        # Clean the bal items
+        summarize(`Regional supply` = sum(value[is_supply]),
+                  `Regional demand` = sum(value[is_demand]),
+                  # using max to guard against a missing Closing stocks row
+                  `Stock Variation` = max(value[element == "Closing stocks"], 0) - max(value[element == "Opening stocks"], 0)) %>%
+        ungroup() %>%
+        mutate(`Residuals` = `Regional supply` - `Regional demand`) %>%
+        gather(element, value, `Regional supply`, `Regional demand`, `Stock Variation`, `Residuals`) %>%
+        mutate(element = factor(element, levels = All_Bal_element)) %>%
+        bind_rows(StockBal_Data %>% filter(!element %in% c("Regional supply", "Regional demand", "Stock Variation", "Residuals")), .) %>%
+        tidyr::nest(data = -bal_source) %>%
+        mutate(bal_source = factor(bal_source, levels = Bal_types)) %>%
+        bind_rows(StockCalcNested %>% filter(bal_source == "bal_import") %>% mutate(bal_source = factor(bal_source, levels = Bal_types))) %>%
+        tidyr::unnest(c("data"))
+    }
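+
+    # The opening-stock split mirrors the Armington split above (hypothetical
+    # values, never executed): consumption is attributed to opening stocks in
+    # proportion to Ostock / Ostock_demand, capped at 1, so lagged supply
+    # (opening stocks) and current supply (production plus imports) each carry
+    # an internally consistent balance.
+    if(FALSE) {
+      ostock <- 10; ostock_demand <- 50
+      share <- min(ostock / ostock_demand, 1)   # 0.2
+      feed_from_stock <- 25 * share             # 5 kt of a 25 kt Feed use
+    }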
+
+
+    #' Primary equivalent aggregation
+    #' @param DF_ALL Input supply-utilization accounting data frame with all levels of data nested which need to be primarized
+    #' @return A supply-utilization accounting data frame with all levels processed and aggregated to GCAM_commodity
+
+    Proc_primarize <- function(DF_ALL){
+      MaxNest = max(DF_ALL$nest_level)
+      MinNest = 1
+      for(curr_nest in MaxNest:MinNest) {
+        # get the current tier to process
+        DF_ALL %>%
+          filter(nest_level == curr_nest) %>%
+          pull(data) %>%
+          first() ->
+          DF_CURR_NEST
+
+        # Sink items or processed items in the processing
+        curr_sink_items = unique(DF_CURR_NEST$item_code)
+        Mapping_SUA_PrimaryEquivalent_ID %>%
+          filter(sink_item_code %in% curr_sink_items) ->
+          Curr_Sink_Mapping
+        # Source items or primary items in the processing
+        curr_source_items = unique(Curr_Sink_Mapping$source_item_code)
+        Mapping_SUA_PrimaryEquivalent_ID %>%
+          filter(source_item_code %in% curr_source_items) ->
+          Curr_Source_Mapping
+
+        # OUTPUT_SPECIFIC_EXTRACTION_RATE is a data frame of item_code and output_specific_extraction_rate.
+        # In some cases, prescaling sink item SUA using output_specific_extraction_rate can improve the processing,
+        # e.g., when coproduction shares are not fixed.
+        if(nrow(OUTPUT_SPECIFIC_EXTRACTION_RATE %>% filter(item_code %in% curr_sink_items)) > 0) {
+          ## a. Pre-scale sink item data when OUTPUT_SPECIFIC_EXTRACTION_RATE is available ----
+          DF_CURR_NEST %>%
+            left_join(OUTPUT_SPECIFIC_EXTRACTION_RATE, by = c("item_code")) %>%
+            replace_na(list(output_specific_extraction_rate = 1)) %>%
+            mutate(value = value / output_specific_extraction_rate) %>%
+            select(-output_specific_extraction_rate) ->
+            DF_CURR_NEST
+        }
+
+        ## b. For the sink items of the tier, separate the balance into domestic and imported ----
+        # Note that the method here relies on Get_GROSS_EXTRACTION_RATE and Get_ARMINGTON_BALANCE
+        DF_CURR_NEST %>%
+          Get_ARMINGTON_BALANCE() %>%
+          Get_STOCK_BALANCE() %>%
+          # Get extraction rates for the domestic and traded balances.
+          # Note that a regional extraction rate could be inf,
+          # likely due to data inconsistency, e.g., zero processed in source but positive sink.
+          # No adjustments were made since 1/inf becomes zero in the scaling process, preserving the primary balance
+          left_join(Get_GROSS_EXTRACTION_RATE(DF_CURR_NEST, DF_ALL), by = c("APE_comm", "GCAM_region_ID", "year", "bal_source")) %>%
+          # Scale sink items to get the source item equivalent
+          mutate(value = value / extraction_rate) %>%
+          select(-extraction_rate) ->
+          .df1
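+
+        # Sketch of the scaling above (hypothetical numbers): 75 kt of flour at
+        # an extraction rate of 0.75 becomes 75 / 0.75 = 100 kt of wheat-primary
+        # equivalent, so the primary-equivalent supply-demand balance is preserved.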
+
+        ## c. Aggregate sink items into source items ----
+        # Production & processed are adjusted for primary aggregation;
+        # source items are bound in as well
+        DF_ALL %>%
+          filter(nest_level <= curr_nest) %>%
+          tidyr::unnest(c("data")) %>%
+          filter(element == "Processed", item_code %in% curr_source_items) %>%
+          select(-element) ->
+          .df2
+        .df2 %>%
+          complete(GCAM_region_ID = GCAM_region_names$GCAM_region_ID, nesting(APE_comm, item_code, nest_level, year), fill = list(value = 0)) %>%
+          complete(.df2 %>% distinct(APE_comm, item_code, nest_level), nesting(GCAM_region_ID, year), fill = list(value = 0)) %>%
+          group_by(APE_comm, item_code) %>%
+          mutate(value = sum(value)) %>%
+          ungroup() %>%
+          group_by(APE_comm, GCAM_region_ID, year) %>%
+          mutate(share = value / sum(value),
+                 share = if_else(is.finite(share), share, dplyr::n() / sum(dplyr::n()))) %>%
+          ungroup() %>%
+          select(-value, source_item_code = item_code) ->
+          source_share
+
+
+        ## d. Merge sink SUA into source items SUA ----
+        # Note that with multiple source items, sinks are aggregated into sources based on average processed shares across sources
+        # Prepare data to calculate the world average source share
+        .df1 %>%
+          left_join(source_share, by = c("APE_comm", "GCAM_region_ID", "year")) %>%
+          filter(!is.na(share)) %>%
+          mutate(value = value * share,
+                 item_code = source_item_code) %>%
+          select(-share) %>%
+          group_by(nest_level, APE_comm, GCAM_region_ID, year, item_code, element) %>%
+          summarize(value = sum(value)) %>%
+          ungroup() %>%
+          complete(element = All_Bal_element[All_Bal_element %in% c("Production", "Processed")], nesting(nest_level, APE_comm, GCAM_region_ID, year, item_code), fill = list(value = 0)) %>%
+          group_by(nest_level, APE_comm, GCAM_region_ID, year, item_code) %>%
+          mutate(value = if_else(element == "Production" | element == "Processed", value - value[element == "Production"], value)) %>%
+          ungroup() ->
+          .df3
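+
+        # Merge-share sketch (hypothetical): if an APE commodity has two source
+        # items whose Processed totals are 60 kt and 40 kt, the primarized sink
+        # balance is split 60/40 between them before aggregation; where shares
+        # are undefined (0/0), equal shares n()/sum(n()) are used instead.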
+
+        # Bind source item and aggregate across source & sink items based on primary equivalent.
+        # Note we will bind and aggregate by nest; ultimately it seems unlikely nesting provided
+        # any performance boost, but it certainly didn't hurt either.
+        .df3 %>%
+          tidyr::nest(data = -nest_level) ->
+          df3_nested
+
+        for(nest_i in df3_nested$nest_level) {
+          bind_rows(
+            DF_ALL[DF_ALL$nest_level == nest_i, "data", drop = TRUE][[1]],
+            df3_nested[df3_nested$nest_level == nest_i, "data", drop = TRUE][[1]]) %>%
+            group_by(APE_comm, GCAM_region_ID, year, item_code, element) %>%
+            summarize(value = sum(value)) %>%
+            ungroup() ->
+            AGG
+          DF_ALL %>%
+            filter(nest_level != nest_i) %>%
+            bind_rows(tibble(nest_level = nest_i, data = list(AGG))) ->
+            DF_ALL
+        }
+        # drop the processed tier as the data has now been aggregated and thus
+        # is no longer needed
+        DF_ALL %>% filter(nest_level != curr_nest) ->
+          DF_ALL
+      }
+
+      # Combine the remaining items by APE_comm
+      DF_ALL %>%
+        tidyr::unnest(c("data")) %>%
+        group_by(GCAM_region_ID, APE_comm, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        spread(element, value, fill = 0.0) %>%
+        # Do a final balance cleaning
+        mutate(`Regional supply` = `Opening stocks` + Production + `Import`,
+               `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Closing stocks`,
+               Residuals = `Regional supply` - `Regional demand`) %>%
+        gather(element, value, -GCAM_region_ID, -APE_comm, -year) ->
+        APE_AGG
+
+      # Aggregate by GCAM_commodity
+      # At this point we ditch the ID codes and factors as we return the data and
+      # make it available for the rest of the original processing
+      APE_AGG %>%
+        left_join_error_no_match(Mapping_SUA_PrimaryEquivalent %>% select(GCAM_commodity, APE_comm) %>% distinct(),
+                                 by = c("APE_comm")) %>%
+        group_by(GCAM_region_ID, GCAM_commodity, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        left_join_error_no_match(GCAM_region_names, by = c("GCAM_region_ID")) %>%
+        mutate(element = as.character(element)) %>%
+        select(region, year, GCAM_commodity, element, value) ->
+        GCAM_APE_after2010
+
+      return(GCAM_APE_after2010)
+    }
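+
+    # Shape sketch of the nested input Proc_primarize expects (hypothetical
+    # rows, never executed): each nest_level keeps its SUA rows in a list-column,
+    # so tiers can be primarized from the deepest level up without repeated
+    # filtering of one large table.
+    if(FALSE) {
+      tibble(nest_level = c(-1L, 1L, 2L),
+             item = c("Wheat", "Flour of wheat", "Bread")) %>%
+        tidyr::nest(data = -nest_level)
+    }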
+
+    # 2.2. Execution: process data into APE ----
+
+
+    ## Loop through all GCAM_commodity with available data ----
+
+    FAO_SUA_Kt_2010to2019_R %>%
+      left_join(Mapping_SUA_PrimaryEquivalent_ID %>% select(APE_comm, item_code = sink_item_code, nest_level) %>% distinct(), by = c("item_code")) %>%
+      left_join(Mapping_SUA_PrimaryEquivalent_ID %>% select(APE_comm_source = APE_comm, item_code = source_item_code) %>% distinct(), by = c("item_code")) %>%
+      # find SUA items which are truly not mapped to anything and filter them out
+      mutate(APE_comm = if_else(is.na(APE_comm), APE_comm_source, APE_comm)) %>%
+      select(-APE_comm_source) %>%
+      filter(!is.na(APE_comm)) %>%
+      # the remaining rows with NA nest_level are primary; we need to keep them
+      # around for processing even though they don't need to be aggregated themselves,
+      # so we give them a nest level of -1
+      mutate(nest_level = if_else(is.na(nest_level), -1L, nest_level)) %>%
+      # we will literally nest by nest level to avoid constant subsetting,
+      # although we end up needing to unnest at times as well, so ultimately
+      # it likely makes little difference in performance
+      tidyr::nest(data = -nest_level) %>%
+      # we are now ready to recursively primarize APE commodities then aggregate
+      # to GCAM commodities
+      Proc_primarize() ->
+      GCAM_APE_after2010
+
+    rm(FAO_SUA_Kt_2010to2019_R)
+
+    ## Done Section2 ----
+    #****************************----
+
+    # Section3 [1970-2009] Food balance sheet (original) aggregation to GCAM regions and commodities ----
+
+    # 3.1. Helper functions ----
+
+    #' Balance gross trade
+    #' @description Scale gross export and import in all regions to make them equal at the world level.
+    #' @param .DF An input data frame with an element column including Import, Export, and Production
+    #' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
+    #' @param .Reg_VAR Region variable name; default is ("area_code")
+    #' @param .GROUP_VAR Group variable; default is ("item_code", "year")
+    #' @return The same data frame with balanced world export and import.
+
+    GROSS_TRADE_ADJUST <- function(.DF,
+                                   .MIN_TRADE_PROD_RATIO = 0.01,
+                                   .Reg_VAR = 'area_code',
+                                   .GROUP_VAR = c("item_code", "year")){
+
+      # assert .DF structure
+      assertthat::assert_that(all(c("element", .GROUP_VAR) %in% names(.DF)))
+      assertthat::assert_that(dplyr::is.grouped_df(.DF) == F)
+      assertthat::assert_that(all(c("Import", "Export", "Production") %in%
+                                    c(.DF %>% distinct(element) %>% pull)))
+
+      .DF %>%
+        # Join ExportScaler and ImportScaler
+        left_join(
+          .DF %>%
+            spread(element, value) %>%
+            dplyr::group_by_at(vars(dplyr::all_of(.GROUP_VAR))) %>%
+            # filter out items with zero world trade or production
+            # and replace NA with zero later for the scalers
+            replace_na(list(Export = 0, Import = 0, Production = 0)) %>%
+            filter(sum(Export) != 0, sum(Import) != 0, sum(Production) != 0) %>%
+            # world trade should be larger than .MIN_TRADE_PROD_RATIO to have meaningful data;
+            # depending on the item group, .MIN_TRADE_PROD_RATIO can be set differently
+            filter(sum(Export) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
+            filter(sum(Import) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
+            # finally,
+            # use the average gross trade value to calculate the trade scalers;
+            # the trade scalers will be applied to all regions
+            mutate(ExportScaler = (sum(Export) + sum(Import)) / 2 / sum(Export),
+                   ImportScaler = (sum(Export) + sum(Import)) / 2 / sum(Import)) %>%
+            select(dplyr::all_of(c(.Reg_VAR, .GROUP_VAR)), ExportScaler, ImportScaler) %>%
+            ungroup(),
+          by = c(dplyr::all_of(c(.Reg_VAR, .GROUP_VAR)))) %>%
+        replace_na(list(ExportScaler = 0, ImportScaler = 0)) %>%
+        # If world export, import, or prod is 0, trade will be zero
+        mutate(value = case_when(
+          element %in% c("Export") ~ value * ExportScaler,
+          element %in% c("Import") ~ value * ImportScaler,
+          TRUE ~ value)) %>%
+        select(-ExportScaler, -ImportScaler)
+
+    }
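+
+    # Scaler sketch (hypothetical world totals, never executed): with world
+    # Export = 90 and Import = 110 for an item-year, both sides are scaled to
+    # their mean, (90 + 110) / 2 = 100, so ExportScaler = 100/90 and
+    # ImportScaler = 100/110 apply uniformly across regions and gross trade
+    # balances exactly.
+    if(FALSE) {
+      Export <- 90; Import <- 110
+      ExportScaler <- (Export + Import) / 2 / Export    # ~1.11
+      ImportScaler <- (Export + Import) / 2 / Import    # ~0.91
+      all.equal(Export * ExportScaler, Import * ImportScaler)   # TRUE, both 100
+    }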
+
+    # 3.2. Execution ----
+    ## a. Aggregate FBSH_CB to GCAM commodity and region ----
+
+    GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009 %>%
+      gather_years() %>%
+      filter(year < Min_SUA_Year) %>%
+      filter(!is.na(value)) ->
+      FBSH_CB
+
+    # assert mapping is good
+    assertthat::assert_that(
+      Mapping_item_FBS_GCAM %>% filter(!is.na(GCAM_commodity)) %>%
+        distinct(item_code) %>% pull() %in%
+        c(FBSH_CB %>% distinct(item_code) %>% pull) %>% all)
+
+    Mapping_item_FBS_GCAM %>%
+      select(item_code, GCAM_commodity) %>%
+      filter(!is.na(GCAM_commodity)) %>%
+      left_join(FBSH_CB, by = "item_code") %>%
+      gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
+      gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
+      gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
+      dplyr::group_by_at(vars(area = region, year, GCAM_commodity, element)) %>%
+      summarise(value = sum(value), .groups = "drop") %>%
+      # complete element
+      complete(nesting(area, year, GCAM_commodity), element,
+               fill = list(value = 0)) ->
+      FBSH_CB_GCAM
+
+
+    ## b. Get primary production in GCAM region and sector ----
+
+    GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020 %>%
+      gather_years() %>%
+      filter(year < Min_SUA_Year) %>%
+      filter(!is.na(value), element == "Production") %>%
+      inner_join(
+        Mapping_SUA_PrimaryEquivalent %>% filter(source_primary == T) %>%
+          distinct(GCAM_commodity, item = source_item), by = "item") %>%
+      gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
+      gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
+      gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
+      dplyr::group_by_at(vars(area = region, year, GCAM_commodity, element)) %>%
+      summarise(value = sum(value), .groups = "drop") ->
+      QCL_PROD_GCAM
+
+    ## c. 
QCL_PROD_GCAM and FBSH_CB_GCAM: merge, scale, and connect---- + # Connect SUA (FBSH_CB) to primary production (QCL_PROD_GCAM) + # Primary production could be different due to aggregation or inconsistency + + QCL_PROD_GCAM %>% + # Complete elements in QCL_PROD + # also GCAM_commodity because no pork production in Pakistan + complete(area, year, GCAM_commodity, + element = unique(FBSH_CB_GCAM$element), fill = list(value = 0)) %>% + left_join_error_no_match( + FBSH_CB_GCAM %>% + # complete element to add zero productions + complete(area, year, GCAM_commodity, element, fill = list(value = 0)) %>% + rename(FBSH_CB = value), + by = c("area", "year", "GCAM_commodity", "element") + ) %>% + # mapping + group_by(area, GCAM_commodity, year) %>% + # When production in FBSH_CB > primary production (QCL_PROD_GCAM), adjust processed + # to ensure production <= primary production when processed is enough; (will scale later) + # calculate Prod_diff which is the diff in production b/t the two data sets + mutate(Prod_diff = FBSH_CB[element == "Production"] - value[element == "Production"] ) %>% + # adjust processed when prod_diff > 0 by canceling off production and processed + mutate(Prod_diff = if_else(Prod_diff > 0 & element %in% c("Production", "Processed"), Prod_diff, 0), + Processed = if_else(Prod_diff > 0 & element %in% c("Production", "Processed"), + FBSH_CB[element == "Processed"], 0), + FBSH_CB = FBSH_CB - pmin(Prod_diff, Processed), + # When production in FBSH_CB = 0 set to production in QCL so scaling will be consistent + FBSH_CB = if_else(FBSH_CB[element == "Production"] == 0 & element == "Production", value, FBSH_CB), + # After the above adjustments, re-scale SUA to match production in value + value = FBSH_CB * value[element == "Production"]/FBSH_CB[element == "Production"], + # fix NA + value = if_else(!is.finite(value), FBSH_CB, value)) %>% + ungroup() %>% + select(area, GCAM_commodity, element, year, value) %>% + # adjust gross trade + GROSS_TRADE_ADJUST(.MIN_TRADE_PROD_RATIO = 0.001, + .Reg_VAR = "area", + .GROUP_VAR = c("GCAM_commodity", "year")) %>% + spread(element, value) %>% + # Note that stock variation here was = opening - ending + # reversed here so the variation is a demand + mutate(`Stock Variation` = - `Stock Variation`, + `Regional supply` = Production + `Import`, + `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Stock Variation`, + Residuals = `Regional supply` - `Regional demand`) %>% + tidyr::gather(element, value, -area, -GCAM_commodity, -year) %>% + ungroup() %>% + rename(region = area) -> + GCAM_APE_before2010 + + rm(FBSH_CB, FBSH_CB_GCAM) + + ## Done Section3 ---- + #****************************---- + + # Section4 [1970-2019] GCAM_APE SUA ---- + + # 4.1. Helper functions ---- + Check_Balance_SUA <- function(.DF){ + + assertthat::assert_that(all(c("element") %in% names(.DF))) + assertthat::assert_that(all(c("Import", "Export", "Production", + "Food", "Feed", "Other uses") %in% + c(.DF %>% distinct(element) %>% pull))) + # 0. Check NA + if (.DF %>% filter(is.na(value)) %>% nrow() > 0) { + warning("NA values in SUA Balance") + } + + + # 1. Positive value except stock variation and residues + if (isFALSE(.DF %>% filter(!element %in% c("Stock Variation", "Other uses")) %>% + summarise(min = min(value, na.rm = T)) %>% pull(min) >= -0.001)) { + warning("Negative values in key elements (not including stock variation and other uses)") + } + + # 2. 
Trade balance in all year and items + if (isFALSE(.DF %>% filter(element %in% c("Import", "Export")) %>% + group_by(year, GCAM_commodity, element) %>% + summarise(value = sum(value), .groups = "drop") %>% + spread(element, value) %>% filter(abs(Import - Export) > 0.0001) %>% nrow() == 0)) { + warning("Gross trade imbalance") + } + + # 3. SUA balance check + if (isFALSE(.DF %>% + spread(element, value) %>% + mutate(`Regional supply` = Production + `Import`, + `Regional demand` = `Export` + Feed + Food + `Other uses`, + bal = abs(`Regional supply` - `Regional demand`)) %>% + filter(bal > 0.0001) %>% nrow() == 0)) { + warning("Regional supply != Regional demand + Residuals") + } + + # 4. Balanced in all dimensions + assertthat::assert_that(.DF %>% nrow() == + .DF %>% distinct(year) %>% nrow * + .DF %>% distinct(GCAM_commodity) %>% nrow * + .DF %>% distinct(element) %>% nrow * + .DF %>% distinct(region) %>% nrow) + + } + + # 4.2. Connect and bind data from two periods ---- + + GCAM_AgLU_SUA_APE_1973_2019 <- + GCAM_APE_before2010 %>% + bind_rows(GCAM_APE_after2010) %>% + mutate(unit = "1000 tonnes") %>% + # clean and aggregate elements not using + filter(!element %in% c("Regional demand", "Regional supply", + "Opening stocks", "Closing stocks")) %>% + mutate(element = replace(element, + element %in% c("Stock Variation", "Processed", + "Seed", "Residuals", "Loss"), + "Other uses")) %>% + dplyr::group_by_at(dplyr::vars(-value)) %>% + summarise(value = sum(value), .groups = "drop") + + ## Check balance + GCAM_AgLU_SUA_APE_1973_2019 %>% Check_Balance_SUA + rm(GCAM_APE_before2010, GCAM_APE_after2010) + + + ## Done Section4 ---- + #****************************---- + + # Section5 [1970-2019] Connect production and area data ---- + + # This section gets crop and livestock production before aggregation (FAO region and items) + # For both before 2010 and after 2010 + # They are also aggregated to GCAM region and commodities to assert consistency + # The processing includes all crops (including fodder crops) and livestock items + + # 5.1. Get all mapping straight ---- + + Primary_Item_CROP <- + FAO_ag_items_PRODSTAT %>% + select(item, GCAM_commodity, GCAM_subsector) %>% + filter(!is.na(item), !is.na(GCAM_commodity)) %>% + # Fodder grass has a duplicate as it mapped to different GTAP crops + distinct %>% + mutate(CropMeat = if_else(GCAM_commodity %in% c("FodderGrass", "FodderHerb"), + "Crop_Fodder", "Crop_NonFodder")) + assertthat::assert_that( + all(Primary_Item_CROP %>% filter(CropMeat == "Crop_NonFodder") %>% pull(item) %in% + c(Mapping_SUA_PrimaryEquivalent %>% filter(source_primary == T) %>% + distinct(item = source_item) %>% pull)), + msg = "Inconsistent mapping of primary crops between FAO_ag_items_PRODSTAT and Mapping_SUA_PrimaryEquivalent" ) + + Primary_Item_MEAT <- + Mapping_SUA_PrimaryEquivalent %>% + # animal meat Eq since they are included as primary production after 2010 + filter(source_primary == T | grepl("MeatEq", APE_comm)) %>% + distinct(GCAM_commodity, item = source_item) %>% + filter(GCAM_commodity %in% + c(FAO_an_items_PRODSTAT %>% + filter(!is.na(GCAM_commodity)) %>% + distinct(GCAM_commodity) %>% pull))%>% + mutate(CropMeat = "Meat") + + # 5.2. 
Get primary production for all ----
+    # Connecting, mapping, arranging, and asserting
+
+    ## Bind production and area data for both fodder and nonfodder ----
+    FAO_AgProd_Kt_Area_Kha <-
+      GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020 %>%
+      mutate(item_set = "QCL_COMM_CROP_PRIMARY_FODDER") %>%
+      bind_rows(GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020) %>%
+      gather_years() %>%
+      filter(!is.na(value))
+
+    # Assert no infinite yields
+    assertthat::assert_that(
+      # only a safeguard here as the data was cleaned and area and prod are matched
+      FAO_AgProd_Kt_Area_Kha %>%
+        # filter only primary crop items (all crops with area)
+        filter(item_set %in% c("QCL_COMM_CROP_PRIMARY",
+                               "QCL_COMM_CROP_PRIMARY_FODDER")) %>%
+        select(area_code, item_code, year, element, value) %>%
+        spread(element, value) %>%
+        filter(is.infinite(Production / `Area harvested`) |
+                 is.infinite(`Area harvested` / Production)) %>%
+        nrow == 0,
+      msg = "Check region/item for prod > 0 & area = 0 or prod = 0 & area > 0" )
+
+    # Assert primary production in the two sources are consistent
+    assertthat::assert_that(
+      FAO_SUA_Kt_2010to2019 %>% filter(element == "Production") %>%
+        inner_join(FAO_AgProd_Kt_Area_Kha %>%
+                     filter(item_set != "QCL_COMM_OTHERPROC") %>%
+                     filter(element == "Production") %>% rename(value1 = value),
+                   by = c("area_code", "item_code", "element", "year")) %>%
+        mutate(diff = abs(value1 - value)) %>%
+        filter(diff > 0.0001) %>% nrow() == 0,
+      msg = "Primary production in SUA (FAO_SUA_Kt_2010to2019) and QCL (FAO_AgProd_Kt_Area_Kha) are inconsistent"
+    )
+
+    ## a. All production ----
+    # SUA meat production includes more items than (QCL) FAO_AgProd_Kt_Area_Kha after 2010;
+    # production in FAO_AgProd_Kt_Area_Kha before 2010 was used
+    FAO_AgProd_Kt_Area_Kha %>%
+      filter(element == "Production") %>%
+      filter(year < Min_SUA_Year) %>%
+      select(c(names(FAO_SUA_Kt_2010to2019), "item")) %>%
+      bind_rows(
+        # For after 2010
+        # Note that not all meat items came from QCL_PROD (unlike primary crops)
+        # E.g., meat Eq, offals (livers chicken), etc. 
were from derivation or SCL + # But all items should exist in Bal_new_all + # And Bal_new_all is identical to QCL_PROD for primary productions + FAO_SUA_Kt_2010to2019 %>% + filter(element == "Production") %>% + # ensure we at least have a complete series across time otherwise it may + # throw off moving avg calculations + complete(area_code = Area_Region_Map$area_code, year = pull(., year) %>% unique(), nesting(item_code, element), fill=list(value=0)) %>% + left_join_error_no_match(SUA_item_code_map, by = c("item_code"))) %>% + bind_rows( + # bind fodder crops for after 2010 + FAO_AgProd_Kt_Area_Kha %>% + filter(item_set == "QCL_COMM_CROP_PRIMARY_FODDER", + element == "Production") %>% + filter(year >= Min_SUA_Year) %>% + select(c(names(FAO_SUA_Kt_2010to2019), "item")) + ) %>% + # Inner join works as filter here + # Keep subsector info for crops + inner_join(Primary_Item_CROP %>% + bind_rows(Primary_Item_MEAT %>% + mutate(GCAM_subsector = GCAM_commodity)), + by = "item") %>% + # add in iso and gcam regions ID + left_join_error_no_match(Area_Region_Map, by = "area_code") -> + FAO_AgProd_Kt_All + + FAO_AgProd_Kt_All %>% + dplyr::group_by_at(vars(region, year, GCAM_commodity, element, CropMeat)) %>% + summarise(value = sum(value), .groups = "drop") -> + QCL_PROD_GCAM + + FAO_AgProd_Kt_All %>% + select(-region) -> + FAO_AgProd_Kt_All + + assertthat::assert_that( + GCAM_AgLU_SUA_APE_1973_2019 %>% + filter(element == "Production") %>% + left_join_error_no_match( + QCL_PROD_GCAM %>% filter(CropMeat != "Crop_Fodder") %>% + select(-CropMeat) %>% + rename(value1 = value) %>% + complete(nesting(region, year, element), GCAM_commodity, fill = list(value1 = 0)), + by = c("region", "GCAM_commodity", "year", "element")) %>% + mutate(diff = abs(value1 - value)) %>% + filter(diff > 0.0001) %>% nrow() == 0, + msg = "Primary production from two sources + (GCAM_AgLU_SUA_APE_1973_2019 and FAO_AgProd_Kt_Area_Kha) are inconsistent." ) + + ## b. All area harvested ---- + + assertthat::assert_that( + all(Primary_Item_CROP %>% pull(item) %in% + c(FAO_AgProd_Kt_Area_Kha %>% + filter(item_set %in% c("QCL_COMM_CROP_PRIMARY", + "QCL_COMM_CROP_PRIMARY_FODDER")) %>% + pull(item)) ), + msg = "Not all required primary crop items included in FAO_AgProd_Kt_Area_Kha" ) + + FAO_AgProd_Kt_Area_Kha %>% + filter(element == "Area harvested") %>% + select(c(names(FAO_SUA_Kt_2010to2019), "item")) %>% + # Keep subsector info for crops + inner_join(Primary_Item_CROP, by = "item") %>% + # add in iso and gcam regions ID + left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") -> + FAO_AgArea_Kha_All + + #****************************---- + #Section6 Connect food items and macronutrient rates ---- + + # 6.1 Separate FAO food items into GCAM food items and NEC for macronutrient ---- + # GCAM included most of the food items + # All food item with available macronutrient info from FAOSTAT are included + + # a. 
Get all GCAM SUA items from the mapping by binding both source and sink items + # about 486 items (out of 530) used in GCAM + + Mapping_SUA_PrimaryEquivalent %>% + select(GCAM_commodity, item = source_item) %>% + bind_rows(Mapping_SUA_PrimaryEquivalent %>% + select(GCAM_commodity, item = sink_item)) %>% + distinct() -> + SUA_Items_GCAM + + assertthat::assert_that( + SUA_Items_GCAM %>% distinct(item) %>% nrow() == SUA_Items_GCAM %>% nrow(), + msg = "Check duplicates in Mapping_SUA_PrimaryEquivalent SUA items" + ) + + # highly processed products or other products are not included in GCAM + # (e.g., wine, infant food, or other nonfood items etc.) + + SUA_item_code_map %>% + filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM + + # b. There are 426 FAO food items, all included in FAO_SUA_Kt_2010to2019 (530 items) + # SUA_Items_Food includes both GCAM and NonGCAM(NEC) + SUA_item_code_map %>% + filter(item_code %in% unique(GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean$item_code)) %>% + left_join(SUA_Items_GCAM, by = "item") %>% + # For NA GCAM_commodity: not elsewhere classified (NEC) + # So we would know % of food calories not included in GCAM commodities + mutate(GCAM_commodity = if_else(is.na(GCAM_commodity), "NEC", GCAM_commodity)) -> + SUA_Items_Food + + + # 6.2 Get macronutrient values ---- + + ### a. Get world average macronutrient ---- + # For filling in missing values + + GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>% + tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>% + group_by(item, item_code, macronutrient) %>% + summarise(macronutrient_value_World = mean(macronutrient_value), .groups = "drop") %>% + ungroup() -> + SUA_food_macronutrient_rate_World + + + ### b. 
Calculate SUA food Calories consumption by joining macronutrient rates and SUA food ---- + + FAO_SUA_Kt_2010to2019 %>% + filter(element == "Food", item_code %in% SUA_Items_Food$item_code) %>% + # ensure we at least have a complete series across time otherwise it may + # throw off moving avg calculations + complete(area_code = Area_Region_Map$area_code, year = pull(., year) %>% unique(), nesting(item_code, element), fill=list(value=0)) %>% + rename(Food_Kt = value) %>% + select(-element) %>% + left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>% + repeat_add_columns( + tibble(macronutrient = c("calperg", "fatperc", "proteinperc"))) %>% + left_join( + GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>% + tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc), + by = c("area_code", "item_code", "item", "macronutrient") + ) %>% + left_join_error_no_match(SUA_food_macronutrient_rate_World, + by = c("item_code", "item", "macronutrient")) %>% + mutate(macronutrient_value = if_else(is.na(macronutrient_value), + macronutrient_value_World, + macronutrient_value), + # calculate total Cal, protein and fat in food + # value was in 1000 ton or 10^ 9 g + value = macronutrient_value * Food_Kt, + value = if_else(macronutrient %in% c("fatperc", "proteinperc"), + value / 100 /1000, value)) %>% # unit from perc to Mt + select(-macronutrient_value, -macronutrient_value_World, -Food_Kt) %>% + # rename element with units + mutate(macronutrient = case_when( + macronutrient == "calperg" ~ "MKcal", + macronutrient == "fatperc" ~ "MtFat", + macronutrient == "proteinperc" ~ "MtProtein" )) %>% + left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") -> + FAO_Food_Macronutrient_All_2010_2019 + + ### c. Get the max values of macronutrient conversion rate (per GCAM_commodity) ---- + # This will be used later as an upper bound to improve the data + GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>% + tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>% + left_join_error_no_match(SUA_Items_Food, + by = c("item_code", "item")) %>% + group_by(GCAM_commodity, macronutrient) %>% + summarise(max_macronutrient_value = max(macronutrient_value), .groups = "drop") -> + FAO_Food_MacronutrientRate_2010_2019_MaxValue + + + #****************************---- + # Produce outputs ---- + #******************************* + + GCAM_AgLU_SUA_APE_1973_2019 %>% + add_title("GCAM_AgLU_SUA_APE_1973_2019") %>% + add_units("kton") %>% + add_comments("Supply utilization balance for GCAM commodities and regions in primary equivalent") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "common/GCAM_region_names", + "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + "aglu/FAO/SUA_item_code_map", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020", + "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009", + "aglu/FAO/Mapping_item_FBS_GCAM") -> + GCAM_AgLU_SUA_APE_1973_2019 + + FAO_AgProd_Kt_All %>% + add_title("FAO_AgProd_Kt_All") %>% + add_units("1000 tonnes") %>% + add_comments("Supply utilization balance for GCAM commodities and regions in primary equivalent") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/FAO_ag_items_PRODSTAT", + "aglu/FAO/FAO_an_items_PRODSTAT", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + 
"aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") -> + FAO_AgProd_Kt_All + + FAO_AgArea_Kha_All %>% + add_title("FAO_AgArea_Kha_All") %>% + add_units("1000 ha") %>% + add_comments("Harvested area") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/FAO_ag_items_PRODSTAT", + "aglu/FAO/FAO_an_items_PRODSTAT", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") -> + FAO_AgArea_Kha_All + + FAO_Food_Macronutrient_All_2010_2019 %>% + add_title("GCAM_AgLU_SUA_APE_1973_2019") %>% + add_units("MKcal, MtFat, MtProtein") %>% + add_comments("Macronutrient consumption values connected to food consumption in GCAM_AgLU_SUA_APE_1973_2019") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> + FAO_Food_Macronutrient_All_2010_2019 + + FAO_Food_MacronutrientRate_2010_2019_MaxValue %>% + add_title("FAO_Food_MacronutrientRate_2010_2019_MaxValue") %>% + add_units("cal per g, fat perc. , protein perc.") %>% + add_comments("The max value of macronutrient conversion rate across region, year, and SUA items (per GCAM_commodity") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> + FAO_Food_MacronutrientRate_2010_2019_MaxValue + + return_data(MODULE_OUTPUTS) + + + } else { + stop("Unknown command") + } +} diff --git a/R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R b/R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R deleted file mode 100644 index 8dd7d67d..00000000 --- a/R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R +++ /dev/null @@ -1,1034 +0,0 @@ -# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. - -#' module_aglu_L100_FAOSTAT_SUA_PrimaryEquivalent -#' -#' Generate supply utilization balance in primary equivalent -#' -#' @param command API command to execute -#' @param ... other optional parameters, depending on command -#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if -#' \code{command} is "MAKE") all the generated outputs: \code{GCAM_AgLU_SUA_APE_2010_2019}, -#' \code{FAO_AgProd_Kt_All},\code{FAO_AgArea_Kha_All},\code{FAO_Food_Macronutrient_All_2010_2019}, -#' \code{FAO_Food_MacronutrientRate_2010_2019_MaxValue} -#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. -#' A method to generate primary equivalent is created for the new FAOSTAT supply utilization data (2010 to 2019). -#' New SUA balance is connected to the old one (before 2010). Production and harvested area data with FAO region and item -#' for primary production are provided. For FAO food items, macronutrient values are calculated at SUA item level. -#' Data processing was consistent across scales. Note that GCAM regions and commodities in aggregation mapping can -#' be changed in corresponding mappings. The output data is not averaged over time. 
-#' @importFrom assertthat assert_that -#' @importFrom dplyr bind_rows filter if_else inner_join left_join mutate rename select n group_by_at vars case_when arrange -#' @importFrom tidyr complete drop_na gather nesting spread replace_na -#' @importFrom tibble tibble -#' @author XZ 2022 -module_aglu_L100_FAOSTAT_SUA_PrimaryEquivalent <- function(command, ...) { - - MODULE_INPUTS <- - c(FILE = "aglu/AGLU_ctry", - FILE = "common/iso_GCAM_regID", - FILE = "common/GCAM_region_names", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020", - FILE = "aglu/FAO/Mapping_SUA_PrimaryEquivalent", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009", - FILE = "aglu/FAO/Mapping_item_FBS_GCAM", - FILE = "aglu/FAO/FAO_ag_items_PRODSTAT", - FILE = "aglu/FAO/FAO_an_items_PRODSTAT", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean" - ) - - MODULE_OUTPUTS <- - c("GCAM_AgLU_SUA_APE_2010_2019", - "FAO_AgProd_Kt_All", - "FAO_AgArea_Kha_All", - "FAO_Food_Macronutrient_All_2010_2019", - "FAO_Food_MacronutrientRate_2010_2019_MaxValue") - - if(command == driver.DECLARE_INPUTS) { - return(MODULE_INPUTS) - } else if(command == driver.DECLARE_OUTPUTS) { - return(MODULE_OUTPUTS) - } else if(command == driver.MAKE) { - - year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. - - all_data <- list(...)[[1]] - - # Load required inputs ---- - - lapply(MODULE_INPUTS, function(d){ - # get name as the char after last / - nm <- tail(strsplit(d, "/")[[1]], n = 1) - # get data and assign - assign(nm, get_data(all_data, d, strip_attributes = T), - envir = parent.env(environment())) }) - - # Section1: [2010-2019] Region aggregation of supply-utilization-accounting data ---- - - # 1.1. Helper functions for SUA regional aggregation ---- - - #' Adjust gross trade in SUA data to ensure regional export is smaller than production for an SUA item - #' @description The function is used as an option in SUA_REG_Agg - #' @param .DF input supply-utilization accounting data frame - #' @return Trade-adjusted supply-utilization accounting data frame - - RM_GTOSS_TRADE_TRIANGLE_INEQUALITY <- function(.DF){ - - # need to remove gross trade when export > production - # to maintain triangle the inequality rule - .DF %>% group_by(area, year, item) %>% - # Calculate GrossTradeRM when prod - export < 0 and element in export and import - # Note that when import < export, e.g., export from stock or discrepancy, import is used for adjustments - mutate(GrossTradeRM = pmax(value[element == "Production"] - value[element == "Export"], - -value[element == "Import"])) %>% - mutate(GrossTradeRM = ifelse(GrossTradeRM < 0 & element %in% c("Export", "Import"), - GrossTradeRM, 0)) %>% - # Remove both import and export - mutate(value = value + GrossTradeRM) %>% select(-GrossTradeRM) %>% - ungroup() - } - - - #' Regional SUA aggregation (e.g., 32 in GCAM) with an option of removing intra-regional trade - #' @description intra-regional trade, by default, removed only for the SUA items with bilateral trade data available. - #' E.g., for 32 regions, if removed, global trade becomes inter-32-region trade. 
- #' @param .DF_SUA input supply-utilization accounting data frame - #' @param .RM_IntraRegTrade If TRUE, intra-regional trade is removed after aggregation - #' @param .TM_Bilateral A data frame of bilateral trade data - #' @param .RM_TRIANGLE_INEQUALITY If TRUE, adjust gross trade in SUA data to ensure regional export is smaller than production for an SUA item - #' @return - - SUA_REG_Agg <- function(.DF_SUA, - .RM_IntraRegTrade = TRUE, - .TM_Bilateral, - .RM_TRIANGLE_INEQUALITY = TRUE) { - # Assertion - # iso_GCAM_regID specified regional aggregation mapping - # Need to get intra-regional area-source pairs based on the info - assertthat::assert_that(is.data.frame(AGLU_ctry)) - assertthat::assert_that(is.data.frame(iso_GCAM_regID)) - assertthat::assert_that(is.data.frame(GCAM_region_names)) - if (.RM_IntraRegTrade == TRUE) { - assertthat::assert_that(is.data.frame(.TM_Bilateral)) - } - if (.RM_TRIANGLE_INEQUALITY == TRUE) { - assertthat::assert_that(is.function(RM_GTOSS_TRADE_TRIANGLE_INEQUALITY)) - } - - - # Aggregation based on region mapping ---- - .DF_SUA %>% - # Join GCAM region mappings - gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>% - gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>% - dplyr::group_by_at(vars(area = region, year, item, element)) %>% - summarise(value = sum(value), .groups = "drop") -> - DF_SUA_Agg - - if (.RM_IntraRegTrade == TRUE) { - ## If removing intra-regional trade after aggregation ---- - - ### Get bilateral pairs that need subtraction ---- - .TM_Bilateral %>% distinct(area, source) %>% - gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>% - select(-iso) %>% - gcamdata::left_join_error_no_match(AGLU_ctry %>% select(source = FAO_country, iso), by = "source") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, Source_GCAM_region_ID = GCAM_region_ID), - by = "iso") %>% - filter(GCAM_region_ID == Source_GCAM_region_ID) %>% - select(-GCAM_region_ID, -Source_GCAM_region_ID, -iso) -> - TM_intra_REG_pair - - ### Get trade flow and aggregate to gross and aggregated region---- - .TM_Bilateral %>% inner_join(TM_intra_REG_pair, by = c("area", "source")) -> TM_bilateral_rm - - # gross trade agg. - TM_bilateral_rm %>% # Trade is balanced if aggregating bilateral - mutate(element = "Import") %>% - group_by(area_code, area, item_code, item, element, year) %>% - summarise(TCL = sum(value, na.rm = T), .groups = "drop") %>% - bind_rows( - TM_bilateral_rm %>% - mutate(element = "Export") %>% - group_by( - area_code = source_code, - area = source, - item_code, - item, - element, - year - ) %>% - summarise(TCL = sum(value, na.rm = T), .groups = "drop") - ) %>% - # regional agg. 
- gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>% - left_join(GCAM_region_names, by = "GCAM_region_ID") %>% - dplyr::group_by_at(vars(area = region, year, item, element)) %>% - summarise(TCL = sum(TCL), .groups = "drop") -> - TCL_rm - - ### Join aggregated data and subtract intra-regional trade ---- - ## be careful with the unit - DF_SUA_Agg %>% left_join(TCL_rm, by = c("area", "year", "item", "element")) %>% - mutate(value = if_else(is.na(TCL), value, value - TCL / 1000)) %>% - select(-TCL) -> - DF_SUA_Agg - } - - if (.RM_TRIANGLE_INEQUALITY == TRUE) { - ### Remove gross trade to comply with TRIANGLE_INEQUALITY ---- - ## Remove gross trade to ensure prod > export - DF_SUA_Agg %>% - RM_GTOSS_TRADE_TRIANGLE_INEQUALITY -> - DF_SUA_Agg1 - return(DF_SUA_Agg1) - } else { - return(DF_SUA_Agg) - } - } - - - # 1.2. Execution: regional aggregation ---- - - # Get SUA data ready - FAO_SUA_Kt_2010to2019 <- - GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 %>% - gather_years() %>% - filter(!is.na(value)) # filter out nonexist regions years due to gather e.g., USSR after 1991 - # Get bilateral trade data ready - FAO_BiTrade_Kt_2010to2019 <- - GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020 %>% - gather_years() %>% - filter(!is.na(value)) - ## Aggregate to *FAO_SUA_Kt_2010to2019_R ---- - FAO_SUA_Kt_2010to2019_R <- - SUA_REG_Agg( - .DF_SUA = FAO_SUA_Kt_2010to2019, - .RM_IntraRegTrade = TRUE, - .TM_Bilateral = FAO_BiTrade_Kt_2010to2019 - ) - ## Clean up - rm(FAO_BiTrade_Kt_2010to2019) - ## Done Section1 ---- - #****************************---- - - # Section2: [2010-2019] Primary equivalent aggregation to GCAM commodities ---- - - # 2.1 Helper functions for SUA primary equivalent aggregation ---- - - #' Check and assert SUA data frame structure - #' @param .DF Input supply-utilization accounting data frame - #' @return Logical T or F - - is.SUA <- function(.DF){ - # Generate SUA class - - assertthat::assert_that(is.data.frame(.DF)) - - # Data col names check - assertthat::assert_that(all( c("element", "value")%in% names(.DF))) - assertthat::assert_that(any( c("item", "item_code", "GCAM_commodity")%in% names(.DF))) - - # Has all elements - c("Opening stocks", "Production", "Import", - "Export", "Processed", "Food", "Feed", "Seed", "Other uses", "Loss", "Closing stocks", - "Stock Variation") -> - Bal_element - - assertthat::assert_that( all(Bal_element %in% unique(.DF$element))) - - } - - #' Get extraction rate - #' @description Gross extraction rate is calculated for domestic, traded, and lagged values. - #' By gross, it means sink items are aggregated. - #' The function is used in Proc_primarize. 
- #' @param .DF Input supply-utilization accounting data frame with one tier of processing - #' @param .source_item Source items or primary items in the processing - #' @param .sink_item Sink items or processed items in the processing - #' @return A data frame including regional, traded, and world extraction rates of a processing - - Get_GROSS_EXTRACTION_RATE <- function(.DF, .source_item, .sink_item){ - - assertthat::assert_that(is.SUA(.DF)) - assertthat::assert_that(is.data.frame(Mapping_SUA_PrimaryEquivalent)) - - Mapping_SUA_PrimaryEquivalent %>% filter(source_item %in% .source_item) %>% - distinct(extraction_rate_Q25, Q25asMin) %>% - mutate(minimium_extraction_rate = if_else(Q25asMin == T, extraction_rate_Q25, 0)) %>% - pull(minimium_extraction_rate) -> MIN_EXTRACTION_RATE - - .DF %>% - #Prepare data to calculate regional, traded, and world average extraction rates - filter((element %in% c("Production", "Export") & item %in% .sink_item) | - (element == "Processed" & item %in% .source_item)) %>% - group_by(area, year, element) %>% - # sum across year and also sink or source items - summarise(value = sum(value), .groups = "drop") %>% - spread(element, value) %>% - group_by(year) %>% - mutate(extraction_rate_world = sum(Production) / sum(Processed)) %>% - # in case sum(Processed) or sum(Production) == 0 - mutate(extraction_rate_world = if_else(extraction_rate_world != 0 & is.finite(extraction_rate_world), - extraction_rate_world, 1)) %>% - # Regional extraction rate = prod of an aggregated processed item / Processed use of an aggregated primary item - # Use world average to fill in NA or zero - mutate(extraction_rate = if_else(is.na(Production / Processed), extraction_rate_world, Production / Processed)) %>% - mutate(extraction_rate = if_else(extraction_rate == 0, extraction_rate_world, extraction_rate)) %>% - # Using minimum extraction rate here - mutate(minimium_extraction_rate = MIN_EXTRACTION_RATE) %>% - mutate(extraction_rate = pmax(MIN_EXTRACTION_RATE, extraction_rate)) %>% - # Calculate traded extraction rate - mutate(extraction_rate_trade = sum(Export) / sum(Export / extraction_rate), - extraction_rate_trade = if_else(is.na(extraction_rate_trade), extraction_rate, extraction_rate_trade), - # both processed and production > 0 - positive_prod = if_else(Production >0 & Processed > 0 , T, F) ) %>% - ungroup() %>% - # Calculate lagged extraction_rate but replace NA with current rate (first period) - group_by(area) %>% - mutate(extraction_rate_lag = lag(extraction_rate)) %>% - mutate(extraction_rate_lag = if_else(is.na(extraction_rate_lag), extraction_rate, extraction_rate_lag)) %>% - ungroup() %>% - select(area, year, extraction_rate, extraction_rate_lag, extraction_rate_trade, extraction_rate_world, positive_prod) - } - - - #' Separate the SUA balance into domestic and imported balanced for sink_item - #' @description The function is used in Proc_primarize - #' @param .DF Input supply-utilization accounting data frame with one tier of processing - #' @param .SINK_ITEM Sink items or processed items in the processing - #' @return SUA DF - - Get_ARMINGTON_BALANCE <- function(.DF, .SINK_ITEM){ - - assertthat::assert_that(is.SUA(.DF)) - Import_Demand_Item <- c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss") - - .DF %>% filter(item %in% .SINK_ITEM) %>% - group_by(area, year, item) %>% - # Calculate imported consumption share - # The assumption is that a portion of Import_Demand_Items was imported - # so they need to be scaled by an international extraction rate - # Note that 
stock variation is not included in import consumption to maintain stock balance - # so additional adjustment may be needed - mutate(Import_Demand_Share = value[element == "Import"] / - sum(value[element %in% Import_Demand_Item])) %>% - # replace NA and inf - mutate(Import_Demand_Share = if_else(!is.finite(Import_Demand_Share), 0, - Import_Demand_Share)) %>% - # The share should be small than 1 though outlier regions may import for storage - mutate(Import_Demand_Share = pmin(Import_Demand_Share, 1)) %>% - # when Import_Demand_Item consumption < Import they are used to share out Import consumptions - # otherwise, Residuals is used for adjustments - mutate(bal_import = case_when( - element %in% c("Import") ~ value, - element %in% Import_Demand_Item ~ Import_Demand_Share * value, - TRUE ~ 0) ) %>% - mutate(bal_import = if_else(element %in% c("Residuals"), - bal_import[element == "Import"] - sum(bal_import[element %in% Import_Demand_Item]), - bal_import )) %>% - select(-Import_Demand_Share) %>% - # Calculate domestic balance - mutate(bal_domestic = value - bal_import) %>% - select(-value) %>% ungroup() %>% - tidyr::gather(bal_source, value, bal_import, bal_domestic) %>% - # Clean the bal items - spread(element, value) %>% - mutate(`Regional supply` = `Opening stocks` + Production + `Import`, - `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` +`Closing stocks`, - Residuals = `Regional supply` - `Regional demand`) %>% - tidyr::gather(element, value, -area, -item, -year, -bal_source) - - } - - - #' Separate the domestic SUA balance into current and lagged balanced for sink_item - #' @description The function is used in Proc_primarize - #' @param .DF Output from Get_ARMINGTON_BALANCE. Input supply-utilization accounting data frame with one tier of processing and - #' @param .SINK_ITEM Sink items or processed items in the processing - #' @return SUA DF - - Get_STOCK_BALANCE <- function(.DF, .SINK_ITEM){ - - assertthat::assert_that(is.SUA(.DF)) - Opening_Stock_Item <- c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss") - - .DF %>% filter(item %in% .SINK_ITEM, - bal_source == "bal_domestic") %>% - select(-bal_source) %>% - group_by(area, year, item) %>% - mutate(Ostock = value[element == "Opening stocks"] ) %>% - mutate(Opening_Stock_Demand_Share = Ostock / - sum(value[element %in% Opening_Stock_Item])) %>% - mutate(Opening_Stock_Demand_Share = if_else(Ostock == 0, 0, - Opening_Stock_Demand_Share)) %>% - replace_na(list(Opening_Stock_Demand_Share = 1)) %>% - # The share should be small than 1 - # Other elements will be adjusted if not - mutate(Opening_Stock_Demand_Share = pmin(Opening_Stock_Demand_Share, 1)) %>% - mutate(bal_domestic_lag = case_when( - element %in% c("Opening stocks") ~ value, - element %in% Opening_Stock_Item ~ Opening_Stock_Demand_Share * value, - TRUE ~ 0) ) %>% - mutate(bal_domestic_lag = if_else(element %in% c("Residuals"), - bal_domestic_lag[element == "Opening stocks"] - sum(bal_domestic_lag[element %in% Opening_Stock_Item]), - bal_domestic_lag )) %>% - - select(-Opening_Stock_Demand_Share, -Ostock) %>% - # Calculate domestic balance - mutate(bal_domestic_current = value - bal_domestic_lag) %>% - select(-value) %>% ungroup() %>% - tidyr::gather(bal_source, value, bal_domestic_lag, bal_domestic_current) %>% - # Clean the bal items - spread(element, value) %>% - mutate(`Regional supply` = `Opening stocks` + Production + `Import`, - `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` +`Closing stocks`, - 
`Stock Variation` = `Closing stocks` - `Opening stocks`, - Residuals = `Regional supply` - `Regional demand`) %>% - tidyr::gather(element, value, -area, -item, -year, -bal_source) %>% - bind_rows( - .DF %>% filter(item %in% .SINK_ITEM, - bal_source != "bal_domestic") ) - } - - - #' Primary equivalent aggregation - #' @param .DF Input supply-utilization accounting data frame with one tier of processing - #' @param .SOURCE_ITEM Source items or primary items in the processing - #' @param .SINK_ITEM Sink items or processed items in the processing - #' @param .OUTPUT_SPECIFIC_EXTRACTION_RATE A data frame with item and output_specific_extraction_rate. - #' # In some cases, prescale sink item SUA using output_specific_extraction_rate can improve the processing. - #' # e.g., when coproduction shares are not fixed. - #' @return A supply-utilization accounting data frame of a data tier in source item equivalent - - Proc_primarize <- function(.DF, .SOURCE_ITEM, .SINK_ITEM, - .OUTPUT_SPECIFIC_EXTRACTION_RATE = NA){ - - ## Assertion - assertthat::assert_that(is.SUA(.DF)) - assertthat::assert_that(is.character(.SOURCE_ITEM)) - assertthat::assert_that(is.character(.SINK_ITEM)) - # .DF has all items needed - assertthat::assert_that(all(c(.SOURCE_ITEM, .SINK_ITEM) %in% c(.DF %>% ungroup() %>% distinct(item) %>% pull))) - assertthat::assert_that(is.function(Get_ARMINGTON_BALANCE)) - assertthat::assert_that(is.function(Get_STOCK_BALANCE)) - assertthat::assert_that(is.function(Get_GROSS_EXTRACTION_RATE)) - - ## a. Pre-scale sink item data when .OUTPUT_SPECIFIC_EXTRACTION_RATE is available ---- - if (is.data.frame(.OUTPUT_SPECIFIC_EXTRACTION_RATE)) { - .DF %>% left_join(.OUTPUT_SPECIFIC_EXTRACTION_RATE, by = "item") %>% - replace_na(list(output_specific_extraction_rate = 1)) %>% - mutate(value = value / output_specific_extraction_rate) %>% - select(-output_specific_extraction_rate) -> - .DF - } - - ## b. For the sink items of the tier, separate balance into domestic and imported ---- - # Note that the method here relies on Get_GROSS_EXTRACTION_RATE and Get_ARMINGTON_BALANCE - .DF %>% Get_ARMINGTON_BALANCE(.SINK_ITEM) %>% - Get_STOCK_BALANCE(.SINK_ITEM) %>% - # Join extraction rate for the tier - gcamdata::left_join_error_no_match( - # Get extraction rate for domestic and traded - # Note that extraction rates are mean values across time - # Note that regional extraction rate could be inf - # It is likely due to data inconsistency, e.g., zero processed in source but positive sink - # No adjustments were made since 1/inf become zero in the scaling process, preserving primary balance - .DF %>% Get_GROSS_EXTRACTION_RATE(.SOURCE_ITEM, .SINK_ITEM) %>% - select(area, year, - bal_domestic_current = extraction_rate, - bal_domestic_lag = extraction_rate_lag, - bal_import = extraction_rate_trade) %>% - tidyr::gather(bal_source, extraction_rate, bal_domestic_current, bal_domestic_lag, bal_import), - by = c("area", "year", "bal_source") - ) %>% - # Scale sink items to get source item equivalent - mutate(value = if_else(item %in% .SINK_ITEM, value / extraction_rate, value)) %>% - select(-extraction_rate) -> .df1 - - ## c. 
Aggregate sink_items are aggregated into "sink_item" ---- - # And production & processed are adjusted for primary aggregation - # Bind source items as well - .df1 %>% mutate(item = replace(item, item %in% .SINK_ITEM, "sink_item")) %>% - group_by(area, year, item, element) %>% - summarise(value = sum(value), .groups = "drop") %>% - # Move production to negative processed for aggregation - group_by(area, year, item) %>% - mutate(value = if_else(item == "sink_item" & element %in% c("Production", "Processed"), - value - value[element == "Production"], value)) %>% - ungroup() %>% - # Bind source items and other items not used - bind_rows(.DF %>% filter(!item %in% .SINK_ITEM))-> .df2 - - ## d. Merge sink SUA into source items SUA ---- - # Note that with multiple source items, sinks are aggregated into sources based on average processed shares across sources - # Prepare data to calculate world average source share - .df2 %>% filter(element == "Processed", item %in% .SOURCE_ITEM) %>% - group_by(item) %>% mutate(value = sum(value)) %>% ungroup() %>% - group_by(area, year) %>% - mutate(share = value/sum(value)) %>% - # in case of inf. use simple share - mutate(share = if_else(is.finite(share), share, dplyr::n()/sum(dplyr::n()))) %>% - ungroup() %>% - select(-element, - value) %>% - # full join is needed since item (left) and element(right) are needed - full_join(.df2 %>% filter(item == "sink_item") %>% - select(-item), by = c("area", "year")) %>% - mutate(value = share * value) %>% select(-share) %>% - # Bind source item and aggregated across source & sink items based on primary equivalent - bind_rows(.df2 %>% filter(item != "sink_item")) %>% - group_by(area, year, item, element) %>% - summarise(value = sum(value), .groups = "drop") -> .df3 - - .df3 %>% - spread(element, value) %>% - mutate(`Regional supply` = `Opening stocks` + Production + `Import`, - `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` +`Closing stocks`, - Residuals = `Regional supply` - `Regional demand`) %>% - tidyr::gather(element, value, -area, -item, -year) -> - .df4 - - ## e. Return data - return(.df4) - - ## Done Proc_primarize function - - } - - - ### FN for SUA primary equivalent aggregation - - #' Recursively aggregate nest into primary equivalent using from bottom to top - #' @description The function is a recursive wrapper of Proc_primarize - #' @param .COMM Primary commodity in the FAO item mapping (APE_comm). - #' @param .MAPPING Mapping of SUA items to primary products by level of nestings. Mapping_SUA_PrimaryEquivalent was constructed. - #' @param .DF_SUA Input SUA data frame - #' @param .ITEM_SUFFIX Adding a suffix (_APE) for item in primary equivalent in output. Note that an aggregated primary commodity is also kept. - #' @param .RECUR_TOP_NEST Bottom nest_level in mapping. If 1, all the way through APE but if 2 only run to top but 1 nest - #' @return SUA data frame in primary equivalent - - recursively_primarize <- function(.COMM, - .MAPPING = Mapping_SUA_PrimaryEquivalent, - .DF_SUA = Bal_new_all, - .ITEM_SUFFIX = "_APE", - .RECUR_TOP_NEST = 1 - ) { - - - ## Assertion - # Need Proc_primarize exist - assertthat::assert_that(is.function(Proc_primarize)) - assertthat::assert_that(is.character(.COMM)) - # Required format for .MAPPING - assertthat::assert_that(is.data.frame(.MAPPING)) - assertthat::assert_that(all(c("APE_comm", "nest_level", - "source_item", "sink_item", - "output_specific_extraction_rate") %in% names(.MAPPING))) - - ## a. 
-     ## a. Get data for all commodities needed, both primary and processed ----
-     COMM_ALL <-
-       .MAPPING %>%
-       filter(APE_comm == .COMM) %>%
-       select(source_item, sink_item) %>% unlist() %>%
-       unique()
-     assertthat::assert_that(length(COMM_ALL) > 0, msg = "Item does not exist; check mapping")
-     # Subset relevant data only and assert that all needed items exist
-     assertthat::assert_that(is.data.frame(.DF_SUA))
-     DF_BAL <-
-       .DF_SUA %>% filter(item %in% COMM_ALL)
-     assertthat::assert_that(length(setdiff(COMM_ALL, unique(DF_BAL$item))) == 0)
-
-
-     ## b. Get total number of nests based on nest_level in .MAPPING ----
-     .MAPPING %>%
-       filter(APE_comm == .COMM) %>% pull(nest_level) %>% max ->
-       nNests
-
-     ## c. Get primary commodities set ----
-     .MAPPING %>% filter(source_primary == TRUE) %>%
-       distinct(source_item) %>% pull %>%
-       intersect(COMM_ALL) ->
-       COMM_primary
-
-     ## d. Wrapper function of Proc_primarize to use nest_level and mapping inputs ----
-     Proc_primarize1 <- function(.DF, .COMM, .NEST_LEVEL){
-
-       .MAPPING %>%
-         filter(APE_comm == .COMM, nest_level == .NEST_LEVEL) ->
-         .SUA
-
-       ### Get OUTPUT_SPECIFIC_EXTRACTION_RATE ----
-       .SUA %>%
-         filter(!is.na(output_specific_extraction_rate)) %>%
-         select(item = sink_item, output_specific_extraction_rate) ->
-         OUTPUT_SPECIFIC_EXTRACTION_RATE
-
-       .DF %>% Proc_primarize(.SOURCE_ITEM = unique(.SUA$source_item),
-                              .SINK_ITEM = unique(.SUA$sink_item),
-                              .OUTPUT_SPECIFIC_EXTRACTION_RATE = OUTPUT_SPECIFIC_EXTRACTION_RATE)
-     }
-
-     ## e. A recursive helper function ----
-     recursively_repeat <- function(.x, .reps, Proc_primarize1) {
-       if (.reps == .RECUR_TOP_NEST - 1) {
-         .x
-       } else {
-         recursively_repeat(Proc_primarize1(.x, .COMM,
-                                            .NEST_LEVEL = .reps),
-                            .reps - 1, Proc_primarize1)
-       }
-     }
-
-     ## f. Execute the recursive function to realize recursively_primarize ----
-     # through all nests
-     recursively_repeat(DF_BAL, .reps = nNests, Proc_primarize1) -> DF_BAL_PE
-
-     ## g. Return data ----
-     # If .RECUR_TOP_NEST == 1, rename items to the APE item and
-     # also bind the primary commodity balance;
-     # otherwise, keep item names
-     if (.RECUR_TOP_NEST == 1) {
-       return(DF_BAL_PE %>%
-                # Update APE item name
-                mutate(item = paste0(.COMM, .ITEM_SUFFIX)) %>%
-                # Aggregate APE items when needed
-                group_by(area, year, item, element) %>%
-                summarise(value = sum(value), .groups = "drop") %>%
-                ungroup() %>%
-                # Keep an aggregated primary commodity
-                bind_rows(DF_BAL %>%
-                            filter(item %in% COMM_primary) %>%
-                            mutate(item = .COMM) %>%
-                            group_by(area, year, item, element) %>%
-                            summarise(value = sum(value), .groups = "drop") %>%
-                            ungroup()) )
-     } else {
-       return(DF_BAL_PE)
-     }
-     ## Done recursively_primarize function
-   }
-
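-   # Illustrative call (hypothetical APE commodity name; not part of the original code):
-   # recursively_primarize("Wheat", .MAPPING = Mapping_SUA_PrimaryEquivalent,
-   #                       .DF_SUA = Bal_new_all, .RECUR_TOP_NEST = 1)
-   # would return the SUA balance aggregated into "Wheat_APE", plus an aggregated
-   # primary "Wheat" balance
-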
-   # 2.2. Execution: process data into APE ----
-
-
-   ## Loop through all GCAM_commodity values with available data ----
-   Mapping_SUA_PrimaryEquivalent %>% distinct(GCAM_commodity) %>% pull ->
-     GCAM_commodity
-
-   lapply(GCAM_commodity, function(.GCAM_COMM){
-
-     ### Assert that needed items are included in the data
-     assertthat::assert_that(
-       Mapping_SUA_PrimaryEquivalent %>%
-         filter(GCAM_commodity == .GCAM_COMM) %>%
-         distinct(sink_item) %>% pull %>%
-         setdiff(FAO_SUA_Kt_2010to2019_R %>% pull(item)) %>% length() == 0
-     )
-     ### Loop and bind
-     Mapping_SUA_PrimaryEquivalent %>%
-       filter(GCAM_commodity == .GCAM_COMM) %>%
-       distinct(APE_comm) %>% pull %>%
-       # Get the APE_comm and enter them into recursively_primarize
-       lapply(FUN = recursively_primarize,
-              .DF_SUA = FAO_SUA_Kt_2010to2019_R) %>%
-       bind_rows() %>%
-       # Aggregate APE_comm into GCAM_commodity
-       mutate(GCAM_commodity = if_else(grepl("_APE", item), .GCAM_COMM,
-                                       paste0(.GCAM_COMM, "_Primary"))) %>%
-       # Keep APE balance only
-       filter(grepl("_APE", item)) %>%
-       group_by(region = area, year, GCAM_commodity, element) %>%
-       summarise(value = sum(value), .groups = "drop")
-   }) %>% bind_rows() ->
-     GCAM_APE_after2010
-
-   rm(FAO_SUA_Kt_2010to2019_R)
-
-   ## Done Section2 ----
-   #****************************----
-
-
-   # Section4 [1970-2019] GCAM_APE SUA ----
-
-   # 4.1. Helper functions ----
-   Check_Balance_SUA <- function(.DF){
-
-     assertthat::assert_that(all(c("element") %in% names(.DF)))
-     assertthat::assert_that(all(c("Import", "Export", "Production",
-                                   "Food", "Feed", "Other uses") %in%
-                                   c(.DF %>% distinct(element) %>% pull)))
-     # 0. Check NA
-     if (isTRUE(.DF %>% filter(is.na(value)) %>% nrow == 0)) {
-       message("Good! No NA in value") } else {
-         warning("NA in value")
-       }
-
-
-     # 1. Positive values except stock variation and other uses
-     if (isTRUE(.DF %>% filter(!element %in% c("Stock Variation", "Other uses")) %>%
-                summarise(min = min(value, na.rm = T)) %>% pull(min) >= -0.001)) {
-       message("Good! Signs checked") } else {
-         warning("Negative values in key elements (not including stock variation and other uses)")
-       }
-
-     # 2. Trade balance in all years and items
-     if (isTRUE(.DF %>% filter(element %in% c("Import", "Export")) %>%
-                group_by(year, GCAM_commodity, element) %>%
-                summarise(value = sum(value), .groups = "drop") %>%
-                spread(element, value) %>% filter(abs(Import - Export) > 0.0001) %>% nrow() == 0)) {
-       message("Good! Gross trade in balance") } else {
-         warning("Gross trade imbalance")
-       }
-
-     # 3. SUA balance check
-     if (isTRUE(.DF %>%
-                spread(element, value) %>%
-                mutate(`Regional supply` = Production + `Import`,
-                       `Regional demand` = `Export` + Feed + Food + `Other uses`,
-                       bal = abs(`Regional supply` - `Regional demand`)) %>%
-                filter(bal > 0.0001) %>% nrow() == 0)) {
-       message("Good! Regional supply = Regional demand") } else {
-         warning("Regional supply != Regional demand")
-       }
-
-     # 4. Balanced in all dimensions
-     assertthat::assert_that(.DF %>% nrow() ==
-                               .DF %>% distinct(year) %>% nrow *
-                               .DF %>% distinct(GCAM_commodity) %>% nrow *
-                               .DF %>% distinct(element) %>% nrow *
-                               .DF %>% distinct(region) %>% nrow)
-
-   }
-
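-   # Illustrative check on a minimal hypothetical balance (not part of the original code):
-   # tibble(region = "R1", year = 2015, GCAM_commodity = "Wheat",
-   #        element = c("Production", "Import", "Export", "Food", "Feed", "Other uses"),
-   #        value = c(10, 3, 3, 6, 2, 2)) %>%
-   #   Check_Balance_SUA
-   # Supply (10 + 3) equals demand (3 + 6 + 2 + 2), so all four checks should pass
-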
-   # 4.2. Connect and bind data from two periods ----
-
-   GCAM_AgLU_SUA_APE_2010_2019 <-
-     GCAM_APE_after2010 %>%
-     mutate(unit = "1000 tonnes") %>%
-     # clean and aggregate elements not used
-     filter(!element %in% c("Regional demand", "Regional supply",
-                            "Opening stocks", "Closing stocks")) %>%
-     mutate(element = replace(element,
-                              element %in% c("Stock Variation", "Processed",
-                                             "Seed", "Residuals", "Loss"),
-                              "Other uses")) %>%
-     dplyr::group_by_at(dplyr::vars(-value)) %>%
-     summarise(value = sum(value), .groups = "drop")
-
-   ## Check balance
-   GCAM_AgLU_SUA_APE_2010_2019 %>% Check_Balance_SUA
-   rm(GCAM_APE_after2010)
-
-
-   ## Done Section4 ----
-   #****************************----
-
-   # Section5 [1970-2019] Connect production and area data ----
-
-   # This section gets crop and livestock production before aggregation (FAO region and items)
-   # For both before 2010 and after 2010
-   # They are also aggregated to GCAM region and commodities to assert consistency
-   # The processing includes all crops (including fodder crops) and livestock items
-
-   # 5.1. Get all mapping straight ----
-
-   Primary_Item_CROP <-
-     FAO_ag_items_PRODSTAT %>%
-     select(item, GCAM_commodity, GCAM_subsector) %>%
-     filter(!is.na(item), !is.na(GCAM_commodity)) %>%
-     # Fodder grass has a duplicate as it is mapped to different GTAP crops
-     distinct %>%
-     mutate(CropMeat = if_else(GCAM_commodity %in% c("FodderGrass", "FodderHerb"),
-                               "Crop_Fodder", "Crop_NonFodder"))
-   assertthat::assert_that(
-     all(Primary_Item_CROP %>% filter(CropMeat == "Crop_NonFodder") %>% pull(item) %in%
-           c(Mapping_SUA_PrimaryEquivalent %>% filter(source_primary == T) %>%
-               distinct(item = source_item) %>% pull)),
-     msg = "Inconsistent mapping of primary crops between FAO_ag_items_PRODSTAT and Mapping_SUA_PrimaryEquivalent" )
-
-   Primary_Item_MEAT <-
-     Mapping_SUA_PrimaryEquivalent %>%
-     # include animal MeatEq items since they are treated as primary production after 2010
-     filter(source_primary == T | grepl("MeatEq", APE_comm)) %>%
-     distinct(GCAM_commodity, item = source_item) %>%
-     filter(GCAM_commodity %in%
-              c(FAO_an_items_PRODSTAT %>%
-                  filter(!is.na(GCAM_commodity)) %>%
-                  distinct(GCAM_commodity) %>% pull)) %>%
-     mutate(CropMeat = "Meat")
-
-   # 5.2. Get primary production for all ----
-   # Connecting, mapping, arranging, and asserting
-
-   ## Bind production and area data for both fodder and nonfodder ----
-   FAO_AgProd_Kt_Area_Kha <-
-     GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020 %>%
-     gather_years() %>%
-     filter(!is.na(value))
-
-   # Assert that yields contain no Inf values
-   assertthat::assert_that(
-     # only a safeguard here, as data was cleaned and area and prod are matched
-     FAO_AgProd_Kt_Area_Kha %>%
-       # filter only primary crop items (all crops with area)
-       filter(item_set %in% c("QCL_COMM_CROP_PRIMARY",
-                              "QCL_COMM_CROP_PRIMARY_FODDER")) %>%
-       select(-unit) %>% spread(element, value) %>%
-       filter(is.infinite(Production / `Area harvested`) |
-                is.infinite(`Area harvested` / Production)) %>%
-       nrow == 0,
-     msg = "Check region/item for prod > 0 & area = 0 or prod = 0 & area > 0" )
-
-   # Assert that primary production in the two sources is consistent
-   assertthat::assert_that(
-     FAO_SUA_Kt_2010to2019 %>% filter(element == "Production") %>%
-       inner_join(FAO_AgProd_Kt_Area_Kha %>%
-                    filter(item_set != "QCL_COMM_OTHERPROC") %>%
-                    filter(element == "Production") %>% rename(value1 = value),
-                  by = c("area_code", "item_code", "element", "item", "area", "year")) %>%
-       mutate(diff = abs(value1 - value)) %>%
-       filter(diff > 0.0001) %>% nrow() == 0,
-     msg = "Primary production in SUA (FAO_SUA_Kt_2010to2019) and
-            QCL (FAO_AgProd_Kt_Area_Kha) are inconsistent "
-   )
-
-   ## a. All production ----
-   # After 2010, meat production covers more items than (QCL) FAO_AgProd_Kt_Area_Kha
-   # Production in FAO_AgProd_Kt_Area_Kha before 2010 was used
-   FAO_AgProd_Kt_Area_Kha %>%
-     filter(element == "Production") %>%
-     filter(year < min(FAO_SUA_Kt_2010to2019$year)) %>%
-     select(names(FAO_SUA_Kt_2010to2019)) %>%
-     bind_rows(
-       # For after 2010
-       # Note that not all meat items came from QCL_PROD (unlike primary crops)
-       # E.g., meat Eq, offals (chicken livers), etc. were from derivation or SCL
-       # But all items should exist in Bal_new_all
-       # And Bal_new_all is identical to QCL_PROD for primary production
-       FAO_SUA_Kt_2010to2019 %>%
-         filter(element == "Production") ) %>%
-     bind_rows(
-       # bind fodder crops for after 2010
-       FAO_AgProd_Kt_Area_Kha %>%
-         filter(item_set == "QCL_COMM_CROP_PRIMARY_FODDER",
-                element == "Production") %>%
-         filter(year >= min(FAO_SUA_Kt_2010to2019$year)) %>%
-         select(names(FAO_SUA_Kt_2010to2019))
-     ) %>%
-     # Inner join works as a filter here
-     # Keep subsector info for crops
-     inner_join(Primary_Item_CROP %>%
-                  bind_rows(Primary_Item_MEAT %>%
-                              mutate(GCAM_subsector = GCAM_commodity)),
-                by = "item") %>%
-     # add in iso and GCAM region IDs
-     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
-     left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") ->
-     FAO_AgProd_Kt_All
-
-   FAO_AgProd_Kt_All %>%
-     left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
-     dplyr::group_by_at(vars(region, year, GCAM_commodity, element, CropMeat)) %>%
-     summarise(value = sum(value), .groups = "drop") ->
-     QCL_PROD_GCAM
-
-   assertthat::assert_that(
-     GCAM_AgLU_SUA_APE_2010_2019 %>%
-       filter(element == "Production") %>%
-       left_join_error_no_match(
-         QCL_PROD_GCAM %>% filter(CropMeat != "Crop_Fodder") %>%
-           select(-CropMeat) %>%
-           rename(value1 = value) %>%
-           complete(nesting(region, year, element), GCAM_commodity, fill = list(value1 = 0)),
-         by = c("region", "GCAM_commodity", "year", "element")) %>%
-       mutate(diff = abs(value1 - value)) %>%
-       filter(diff > 0.0001) %>% nrow() == 0,
-     msg = "Primary production from two sources
-            (GCAM_AgLU_SUA_APE_2010_2019 and FAO_AgProd_Kt_Area_Kha) are inconsistent." )
-
-   ## b. All area harvested ----
-
-
-   FAO_AgProd_Kt_Area_Kha %>%
-     filter(element == "Area harvested") %>%
-     select(names(FAO_SUA_Kt_2010to2019)) %>%
-     # Keep subsector info for crops
-     inner_join(Primary_Item_CROP, by = "item") %>%
-     # add in iso and GCAM region IDs
-     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
-     left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") ->
-     FAO_AgArea_Kha_All
-
-   #****************************----
-   # Section6 Connect food items and macronutrient rates ----
-
-   # 6.1 Separate FAO food items into GCAM food items and NEC for macronutrients ----
-   # GCAM includes most of the food items
-   # All food items with available macronutrient info from FAOSTAT are included
-
-   # a. Get all GCAM SUA items from the mapping by binding both source and sink items
-   # about 486 items (out of 530) used in GCAM
-
-   Mapping_SUA_PrimaryEquivalent %>%
-     select(GCAM_commodity, item = source_item) %>%
-     bind_rows(Mapping_SUA_PrimaryEquivalent %>%
-                 select(GCAM_commodity, item = sink_item)) %>%
-     distinct() %>% arrange(GCAM_commodity) ->
-     SUA_Items_GCAM
-
-   assertthat::assert_that(
-     SUA_Items_GCAM %>% distinct(item) %>% nrow() == SUA_Items_GCAM %>% nrow(),
-     msg = "Check duplicates in Mapping_SUA_PrimaryEquivalent SUA items"
-   )
-
-   # Highly processed products or other products are not included in GCAM
-   # (e.g., wine, infant food, and other nonfood items)
-
-   FAO_SUA_Kt_2010to2019 %>% distinct(item) %>%
-     filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM
-
-   # b. There are 426 FAO food items, all included in FAO_SUA_Kt_2010to2019 (530 items)
-   # SUA_Items_Food includes both GCAM and NonGCAM (NEC)
-   FAO_SUA_Kt_2010to2019 %>% distinct(item, item_code) %>%
-     filter(item %in% unique(GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean$item)) %>%
-     left_join(SUA_Items_GCAM, by = "item") %>%
-     # For NA GCAM_commodity: not elsewhere classified (NEC)
-     # So we would know the % of food calories not included in GCAM commodities
-     mutate(GCAM_commodity = if_else(is.na(GCAM_commodity), "NEC", GCAM_commodity)) ->
-     SUA_Items_Food
-
-
-   # 6.2 Get macronutrient values ----
-
-   ### a. Get world average macronutrient rates ----
-   # For filling in missing values
-
-   GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>%
-     tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>%
-     group_by(item, item_code, macronutrient) %>%
-     summarise(macronutrient_value_World = mean(macronutrient_value), .groups = "drop") %>%
-     ungroup() ->
-     SUA_food_macronutrient_rate_World
-
-   ### b. Calculate SUA food calorie consumption by joining macronutrient rates and SUA food ----
-
-   FAO_SUA_Kt_2010to2019 %>%
-     filter(element == "Food", item_code %in% SUA_Items_Food$item_code) %>%
-     rename(Food_Kt = value) %>%
-     select(-element) %>%
-     left_join_error_no_match(SUA_Items_Food, by = c("item_code", "item")) %>%
-     repeat_add_columns(
-       tibble(macronutrient = c("calperg", "fatperc", "proteinperc"))) %>%
-     left_join(
-       GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>%
-         tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc),
-       by = c("area_code", "item_code", "item", "macronutrient")
-     ) %>%
-     left_join_error_no_match(SUA_food_macronutrient_rate_World,
-                              by = c("item_code", "item", "macronutrient")) %>%
-     mutate(macronutrient_value = if_else(is.na(macronutrient_value),
-                                          macronutrient_value_World,
-                                          macronutrient_value)) %>%
-     # calculate total Cal, protein, and fat in food
-     # value was in 1000 tonnes, i.e., 10^9 g
-     # e.g., 100 Kt of food at 5% fat: 100 * 5 / 100 / 1000 = 0.005 Mt of fat
-     mutate(value = macronutrient_value * Food_Kt,
-            value = if_else(macronutrient %in% c("fatperc", "proteinperc"),
-                            value / 100 / 1000, value)) %>% # unit from perc to Mt
-     select(-macronutrient_value, -macronutrient_value_World, -Food_Kt) %>%
-     # rename element with units
-     mutate(macronutrient = case_when(
-       macronutrient == "calperg" ~ "MKcal",
-       macronutrient == "fatperc" ~ "MtFat",
-       macronutrient == "proteinperc" ~ "MtProtein" )) %>%
-     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
-     left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") ->
-     FAO_Food_Macronutrient_All_2010_2019
-
-   ### c. Get the max values of macronutrient conversion rates (per GCAM_commodity) ----
-   # This will be used later as an upper bound to improve the data
-   GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>%
-     tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>%
-     left_join_error_no_match(SUA_Items_Food,
-                              by = c("item_code", "item")) %>%
-     group_by(GCAM_commodity, macronutrient) %>%
-     summarise(max_macronutrient_value = max(macronutrient_value), .groups = "drop") ->
-     FAO_Food_MacronutrientRate_2010_2019_MaxValue
-
-
-   #****************************----
-   # Produce outputs ----
-   #*******************************
-
-   GCAM_AgLU_SUA_APE_2010_2019 %>%
-     add_title("GCAM_AgLU_SUA_APE_2010_2019") %>%
-     add_units("kton") %>%
-     add_comments("Supply utilization balance for GCAM commodities and regions in primary equivalent") %>%
-     add_precursors("aglu/AGLU_ctry",
-                    "common/iso_GCAM_regID",
-                    "common/GCAM_region_names",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020",
-                    "aglu/FAO/Mapping_SUA_PrimaryEquivalent",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009",
-                    "aglu/FAO/Mapping_item_FBS_GCAM") ->
-     GCAM_AgLU_SUA_APE_2010_2019
-
-   FAO_AgProd_Kt_All %>%
-     add_title("FAO_AgProd_Kt_All") %>%
-     add_units("1000 tonnes") %>%
-     add_comments("FAO crop (incl. fodder) and livestock production for FAO regions and items, with GCAM mappings") %>%
-     add_precursors("aglu/AGLU_ctry",
-                    "common/iso_GCAM_regID",
-                    "aglu/FAO/FAO_ag_items_PRODSTAT",
-                    "aglu/FAO/FAO_an_items_PRODSTAT",
-                    "aglu/FAO/Mapping_SUA_PrimaryEquivalent",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") ->
-     FAO_AgProd_Kt_All
-
-   FAO_AgArea_Kha_All %>%
-     add_title("FAO_AgArea_Kha_All") %>%
-     add_units("1000 ha") %>%
add_comments("Harvested area") %>% - add_precursors("aglu/AGLU_ctry", - "common/iso_GCAM_regID", - "aglu/FAO/FAO_ag_items_PRODSTAT", - "aglu/FAO/FAO_an_items_PRODSTAT", - "aglu/FAO/Mapping_SUA_PrimaryEquivalent", - "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") -> - FAO_AgArea_Kha_All - - FAO_Food_Macronutrient_All_2010_2019 %>% - add_title("FAO_Food_Macronutrient_All_2010_2019") %>% - add_units("MKcal, MtFat, MtProtein") %>% - add_comments("Macronutrient consumption values connected to food consumption in GCAM_AgLU_SUA_APE_1973_2019") %>% - add_precursors("aglu/AGLU_ctry", - "common/iso_GCAM_regID", - "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", - "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", - "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> - FAO_Food_Macronutrient_All_2010_2019 - - FAO_Food_MacronutrientRate_2010_2019_MaxValue %>% - add_title("FAO_Food_MacronutrientRate_2010_2019_MaxValue") %>% - add_units("cal per g, fat perc. , protein perc.") %>% - add_comments("The max value of macronutrient conversion rate across region, year, and SUA items (per GCAM_commodity") %>% - add_precursors("aglu/AGLU_ctry", - "common/iso_GCAM_regID", - "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", - "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", - "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> - FAO_Food_MacronutrientRate_2010_2019_MaxValue - - return_data(MODULE_OUTPUTS) - - - } else { - stop("Unknown command") - } -} diff --git a/data-raw/generate_package_data_faostat_helper_funcs.R b/data-raw/generate_package_data_faostat_helper_funcs.R index a851903a..5d6e37cf 100644 --- a/data-raw/generate_package_data_faostat_helper_funcs.R +++ b/data-raw/generate_package_data_faostat_helper_funcs.R @@ -68,9 +68,13 @@ rm_accent <- function(.df, ...){ length(intersect(c(...), names(.df))) == length(c(...)), msg = "Columns listed not included in the data frame") + # .df %>% + # mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>% + # mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "") + .df %>% - mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>% - mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "") + mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>% + mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = "")) } diff --git a/inst/extdata/aglu/FAO/SUA_item_code_map.csv b/inst/extdata/aglu/FAO/SUA_item_code_map.csv new file mode 100644 index 00000000..980a4b3b --- /dev/null +++ b/inst/extdata/aglu/FAO/SUA_item_code_map.csv @@ -0,0 +1,540 @@ +# File: SUA_item_code_map.csv +# Title: Mapping of SUA items to their item_code +# Units: NA +# Description: Ideally we just use SUA (supply utilization accounts) item names directly. However, given the volume of data +# it helps to use item_code to reduce the size of the input data as well as speed up processing. We will then need this mapping +# to ultimately translate back to item names. 
+# Last update: 8-15-2022 (XZ) +# Column types: ci +# ---------- +item,item_code +Wheat,15 +"Rice, paddy",27 +Barley,44 +Beer of barley,51 +Maize,56 +"Oil, maize",60 +Rye,71 +Oats,75 +Millet,79 +Sorghum,83 +Buckwheat,89 +Quinoa,92 +Fonio,94 +Triticale,97 +Canary seed,101 +"Grain, mixed",103 +Potatoes,116 +Sweet potatoes,122 +Cassava,125 +Roots and tubers nes,149 +Sugar beet,157 +Sugar crops nes,161 +Sugar Raw Centrifugal,162 +Molasses,165 +"Beans, dry",176 +"Broad beans, horse beans, dry",181 +"Peas, dry",187 +Chick peas,191 +Lentils,201 +Bambara beans,203 +Vetches,205 +"Cashew nuts, with shell",217 +Chestnut,220 +"Almonds, with shell",221 +"Walnuts, with shell",222 +Pistachios,223 +Kola nuts,224 +"Hazelnuts, with shell",225 +Areca nuts,226 +Nuts nes,234 +Soybeans,236 +"Oil, groundnut",244 +Coconuts,249 +"Oil, coconut (copra)",252 +"Oil, palm",257 +"Oil, palm kernel",258 +Olives,260 +"Oil, olive, virgin",261 +Karite nuts (sheanuts),263 +Sunflower seed,267 +"Oil, sunflower",268 +Rapeseed,270 +"Oil, rapeseed",271 +"Oil, safflower",281 +Sesame seed,289 +"Oil, sesame",290 +Mustard seed,292 +Poppy seed,296 +Cottonseed,329 +"Oil, cottonseed",331 +Linseed,333 +"Oil, linseed",334 +Oilseeds nes,339 +Cabbages and other brassicas,358 +Artichokes,366 +Asparagus,367 +Lettuce and chicory,372 +Spinach,373 +Tomatoes,388 +Cauliflowers and broccoli,393 +"Pumpkins, squash and gourds",394 +Cucumbers and gherkins,397 +Eggplants (aubergines),399 +"Chillies and peppers, green",401 +"Onions, shallots, green",402 +"Onions, dry",403 +Garlic,406 +"Leeks, other alliaceous vegetables",407 +"Beans, green",414 +"Peas, green",417 +Carrots and turnips,426 +"Maize, green",446 +Mushrooms and truffles,449 +"Vegetables, fresh nes",463 +Bananas,486 +Plantains and others,489 +Oranges,490 +"Tangerines, mandarins, clementines, satsumas",495 +Lemons and limes,497 +Grapefruit (inc. 
pomelos),507 +Apples,515 +Pears,521 +Quinces,523 +Apricots,526 +"Cherries, sour",530 +Cherries,531 +Peaches and nectarines,534 +Plums and sloes,536 +Strawberries,544 +Gooseberries,549 +Currants,550 +Blueberries,552 +Cranberries,554 +Grapes,560 +Wine,564 +Watermelons,567 +"Melons, other (inc.cantaloupes)",568 +Figs,569 +"Mangoes, mangosteens, guavas",571 +Avocados,572 +Pineapples,574 +Dates,577 +Persimmons,587 +Cashewapple,591 +Kiwi fruit,592 +Papayas,600 +"Fruit, tropical fresh nes",603 +"Fruit, fresh nes",619 +"Coffee, green",656 +"Cocoa, beans",661 +Tea,667 +Mate,671 +Pepper (piper spp.),687 +"Chillies and peppers, dry",689 +Vanilla,692 +Cinnamon (cannella),693 +Cloves,698 +"Nutmeg, mace and cardamoms",702 +"Anise, badian, fennel, coriander",711 +Ginger,720 +Spices nes,723 +"Meat, cattle",867 +"Offals, edible, cattle",868 +"Fat, cattle",869 +"Milk, whole fresh cow",882 +Cream fresh,885 +"Butter, cow milk",886 +"Milk, skimmed cow",888 +"Milk, whole condensed",889 +"Whey, condensed",890 +Yoghurt,891 +"Milk, whole evaporated",894 +"Milk, whole dried",897 +"Milk, skimmed dried",898 +"Whey, dry",900 +"Cheese, whole cow milk",901 +"Meat, sheep",977 +"Offals, sheep,edible",978 +"Cheese, sheep milk",984 +"Meat, goat",1017 +"Offals, edible, goats",1018 +"Meat, pig",1035 +"Offals, pigs, edible",1036 +"Fat, pigs",1037 +Lard,1043 +"Meat, chicken",1058 +"Eggs, hen, in shell",1062 +"Meat, duck",1069 +"Meat, goose and guinea fowl",1073 +"Meat, turkey",1080 +"Eggs, other bird, in shell",1091 +"Meat, horse",1097 +"Fat, camels",1129 +"Meat, rabbit",1141 +"Meat, game",1163 +Meat nes,1166 +"Honey, natural",1182 +Tallow,1225 +"Margarine, short",1242 +"Flour, wheat",16 +"Bran, wheat",17 +Macaroni,18 +Bread,20 +Bulgur,21 +Pastry,22 +"Rice, husked",28 +"Rice, milled/husked",29 +"Rice, milled",31 +"Rice, broken",32 +"Oil, rice bran",36 +"Flour, rice",38 +"Beverages, fermented rice",39 +"Cereals, breakfast",41 +"Barley, pearled",46 +Malt,49 +"Germ, maize",57 +"Flour, maize",58 +"Bran, maize",59 +Oats rolled,76 +"Bran, millet",81 +"Bran, sorghum",85 +"Flour, mixed grain",104 +Infant food,109 +Wafers,110 +"Flour, cereals",111 +Cereal preparations nes,113 +Mixes and doughs,114 +"Food preparations, flour, malt extract",115 +"Flour, potatoes",117 +"Potatoes, frozen",118 +"Flour, cassava",126 +Cassava dried,128 +"Starch, cassava",129 +"Flour, roots and tubers nes",150 +Maple sugar and syrups,160 +Sugar non-centrifugal,163 +Sugar refined,164 +"Fructose and syrup, other",166 +Sugar nes,167 +Sugar confectionery,168 +Glucose and dextrose,172 +Lactose,173 +"Flour, pulses",212 +"Brazil nuts, shelled",229 +"Cashew nuts, shelled",230 +Almonds shelled,231 +"Walnuts, shelled",232 +"Hazelnuts, shelled",233 +"Nuts, prepared (exc. 
groundnuts)",235 +"Oil, soybean",237 +Soya sauce,239 +Soya paste,240 +"Groundnuts, shelled",243 +"Groundnuts, prepared",246 +Peanut butter,247 +"Coconuts, desiccated",250 +Copra,251 +Olives preserved,262 +Butter of karite nuts,264 +"Oil, castor beans",266 +"Oil, olive residues",274 +"Flour, mustard",295 +"Oil, vegetable origin nes",340 +"Juice, tomato",390 +"Tomatoes, paste",391 +"Tomatoes, peeled",392 +Sweet corn frozen,447 +Sweet corn prep or preserved,448 +"Mushrooms, canned",451 +"Vegetables, dehydrated",469 +Vegetables in vinegar,471 +"Vegetables, preserved nes",472 +"Vegetables, frozen",473 +"Vegetables, temporarily preserved",474 +"Vegetables, preserved, frozen",475 +"Vegetables, homogenized preparations",476 +"Juice, orange, single strength",491 +"Juice, orange, concentrated",492 +"Juice, lemon, single strength",498 +"Juice, lemon, concentrated",499 +"Juice, grapefruit",509 +"Juice, grapefruit, concentrated",510 +"Juice, citrus, single strength",513 +"Juice, citrus, concentrated",514 +Cider etc,517 +"Juice, apple, single strength",518 +"Juice, apple, concentrated",519 +"Apricots, dry",527 +Plums dried (prunes),537 +Raisins,561 +"Juice, grape",562 +Vermouths & similar,565 +Figs dried,570 +Pineapples canned,575 +"Juice, pineapple",576 +"Juice, pineapple, concentrated",580 +"Fruit, dried nes",620 +"Juice, fruit nes",622 +"Fruit, prepared nes",623 +"Fruit, cooked, homogenized preparations",626 +"Beverages, distilled alcoholic",634 +"Coffee, roasted",657 +"Coffee, substitutes containing coffee",658 +"Coffee, extracts",659 +"Cocoa, paste",662 +"Cocoa, butter",664 +"Cocoa, powder & cake",665 +Chocolate products nes,666 +"Tea, mate extracts",672 +"Feed and meal, gluten",846 +"Meat, cattle, boneless (beef & veal)",870 +"Meat, beef and veal sausages",874 +"Meat, beef, preparations",875 +"Yoghurt, concentrated or not",892 +"Buttermilk, curdled, acidified milk",893 +"Cheese, processed",907 +"Milk, products of natural constituents nes",909 +Ice cream and edible ice,910 +Grease incl. 
lanolin wool,994 +"Meat, pork",1038 +Bacon and ham,1039 +"Meat, pig sausages",1041 +"Meat, pig, preparations",1042 +"Offals, liver chicken",1059 +"Fat, liver prepared (foie gras)",1060 +"Meat, chicken, canned",1061 +"Eggs, liquid",1063 +"Eggs, dried",1064 +"Offals, liver geese",1074 +"Offals, liver duck",1075 +"Meat, dried nes",1164 +"Oils, fats of animal nes",1168 +Food prep nes,1232 +"Margarine, liquid",1241 +"Fat nes, prepared",1243 +"Oil, boiled etc",1274 +Fatty acids,1276 +Fatty substance residues,1277 +Cereals nes,108 +Yautia (cocoyam),135 +Taro (cocoyam),136 +Yams,137 +Sugar cane,156 +"Cow peas, dry",195 +Pigeon peas,197 +Lupins,210 +Pulses nes,211 +"Brazil nuts, with shell",216 +"Groundnuts, with shell",242 +Oil palm fruit,254 +Palm kernels,256 +Castor oil seed,265 +Tung nuts,275 +Jojoba seed,277 +Safflower seed,280 +Melonseed,299 +Tallowtree seed,305 +Kapok fruit,310 +Kapokseed in shell,311 +Hempseed,336 +Cassava leaves,378 +"Vegetables, leguminous nes",420 +String beans,423 +Okra,430 +Chicory roots,459 +Carobs,461 +"Fruit, citrus nes",512 +"Fruit, stone nes",541 +"Fruit, pome nes",542 +Raspberries,547 +Berries nes,558 +"Ghee, butteroil of cow milk",887 +"Milk, skimmed evaporated",895 +"Milk, skimmed condensed",896 +"Milk, dry buttermilk",899 +"Cheese, skimmed cow milk",904 +"Meat, buffalo",947 +"Offals, edible, buffaloes",948 +"Fat, buffaloes",949 +"Milk, whole fresh buffalo",951 +"Butter, buffalo milk",952 +"Ghee, buffalo milk",953 +"Cheese, buffalo milk",955 +"Fat, sheep",979 +"Milk, whole fresh sheep",982 +"Butter and ghee, sheep milk",983 +"Fat, goats",1019 +"Milk, whole fresh goat",1020 +"Cheese, goat milk",1021 +"Meat, bird nes",1089 +"Offals, horses",1098 +"Meat, ass",1108 +"Meat, mule",1111 +"Meat, camel",1127 +"Offals, edible, camels",1128 +"Milk, whole fresh camel",1130 +"Meat, other rodents",1151 +"Snails, not sea",1176 +"Germ, wheat",19 +"Starch, wheat",23 +"Gluten, wheat",24 +"Beverages, fermented wheat",26 +"Gluten, rice",33 +"Starch, rice",34 +"Bran, rice",35 +"Barley, pot",45 +"Bran, barley",47 +"Flour, barley and grits",48 +Malt extract,50 +"Gluten, maize",63 +"Starch, maize",64 +Beer of maize,66 +"Flour, rye",72 +"Bran, rye",73 +"Bran, oats",77 +"Flour, millet",80 +Beer of millet,82 +"Flour, sorghum",84 +Beer of sorghum,86 +"Flour, buckwheat",90 +"Bran, buckwheat",91 +"Flour, fonio",95 +"Bran, fonio",96 +"Flour, triticale",98 +"Bran, triticale",99 +"Bran, mixed grains",105 +"Bran, cereals nes",112 +"Starch, potatoes",119 +"Tapioca, potatoes",121 +"Tapioca, cassava",127 +Roots and tubers dried,151 +Fructose chemically pure,154 +Maltose chemically pure,155 +"Sugar, cane, raw, centrifugal",158 +Isoglucose,175 +"Bran, pulses",213 +Soya curd,241 +"Oil, tung nuts",276 +"Oil, jojoba",278 +"Oil, mustard",293 +"Oil, poppy",297 +Vegetable tallow,306 +Kapokseed shelled,312 +"Oil, kapok",313 +"Oil, hempseed",337 +"Flour, oilseeds",343 +"Mushrooms, dried",450 +"Juice, vegetables nes",466 +"Juice, tangerine",496 +"Juice, plum, single strength",538 +"Juice, plum, concentrated",539 +"Grapes, must",563 +"Juice, mango",583 +"Fruit, tropical dried nes",604 +"Flour, fruit",624 +"Fruits, nuts, peel, sugar preserved",625 +Alcohol non food,632 +"Fat, cattle butcher",871 +"Meat, beef, dried, salted, smoked",872 +"Meat, extracts",873 +"Meat, homogenized preparations",877 +Liver prep.,878 +"Whey, fresh",903 +"Whey, cheese",905 +"Milk, reconstituted",908 +Egg albumine,916 +Casein,917 +"Milk, skimmed buffalo",954 +"Milk, skimmed sheep",985 +"Butter, goat milk",1022 +"Milk, skimmed 
goat",1023 +"Fat, pig butcher",1040 +"Fat, poultry",1065 +"Fat, poultry, rendered",1066 +"Offals, liver turkeys",1081 +"Fat, other camelids",1160 +Offals nes,1167 +"Meat nes, preparations",1172 +Lard stearine oil,1221 +Degras,1222 +"Castor oil, hydrogenated (opal wax)",1273 +"Oil, hydrogenated",1275 +"Cake, soybeans",238 +"Cake, groundnuts",245 +"Cake, copra",253 +"Cake, palm kernel",259 +"Cake, sunflower",269 +"Cake, rapeseed",272 +"Cake, sesame seed",291 +"Cake, mustard",294 +"Cake, cottonseed",332 +"Cake, others",341 +"Rice, paddy (rice milled equivalent)",30 +Seed cotton,328 +Hops,677 +Peppermint,748 +"Pyrethrum, dried",754 +Cotton lint,767 +Flax fibre and tow,773 +Hemp tow waste,777 +Kapok fibre,778 +Jute,780 +"Bastfibres, other",782 +Ramie,788 +Sisal,789 +Agave fibres nes,800 +Manila fibre (abaca),809 +Coir,813 +Fibre crops nes,821 +"Tobacco, unmanufactured",826 +"Rubber, natural",836 +"Gums, natural",839 +"Hides, cattle, fresh",919 +"Hides, buffalo, fresh",957 +"Wool, greasy",987 +"Skins, sheep, fresh",995 +"Skins, goat, fresh",1025 +"Meat, other camelids",1158 +Beeswax,1183 +"Silk-worm cocoons, reelable",1185 +"Silk, raw",1186 +Freshwater Fish,2761 +Demersal Fish,2762 +Pelagic Fish,2763 +"Marine Fish, Other",2764 +Crustaceans,2765 +Cephalopods,2766 +"Molluscs, Other",2767 +"Meat, Aquatic Mammals",2768 +"Aquatic Animals, Others",2769 +Aquatic Plants,2775 +"Fish, Body Oil",2781 +"Fish, Liver Oil",2782 +"Cake, rice bran",360000 +"Cake, maize",600000 +"Cake, olive residues",2740000 +"Cake, jojoba",2780000 +"Cake, safflower",2810000 +"Cake, poppy",2970000 +"Cake, kapok",3130000 +"Fiber, cottonseed",3320000 +"Cake, linseed",3340000 +"Cake, hempseed",3370000 +"Cake, vegetable origin nes",3400000 +"AnMeatEq, cattle",8670000 +"AnMeatEq, buffalo",9470000 +"AnMeatEq, sheep",9770000 +"AnMeatEq, goat",10170000 +"AnMeatEq, pig",10350000 +"AnMeatEq, chicken",10580000 +"AnMeatEq, duck",10690000 +"AnMeatEq, goose and guinea fowl",10730000 +"AnMeatEq, turkey",10800000 +"AnMeatEq, horse",10970000 +"AnMeatEq, ass",11080000 +"AnMeatEq, mule",11110000 +"AnMeatEq, camel",11270000 +"AnMeatEq, rabbit",11410000 +"AnMeatEq, other rodents",11510000 +"AnMeatEq, other camelids",11580000 diff --git a/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd b/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd index 473a9257..ae84e6ad 100644 --- a/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd +++ b/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd @@ -6,6 +6,8 @@ \usage{ FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL( .DF, + .FAO_AREA_CODE_COL = "area_code", + .AREA_COL = "area", SUDAN2012_BREAK = F, SUDAN2012_MERGE = T ) @@ -13,6 +15,8 @@ FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL( \arguments{ \item{.DF}{Input data frame} +\item{.AREA_COL}{} + \item{SUDAN2012_BREAK}{If T break Sudan before 2012 based on 2013- 2016 data} \item{SUDAN2012_MERGE}{If T merge South Sudan into Sudan} diff --git a/man/get_data_list.Rd b/man/get_data_list.Rd new file mode 100644 index 00000000..b1b3aaac --- /dev/null +++ b/man/get_data_list.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-data.R +\name{get_data_list} +\alias{get_data_list} +\title{get_data_list} +\usage{ +get_data_list(all_data, data_list, strip_attributes = FALSE, environ = NULL) +} +\arguments{ +\item{all_data}{Data structure} + +\item{data_list}{A character vector of data to load into the given environment} + +\item{strip_attributes}{A logical vector which will be passed on to \code{get_data}. 
The length
+must be 1 (use the same logical for all values of data_list) or match the length of
+\code{data_list} (one logical value for each data_list item).}
+
+\item{environ}{The environment into which the data should be loaded. If NULL (the default)
+the caller's environment will be used.}
+}
+\description{
+This function calls \code{get_data} for each data name in \code{data_list} and assigns
+the resulting data into the given \code{environ} with the data name being used as the
+variable name. Note: for values in data_list "named" FILE the "basename" of the string
+will be used as the variable name.
+}
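+% Illustrative usage (hypothetical data names; assumes a populated \code{all_data}
+% object from a driver run, so this is a sketch rather than an official example)
+\examples{
+\dontrun{
+# The "FILE" entry loads under its basename, i.e., GCAM_region_names
+get_data_list(all_data, c(FILE = "common/GCAM_region_names", "L100.example_data"),
+              strip_attributes = TRUE)
+}
+}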