From 6647ca633dd7124117fb4de4334b15c75569cde8 Mon Sep 17 00:00:00 2001
From: "Zhao, Xin"
Date: Sun, 6 Aug 2023 21:17:21 -0400
Subject: [PATCH] package processing updates

Moving preprocessing into the package structure to ensure traceability.
However, vector size could be a concern, requiring larger memory.
---
 NAMESPACE | 3 +-
 R/utils-data.R | 55 +
 R/xfaostat_L100_constants.R | 6 +
 R/xfaostat_L101_RawDataPreProcessing1.R | 196 +++
 R/xfaostat_L101_RawDataPreProcessing2.R | 178 +++
 R/xfaostat_L101_RawDataPreProcessing3.R | 142 +++
 R/xfaostat_helper_funcs.R | 521 ++++++++
 R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R | 1108 +++++++++++++++++
 R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R | 1034 ---------------
 ...nerate_package_data_faostat_helper_funcs.R | 8 +-
 inst/extdata/aglu/FAO/SUA_item_code_map.csv | 540 ++++++++
 ..._AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd | 4 +
 man/get_data_list.Rd | 26 +
 13 files changed, 2784 insertions(+), 1037 deletions(-)
 create mode 100644 R/xfaostat_L100_constants.R
 create mode 100644 R/xfaostat_L101_RawDataPreProcessing1.R
 create mode 100644 R/xfaostat_L101_RawDataPreProcessing2.R
 create mode 100644 R/xfaostat_L101_RawDataPreProcessing3.R
 create mode 100644 R/xfaostat_helper_funcs.R
 create mode 100644 R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R
 delete mode 100644 R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R
 create mode 100644 inst/extdata/aglu/FAO/SUA_item_code_map.csv
 create mode 100644 man/get_data_list.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 87694965..9c2cb966 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -28,11 +28,11 @@ export(standardize_iso)
 export(unprotect_integer_cols)
 importFrom(assertthat,assert_that)
 importFrom(data.table,data.table)
-importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
 importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
+importFrom(dplyr,first)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_at)
@@ -48,6 +48,7 @@ importFrom(dplyr,right_join)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(dplyr,summarise_all)
+importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
 importFrom(grDevices,rainbow)
diff --git a/R/utils-data.R b/R/utils-data.R
index 74b1f970..443de588 100644
--- a/R/utils-data.R
+++ b/R/utils-data.R
@@ -233,6 +233,61 @@ get_data <- function(all_data, name, strip_attributes = FALSE) {
   }
 }
 
+#' get_data_list
+#'
+#' This function calls \code{get_data} for each data name in \code{data_list} and assigns
+#' the resulting data into the given \code{environ} with the data name being used as the
+#' variable name. Note: for values in data_list named "FILE" (or "OPTIONAL_FILE") the
+#' "basename" of the string will be used as the variable name.
+#'
+#' @param all_data Data structure
+#' @param data_list A character vector of data to load into the given environment
+#' @param strip_attributes A logical vector which will be passed on to \code{get_data}. The length
+#' must be 1 (use the same logical for all values of data_list) or match the length of
+#' \code{data_list} (one logical value for each data_list item).
+#' @param environ The environment into which the data should be loaded. If NULL (the default)
+#' the caller's environment will be used.
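+#'
+#' A minimal usage sketch (the data names are illustrative, not objects shipped
+#' with the package):
+#' @examples
+#' \dontrun{
+#' inputs <- c(FILE = "common/GCAM_region_names", "L100.example_data")
+#' # loads `GCAM_region_names` and `L100.example_data` into the caller's environment
+#' get_data_list(all_data, inputs, strip_attributes = TRUE)
+#' }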
+get_data_list <- function(all_data, data_list, strip_attributes = FALSE, environ = NULL) {
+  # expecting a (potentially named) character vector for data_list
+  assertthat::assert_that(is.character(data_list))
+  data_list_names <- names(data_list)
+
+  # strip_attributes must be logical and either be length 1 (in which case the same value
+  # will be used for all calls to get_data) or match the length of data_list (values will be
+  # matched up per index)
+  assertthat::assert_that(is.logical(strip_attributes))
+  assertthat::assert_that(length(strip_attributes) == 1 || length(strip_attributes) == length(data_list))
+  if(length(strip_attributes) == 1) {
+    strip_attributes = rep_len(strip_attributes, length(data_list))
+  }
+
+  # if no environment was explicitly given the default behavior is to load into the caller's
+  # environment
+  if(is.null(environ)) {
+    environ = parent.frame()
+  }
+
+  # loop over each data_list item, call get_data and assign the result to the same data name in
+  # the given environment
+  for(i in seq_along(data_list)) {
+    curr_var_name <- data_list[i]
+    # the variable name to assign for FILE is the "basename" of the file
+    # i.e. `FILE = "common/GCAM_region_names"` will result in `GCAM_region_names` being set
+    if(!is.null(data_list_names) && data_list_names[i] %in% c("FILE", "OPTIONAL_FILE")) {
+      # Note: strsplit returns a list (one per each str to be split) of character vectors
+      # (one for each token split out). Given we split one string at a time
+      # we just grab the first element of the list (`[[1]]`)
+      data_name_split = strsplit(curr_var_name, "/")[[1]]
+      # get the last element of the char vec to use as the var name
+      curr_var_name = tail(data_name_split, n = 1)
+    }
+    # get the data
+    data = get_data(all_data, data_list[i], strip_attributes[i])
+    # assign it into the environment
+    assign(curr_var_name, data, envir = environ)
+  }
+}
+
 #' return_data
 #'
diff --git a/R/xfaostat_L100_constants.R b/R/xfaostat_L100_constants.R
new file mode 100644
index 00000000..86270287
--- /dev/null
+++ b/R/xfaostat_L100_constants.R
@@ -0,0 +1,6 @@
+# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
+
+# General behavior constants ======================================================================
+
+# having issues with package check here
+ DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata")
diff --git a/R/xfaostat_L101_RawDataPreProcessing1.R b/R/xfaostat_L101_RawDataPreProcessing1.R
new file mode 100644
index 00000000..b9b2acaa
--- /dev/null
+++ b/R/xfaostat_L101_RawDataPreProcessing1.R
@@ -0,0 +1,196 @@
+# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
+
+#' module_xfaostat_L101_RawDataPreProcessing1
+#'
+#' Preprocess raw FAOSTAT data
+#'
+#' @param command API command to execute
+#' @param ... other optional parameters, depending on command
+#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
+#' \code{command} is "MAKE") all the generated outputs
+#' @details This chunk preprocesses raw FAOSTAT production and harvested area (QCL), producer price (PP),
+#' supply utilization accounting (SCL), and new food balance sheet (FBS) data.
+#' @importFrom assertthat assert_that
+#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
+#' first case_when vars
+#' @importFrom tibble tibble
+#' @importFrom tidyr complete drop_na gather nesting spread replace_na
+#' @author XZ 2023
+module_xfaostat_L101_RawDataPreProcessing1 <- function(command, ...)
{
+
+  MODULE_INPUTS <-
+    c(FILE = "aglu/AGLU_ctry")
+
+  MODULE_OUTPUTS <-
+    c("QCL",               # Ag production quantity and harvested area
+      "PP",                # Producer prices
+      "SCL",               # Supply utilization accounting
+      "FBS",               # New food balance sheet
+      "QCL_area_code_map"  # Country code
+    )
+
+  if(command == driver.DECLARE_INPUTS) {
+    return(MODULE_INPUTS)
+  } else if(command == driver.DECLARE_OUTPUTS) {
+    return(MODULE_OUTPUTS)
+  } else if(command == driver.MAKE) {
+
+    year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    #source("data-raw/generate_package_data_faostat_helper_funcs.R")
+    #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata.faostat")
+
+    # *[QCL] FAOSTAT Production and area ----
+
+    ## Load raw data
+    FAOSTAT_load_raw_data(DATASETCODE = "QCL", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)
+
+    QCL %>%
+      # Remove aggregated areas and items
+      filter(area_code < 350, item_code < 1700) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      # When dealing with animal/livestock data, units are important
+      # Prod Popultn (5314) for Beeswax and Honey is removed since data only exist before 1990
+      filter(element_code != 5314) %>%
+      # Remove NA for simplicity for now; expand.grid later
+      # All Coir (coconut fiber) is filtered out due to NA
+      filter(!is.na(value)) %>%
+      # remove accents
+      rm_accent("item", "area") -> QCL1
+
+    QCL1 %>%
+      distinct(area_code, area) %>%
+      add_title("FAO primary production country and code") %>%
+      add_units("NA") %>%
+      add_comments("FAO Country and code") ->
+      QCL_area_code_map
+
+    # Other data use the QCL area for consistency
+    QCL_area_code <- QCL1 %>% distinct(area_code) %>% pull()
+
+    QCL1 %>%
+      add_title("FAO primary production") %>%
+      add_units("tonnes (production), ha (area harvested), head (livestock)") %>%
+      add_comments("Preprocessed FAOSTAT primary production") ->
+      QCL
+
+    # *[PP] Producer price ----
+
+    FAOSTAT_load_raw_data(DATASETCODE = "PP", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)
+
+    PP %>% distinct(element, element_code, unit)
+
+    PP %>%
+      filter(area_code < 350,                      # rm aggregated regions
+             item_code < 1700,                     # rm aggregated items
+             area_code %in% QCL_area_code,         # only keep regions with production
+             element_code %in% c(5532, 5539)) %>%  # keep USD/tonne and index
+      rm_accent("item", "area") -> PP1
+
+    # Use the price index to fill in missing prices across years:
+    # price(year) = price(2015) * index(year) / index(2015)
+    PP1 %>%
+      filter(element_code %in% c(5532, 5539)) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      # Not completing year and area here
+      spread(element, value) %>%
+      left_join(
+        PP1 %>% filter(element_code %in% c(5532, 5539)) %>%
+          select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+          spread(element, value) %>%
+          rename(pp_base = `Producer Price (USD/tonne)`,
+                 pp_baseindex = `Producer Price Index (2014-2016 = 100)`) %>%
+          filter(!is.na(pp_base)) %>%
+          group_by(area, area_code, item) %>%
+          filter(year == 2015) %>% within(rm(year)) %>%
+          ungroup(),
+        by = c("area_code", "area", "item_code", "item")
+      ) %>% mutate(
+        `Producer Price (USD/tonne)` = if_else(is.na(`Producer Price (USD/tonne)`),
+                                               pp_base * `Producer Price Index (2014-2016 = 100)` / pp_baseindex,
+                                               `Producer Price (USD/tonne)`)
+      ) %>%
+      select(area_code, area, item_code, item, year, `Producer Price (USD/tonne)`) %>%
+      gather(element, value, `Producer Price (USD/tonne)`) %>%
+      mutate(element_code =
5532) -> PP2
+
+    ### output PP and clean memory ----
+    PP2 %>%
+      add_title("FAO producer prices") %>%
+      add_units("USD/tonne") %>%
+      add_comments("Preprocessed FAOSTAT producer prices") ->
+      PP
+
+    # keep PP (a module output); remove intermediates only
+    rm(PP1, PP2)
+
+    # Food balance and Supply-Utilization-Account
+
+    ## *[FBS] new food balance sheet (2010-) ----
+
+    ## Load raw data
+    FAOSTAT_load_raw_data("FBS") # New FBS 2010+
+    FBS %>% distinct(element, element_code, unit)
+
+    FBS %>% filter(item_code < 2901, item_code != 2501,
+                   !element_code %in% c(511, 5301),
+                   area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> FBS1
+
+    ### output FBS and clean memory ----
+    FBS1 %>%
+      add_title("FAO FBS") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAOSTAT FBS") ->
+      FBS
+    rm(FBS1)
+
+    ## *[SCL] SUA: supply utilization accounting ----
+
+    FAOSTAT_load_raw_data("SCL") # SUA 2010+
+    SCL %>% distinct(element, element_code, unit)
+    # FAOSTAT "accidentally" used CPC codes in SCL
+    if (is.numeric(SCL$item_code)) {
+      SCL %>% filter(item_code <= 1700, item_code != 1) -> SCL
+    }
+
+    SCL %>% filter(!element_code %in% c(664, 665, 674, 684, 511),
+                   # Do not derive cal/g from `Food supply (kcal/capita/day)` / `Food supply quantity (g/capita/day)`;
+                   # the units are too small, so those elements are removed here.
+                   # `Calories/Year` / `Food supply quantity (tonnes)` is more accurate (similarly for protein and fat);
+                   # use the annual values in the SUA to calculate the conversion rates.
+                   area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> SCL1
+
+    ### output SCL and clean memory ----
+    SCL1 %>%
+      add_title("FAO SCL") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAOSTAT SCL") ->
+      SCL
+    rm(SCL1)
+
+    return_data(MODULE_OUTPUTS)
+
+  } else {
+    stop("Unknown command")
+  }
+}
diff --git a/R/xfaostat_L101_RawDataPreProcessing2.R b/R/xfaostat_L101_RawDataPreProcessing2.R
new file mode 100644
index 00000000..13ba10cd
--- /dev/null
+++ b/R/xfaostat_L101_RawDataPreProcessing2.R
@@ -0,0 +1,178 @@
+# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.
+
+#' module_xfaostat_L101_RawDataPreProcessing2
+#'
+#' Preprocess raw FAOSTAT data
+#'
+#' @param command API command to execute
+#' @param ... other optional parameters, depending on command
+#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
+#' \code{command} is "MAKE") all the generated outputs
+#' @details This chunk preprocesses the old FAOSTAT food balance sheet (FBSH), the old non-food
+#' commodity balance (CB), their merged balance (FBSH_CB), and population (OA) data.
+#' @importFrom assertthat assert_that
+#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at
+#' first case_when vars
+#' @importFrom tibble tibble
+#' @importFrom tidyr complete drop_na gather nesting spread replace_na
+#' @author XZ 2023
+module_xfaostat_L101_RawDataPreProcessing2 <- function(command, ...)
{
+
+  MODULE_INPUTS <-
+    c(FILE = "aglu/AGLU_ctry",
+      "QCL_area_code_map")
+
+  MODULE_OUTPUTS <-
+    c("FBSH",    # Old food balance sheet
+      "CB",      # Old non food utilization accounting
+      "FBSH_CB", # Combined FBSH and CB
+      "OA"       # Population
+    )
+
+  if(command == driver.DECLARE_INPUTS) {
+    return(MODULE_INPUTS)
+  } else if(command == driver.DECLARE_OUTPUTS) {
+    return(MODULE_OUTPUTS)
+  } else if(command == driver.MAKE) {
+
+    year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    # Country codes come from QCL_area_code_map (produced in PreProcessing1);
+    # the raw QCL data set itself is not needed here
+    QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()
+
+    # Food balance and Supply-Utilization-Account
+
+    ## *[FBSH] old food balance sheet (-2013) ----
+
+    FAOSTAT_load_raw_data("FBSH") # Old FBS -2013
+    FBSH %>% distinct(element, element_code, unit)
+    # Keep population (old)
+    FBSH %>% filter(item_code < 2901,
+                    !element_code %in% c(5301),
+                    area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> FBSH1
+
+    ### output FBSH and clean memory ----
+
+    FBSH1 %>%
+      add_title("FAO FBSH") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO FBSH") ->
+      FBSH
+
+    rm(FBSH1)
+
+    ## *[CB] Non-food Balance ----
+
+    FAOSTAT_load_raw_data("CB") # Old FBS-nonfood -2013
+    CB %>% distinct(element, element_code, unit)
+    # Keep population (old)
+    CB %>% filter(item_code < 2901,
+                  !element_code %in% c(5300),
+                  area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> CB1
+
+    ### output CB and clean memory ----
+
+    CB1 %>%
+      add_title("FAO CB") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO CB") ->
+      CB
+
+    rm(CB1)
+
+    ## *[FBSH_CB] merge the two ----
+
+    FBSH %>% distinct(item_code) %>%
+      dplyr::intersect(CB %>% distinct(item_code)) %>%
+      pull ->
+      dup_item_code
+
+    FBSH %>% mutate(value = value * 1000) %>% # convert 1000 tonnes to tonnes
+      bind_rows(CB %>% filter(!item_code %in% dup_item_code)) %>%
+      select(-unit) %>%
+      filter(!element_code %in% c(645, 664, 674, 684, 511)) %>% # remove non-balance items
+      mutate(element = gsub(" Quantity| supply quantity \\(tonnes\\)| \\(non-food\\)", "", element)) %>%
+      mutate(element = replace(element, element == "Losses", "Loss"),
+             element = replace(element, element == "Processing", "Processed")) %>%
+      select(-element_code) %>%
+      mutate(unit = "tonnes") ->
+      FBSH_CB
+
+    # Rice and products and Groundnuts related adjustments
+    # to make FBS and FBSH more consistent
+    SHELL_RATE_groundnuts <- 0.7
+    Mill_RATE_rice <- 0.667
+
+    FBSH_CB %>% distinct(element, unit)
+    FBSH_CB %>%
+      filter(!item_code %in% c(2805, 2556)) %>%
+      # Adjust to Rice and products and Groundnuts in FBS and bind
+      bind_rows(
+        FBSH_CB %>% filter(item_code %in% c(2805)) %>%
+          mutate(item = "Rice and products", item_code = 2807,
+                 value = value / Mill_RATE_rice) %>%
+          bind_rows(FBSH_CB %>% filter(item_code %in% c(2556)) %>%
+                      mutate(item = "Groundnuts", item_code = 2552,
+                             value = value / SHELL_RATE_groundnuts))
+      ) ->
+      FBSH_CB1
+
+    ### output FBSH_CB and clean memory ----
+
+    FBSH_CB1 %>%
+      add_title("FAO FBSH_CB", overwrite = T) %>%
+      add_units("tonne") %>%
add_comments("Preprocessed FAO FBSH_CB") -> + FBSH_CB + + rm(SHELL_RATE_groundnuts, Mill_RATE_rice); + rm(FBSH_CB1, FBSH, CB, dup_item_code) + + + # *[OA]: Population ---- + + FAOSTAT_load_raw_data("OA") # Population + OA %>% distinct(element, element_code) + OA %>% distinct(item, item_code) + + OA %>% filter(element_code == 511, item_code == 3010) %>% + select(area_code, area, item_code, item, element_code, element, year, value, unit) %>% + rm_accent("item", "area") -> OA1 + + ### output OA and clean memory ---- + OA1 %>% + add_title("FAO OA") %>% + add_units("tonne") %>% + add_comments("Preprocessed FAO OA") -> + OA + rm(OA, OA1) + rm(QCL_area_code) + + + return_data(MODULE_OUTPUTS) + + } else { + stop("Unknown command") + } +} diff --git a/R/xfaostat_L101_RawDataPreProcessing3.R b/R/xfaostat_L101_RawDataPreProcessing3.R new file mode 100644 index 00000000..80b3c6b4 --- /dev/null +++ b/R/xfaostat_L101_RawDataPreProcessing3.R @@ -0,0 +1,142 @@ +# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. + +#' module_xfaostat_L101_RawDataPreProcessing3 +#' +#' Preprocess raw faostat data +#' +#' @param command API command to execute +#' @param ... other optional parameters, depending on command +#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if +#' \code{command} is "MAKE") all the generated outputs +#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. +#' @importFrom assertthat assert_that +#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at +#' first case_when vars +#' @importFrom tibble tibble +#' @importFrom tidyr complete drop_na gather nesting spread replace_na +#' @author XZ 2023 +module_xfaostat_L101_RawDataPreProcessing3 <- function(command, ...) { + + MODULE_INPUTS <- + c(FILE = "aglu/AGLU_ctry", + "QCL_area_code_map") + + MODULE_OUTPUTS <- + c("TCL", "TM" # Gross and bilateral trade + ) + + if(command == driver.DECLARE_INPUTS) { + return(MODULE_INPUTS) + } else if(command == driver.DECLARE_OUTPUTS) { + return(MODULE_OUTPUTS) + } else if(command == driver.MAKE) { + + year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. 
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    #source("data-raw/generate_package_data_faostat_helper_funcs.R")
+    #DIR_RAW_DATA_FAOSTAT <- system.file("extdata", "aglu/FAO/FAOSTAT", package = "gcamdata.faostat")
+
+    QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()
+
+    # *[TCL] Gross trade ----
+    FAOSTAT_load_raw_data("TCL") # Gross trade
+
+    TCL %>% distinct(element, element_code, unit)
+    TCL %>% distinct(item, item_code)
+
+    TCL %>%
+      filter(item_code <= 1700,
+             # only keep quantity
+             !element_code %in% c(5622, 5922),
+             area_code %in% QCL_area_code) %>%
+      select(area_code, area, item_code, item, element_code, element, year, value, unit) %>%
+      rm_accent("item", "area") -> TCL1
+
+    ### output TCL and clean memory ----
+    TCL1 %>%
+      add_title("FAO TCL") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO TCL") ->
+      TCL
+
+    rm(TCL1)
+
+    # *[TM] Bilateral trade ----
+    #*FAO has better quality bilateral data since 1992, covering most SUA items
+    FAOSTAT_load_raw_data("TM") # Bilateral trade
+
+    TM %>%
+      # Only keep quantities for elements with a unit of tonnes
+      filter(element_code %in% c(5910, 5610),
+             item_code < 1700,
+             # Bilateral trade years start in 1986 but quality is higher after 1992
+             # Subset the data also to shrink the size
+             year >= 1992,
+             partner_country_code %in% QCL_area_code,
+             reporter_country_code %in% QCL_area_code) %>%
+      select(reporter_country_code, reporter_countries,
+             partner_country_code, partner_countries,
+             item_code, item, element_code, element, year, value, unit) ->
+      TM1
+    rm(TM)
+
+    ## **Reconcile export and import bilateral flows ----
+    # Full join exports and imports and use available imports to fill in missing and zero exports
+    TM1 %>% filter(element %in% c("Export Quantity")) %>% spread(element, value) %>%
+      select(exporter = reporter_country_code,
+             importer = partner_country_code, item_code, year, expflow = `Export Quantity`) %>%
+      full_join(
+        TM1 %>% filter(element %in% c("Import Quantity")) %>% spread(element, value) %>%
+          select(importer = reporter_country_code,
+                 exporter = partner_country_code, item_code, year, impflow = `Import Quantity`),
+        by = c("exporter", "importer", "item_code", "year")
+      ) %>%
+      # replace NA with zero, but use imports to replace zero exports later
+      replace_na(list(expflow = 0, impflow = 0)) %>%
+      transmute(area_code = importer, year, item_code, source_code = exporter,
+                value = if_else(expflow == 0, impflow, expflow)) %>%
+      mutate(element = "Import Quantity") ->
+      TM2
+
+    TM2 %>%
+      # Remove self-trade (per unaggregated area_code), which existed in the FAO TM
+      # import data and is likely due to data processing mistakes.
+      filter(area_code != source_code) %>%
+      left_join(TM1 %>% distinct(item, item_code), by = c("item_code")) %>%
+      left_join(TM1 %>% distinct(area = partner_countries, area_code = partner_country_code), by = c("area_code")) %>%
+      left_join(TM1 %>% distinct(source = partner_countries, source_code = partner_country_code), by = c("source_code")) %>%
+      rm_accent("item", "area", "source") %>%
+      mutate(unit = "tonnes") ->
+      TM3
+    rm(TM1, TM2)
+
+    TM3 %>% spread(year, value) -> TM4
+
+    ### output TM and clean memory ----
+    TM4 %>%
+      add_title("FAO TM") %>%
+      add_units("tonne") %>%
+      add_comments("Preprocessed FAO TM") ->
+      TM
+
+    rm(TM3, TM4)
+    rm(QCL_area_code)
+
+    return_data(MODULE_OUTPUTS)
+
+  } else {
+    stop("Unknown command")
+  }
+}
diff --git a/R/xfaostat_helper_funcs.R b/R/xfaostat_helper_funcs.R
new file mode 100644
index 00000000..23d54e99
--- /dev/null
+++ b/R/xfaostat_helper_funcs.R
@@ -0,0 +1,521 @@
+
+
+# Helper functions for downloading FAOSTAT data ----
+FAOSTAT_metadata <- function (code = NULL){
+  FAOxml <- XML::xmlParse(xml2::read_xml("http://fenixservices.fao.org/faostat/static/bulkdownloads/datasets_E.xml"))
+  metadata <- XML::xmlToDataFrame(FAOxml, stringsAsFactors = FALSE)
+  names(metadata) <- tolower(gsub("\\.", "_", names(metadata)))
+
+  # Bug fix for CB; can remove later if FAOSTAT updates the link
+  metadata["CB" == metadata[, "datasetcode"], "filelocation"] <-
+    "https://fenixservices.fao.org/faostat/static/bulkdownloads/CommodityBalances_(non-food)_E_All_Data_(Normalized).zip"
+
+  if (!is.null(code)) {
+    metadata <- metadata[code == metadata[, "datasetcode"], ]
+  }
+
+  return(metadata)
+}
+
+FAOSTAT_download_bulk <- function(DATASETCODE,
+                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){
+
+  assertthat::assert_that(is.character(DATASETCODE))
+  assertthat::assert_that(is.character(DATA_FOLDER))
+
+  lapply(DATASETCODE, function(d){
+    metadata <- FAOSTAT_metadata(code = d)
+    url_bulk = metadata$filelocation
+
+    file_name <- basename(url_bulk)
+    # mode = "wb" so zip archives download correctly on Windows
+    download.file(url_bulk, file.path(DATA_FOLDER, file_name), mode = "wb")
+  })
+
+}
+
+FAOSTAT_load_raw_data <- function(DATASETCODE,
+                                  DATA_FOLDER = DIR_RAW_DATA_FAOSTAT){
+  assertthat::assert_that(is.character(DATASETCODE))
+  assertthat::assert_that(is.character(DATA_FOLDER))
+
+  metadata <- FAOSTAT_metadata()
+
+  # Loop through each code
+  for (CODE in DATASETCODE) {
+
+    metadata %>% filter(datasetcode == CODE) -> metadata1
+
+    zip_file_name <- file.path(DATA_FOLDER, basename(metadata1$filelocation))
+    assertthat::assert_that(file.exists(zip_file_name))
+    # assuming the csv in the zip has the same base name
+    csv_file_name <- gsub(".zip$", ".csv", basename(zip_file_name))
+
+    df <- readr::read_csv(unz(zip_file_name, csv_file_name), col_types = NULL)
+    # Lower-case col names and use _ as the delimiter
+    names(df) <- tolower(gsub("\\.| ", "_", names(df)))
+    # Assigned to parent env
+    assign(CODE, df, envir = parent.env(environment()))
+    # Assigned to current env
+    #assign(CODE, df, envir = environment())
+  }
+
+}
+
+# remove accents and apostrophes for cols in a df
+rm_accent <- function(.df, ...){
+
+  assertthat::assert_that(
+    length(intersect(c(...), names(.df))) == length(c(...)),
+    msg = "Columns listed not included in the data frame")
+
+  # .df %>%
+  #   mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>%
+  #   mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "")
+
+  .df %>%
+    mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>%
+    mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = ""))
+
+}
+
+# Helper functions
for processing FAOSTAT data
+
+FAO_AREA_RM_NONEXIST <- function(.DF,
+                                 SUDAN2012_MERGE = F,
+                                 RM_AREA_CODE = c(69, 87, 127, 135, 145, 182, 299)){
+
+  assertthat::assert_that("area_code" %in% names(.DF),
+                          msg = "A data frame with an area_code col is required")
+
+  # Areas removed mostly due to missing data elsewhere and incomplete years
+  # There are 7:
+  # 69 French Guyana
+  # 87 Guadeloupe
+  # 127 Marshall Islands
+  # 135 Martinique
+  # 145 Micronesia (Federated States of)
+  # 299 Palestine
+  # 182 Reunion
+  # area_code_remove <- c(69, 87, 127, 135, 145, 182, 299)
+
+  # In 1991 USSR (228) collapsed into 15 countries
+  area_code_USSR = c(228, 1, 52, 57, 63, 73, 108, 113, 119, 126, 146, 185, 208, 213, 230, 235)
+
+  # In 1992 Yugoslav SFR dissolved into 5 countries
+  # Yugoslav SFR (248)
+  # Croatia (98)
+  # North Macedonia (154)
+  # Slovenia (198)
+  # Bosnia and Herzegovina (80)
+  # Serbia and Montenegro (186)
+  # In 2006 it further broke into 2:
+  # Montenegro (273)
+  # Serbia (272)
+  # These regions will be merged for all years in the data as most models aggregate them into a single region
+  area_code_Yugoslav <- c(248, 98, 154, 198, 80, 186)
+  area_code_SerbiaandMontenegro <- c(186, 273, 272)
+  # In 1999/2000 Belgium-Luxembourg (15) partitioned into 255 (Belgium) and 256 (Luxembourg)
+  area_code_Belgium_Luxembourg <- c(15, 255, 256)
+  # In 1993 Czechoslovakia (51) split into Czechia (167) and Slovakia (199)
+  area_code_Czechoslovakia <- c(51, 167, 199)
+  # In 2011 Sudan (former) (206) broke into South Sudan (277) and Sudan (276)
+  area_code_Sudan <- c(206, 276, 277)
+  # Ethiopia PDR (62) dissolved into Ethiopia (238) and Eritrea (178) in 1993
+  area_code_Ethiopia <- c(62, 238, 178)
+
+  .DF %>%
+    filter(!area_code %in% RM_AREA_CODE) %>%
+    # remove USSR related regions by their years
+    filter(!(area_code %in% area_code_USSR[1] & year >= 1992),
+           !(area_code %in% area_code_USSR[-1] & year <= 1991)) %>%
+    # remove Yugoslav and Serbia & Montenegro related regions by their years
+    filter(!(area_code %in% area_code_Yugoslav[1] & year >= 1992),
+           !(area_code %in% area_code_Yugoslav[-1] & year <= 1991)) %>%
+    filter(!(area_code %in% area_code_SerbiaandMontenegro[1] & year >= 2006),
+           !(area_code %in% area_code_SerbiaandMontenegro[-1] & year <= 2005)) %>%
+    # remove Belgium-Luxembourg related regions by their years
+    filter(!(area_code %in% area_code_Belgium_Luxembourg[1] & year >= 2000),
+           !(area_code %in% area_code_Belgium_Luxembourg[-1] & year <= 1999)) %>%
+    # remove area_code_Czechoslovakia related regions by their years
+    filter(!(area_code %in% area_code_Czechoslovakia[1] & year >= 1993),
+           !(area_code %in% area_code_Czechoslovakia[-1] & year <= 1992)) %>%
+    # remove area_code_Ethiopia related regions by their years
+    filter(!(area_code %in% area_code_Ethiopia[1] & year >= 1993),
+           !(area_code %in% area_code_Ethiopia[-1] & year <= 1992)) ->
+    .DF1
+
+  if (SUDAN2012_MERGE == F) {
+    .DF1 %>%
+      # remove area_code_Sudan related regions by their years
+      filter(!(area_code %in% area_code_Sudan[1] & year >= 2012),
+             !(area_code %in% area_code_Sudan[-1] & year <= 2011)) ->
+      .DF1
+  } else {
+    .DF1 %>%
+      filter(!(area_code %in% area_code_Sudan[-1])) ->
+      .DF1
+  }
+
+  return(.DF1)
+
+}
+
+assert_FBS_balance <- function(.DF){
+
+  # assert .DF structure
+  assert_that(is.data.frame(.DF))
+  assertthat::assert_that(all(c("element") %in% names(.DF)))
+  assertthat::assert_that(is.grouped_df(.DF) == F)
+
+  # Check data
+  # 1. 
Positive values except stock variation and residuals
+  if (isTRUE(.DF %>%
+             filter(!element %in% c("Stock Variation", "Residuals")) %>%
+             summarise(min = min(value, na.rm = T)) %>% pull(min) >= -0.001)) {
+    message("Good! Signs checked") } else{
+      warning("Negative values in key elements (not including Stock Variation and Residuals)")
+    }
+
+  # 2. Trade balance in all years and items
+  if (isTRUE(.DF %>% filter(element %in% c("Import", "Export")) %>%
+             group_by(year, item_code, element) %>%
+             summarise(value = sum(value, na.rm = T), .groups = "drop") %>%
+             spread(element, value) %>% filter(abs(Import - Export) > 0.0001) %>% nrow() == 0)) {
+    message("Good! Gross trade in balance") } else{
+      warning("Gross trade imbalance")
+    }
+
+  # 3. SUA balance check
+  if (isTRUE(.DF %>%
+             spread(element, value) %>%
+             mutate(`Regional supply` = Production + `Import`,
+                    `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Stock Variation`,
+                    bal = abs(`Regional supply` - `Regional demand` - Residuals)) %>%
+             filter(bal > 0.0001) %>% nrow() == 0)) {
+    message("Good! Regional supply = Regional demand + Residuals") } else{
+      warning("Regional supply != Regional demand + Residuals")
+    }
+
+  # 4. Storage in balance across time
+  if (isTRUE(.DF %>% filter(element %in% c("Opening stocks", "Closing stocks", "Stock Variation")) %>%
+             spread(element, value) %>%
+             filter(`Opening stocks` + `Stock Variation` - `Closing stocks` != 0) %>% nrow() == 0 &
+             .DF %>% filter(element %in% c("Opening stocks", "Closing stocks", "Stock Variation")) %>%
+             spread(element, value) %>% group_by(area_code, item_code) %>%
+             arrange(area_code, item_code, year) %>%
+             mutate(bal = abs(lag(`Closing stocks`) - `Opening stocks`)) %>%
+             filter(is.na(bal) == F, bal > 0.0001) %>% nrow() == 0)){
+    message("Good! Storage in balance across time") } else{
+      warning("Stock imbalance across time or inconsistent stock variation")
+    }
+
+}
+
+# Be careful with the units: value / 1000 converts tonnes to 1000 tonnes here
+# Light adjustments to merge Tourist consumption into Food
+# and maintain balance across dimensions
+SUA_bal_adjust <- function(.df){
+
+  SCL_element_new <-
+    c("Opening stocks", "Production", "Export", "Import", "Stock Variation",
+      "Food", "Feed", "Seed", "Processed", "Other uses",
+      "Tourist consumption", "Loss", "Residuals")
+
+  .df %>%
+    mutate(element = factor(element, levels = SCL_element_new),
+           value = value / 1000) %>% # unit: 1000 tonnes
+    replace_na(list(value = 0)) %>%
+    spread(element, value) %>%
+    # Merge tourist consumption into food
+    mutate(Food = Food + `Tourist consumption`) %>%
+    select(-`Tourist consumption`) %>%
+    # Maintain stock balance across time; SCL data (2010-) quality is high
+    # Calculate closing stocks; when negative, shift up stocks in all years.
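+    # Editorial note on the back-calculation below (hypothetical numbers):
+    # with years arranged descending, first() is the latest year, so with e.g.
+    # `Opening stocks` = 10 in 2019 and `Stock Variation` = (2, -1, 3) for
+    # (2019, 2018, 2017), cumSV = cumsum(SV) - first(SV) = (0, -1, 2) and the
+    # reconstructed openings are 10 - (0, -1, 2) = (10, 11, 8), which satisfies
+    # opening(t) + SV(t) = opening(t+1) in every year.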
+    group_by(area_code, item_code) %>% arrange(-year, item_code) %>%
+    mutate(cumSV = cumsum(`Stock Variation`) - first(`Stock Variation`),
+           `Opening stocks1` = first(`Opening stocks`) - cumSV) %>%
+    select(-cumSV, -`Opening stocks`) %>%
+    rename(`Opening stocks` = `Opening stocks1`) %>%
+    mutate(`Closing stocks` = `Opening stocks` + `Stock Variation`) %>%
+    mutate(Stockshifter = if_else(`Closing stocks` < 0, abs(`Closing stocks`), 0)) %>%
+    mutate(Stockshifter = if_else(`Opening stocks` < 0 & `Opening stocks` < `Closing stocks`,
+                                  abs(`Opening stocks`), Stockshifter)) %>%
+    mutate(Stockshifter = max(Stockshifter),
+           `Opening stocks` = `Opening stocks` + Stockshifter,
+           `Closing stocks` = `Opening stocks` + `Stock Variation`) %>%
+    select(-Stockshifter) %>%
+    ungroup() %>%
+    # Correct negative Processed where applicable
+    # For a few regions (e.g., Congo), move Residuals into Food where SUA food data did not exist
+    mutate(Processed = if_else(Processed < 0, 0, Processed),
+           Food = ifelse(Production > 0 & Food == 0 & Feed == 0 & Processed == 0 & Seed == 0 & `Other uses` == 0,
+                         Production + Import - Export + `Stock Variation`, Food),
+           Food = ifelse(Food < 0, 0, Food)) %>%
+    # Check regional supply, demand, and residuals
+    mutate(`Regional supply` = `Opening stocks` + Production + `Import`,
+           `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Closing stocks`,
+           Residuals = `Regional supply` - `Regional demand`) %>%
+    gather(element, value, -area_code, -item_code, -year) %>%
+    mutate(element = factor(element, levels = Bal_element_new))
+}
+
+# Function to disaggregate dissolved regions in historical years ----
+# (a copy of this function exists in gcamdata)
+
+#' FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION
+#'
+#' @param .DF dataframe to disaggregate
+#' @param AFFECTED_AREA_CODE FAO area codes for the regions affected; the first one should be the pre-dissolution region (e.g., USSR), followed by the post-dissolution regions.
+#' @param YEAR_DISSOLVE_DONE First year after the dissolution
+#' @param YEAR_AFTER_DISSOLVE_ACCOUNT Number of years of data after the dissolution used for sharing out historical data
+#'
+#' @return Disaggregated data for the historical period of the dissolved region
+#'
+FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION <-
+  function(.DF,
+           AFFECTED_AREA_CODE, # first one should be the dissolved area
+           YEAR_DISSOLVE_DONE,
+           # using 3 years of data after the dissolution for sharing
+           YEAR_AFTER_DISSOLVE_ACCOUNT = 3){
+
+    .DF %>%
+      # filter dissolved region related areas by their years
+      filter((area_code %in% AFFECTED_AREA_CODE[-1] & year >= YEAR_DISSOLVE_DONE)|
+               (area_code %in% AFFECTED_AREA_CODE[1] & year <= YEAR_DISSOLVE_DONE)) ->
+      .DF1
+
+    Number_of_Regions_After_Dissolution <- AFFECTED_AREA_CODE %>% length - 1
+
+    .DF1 %>% filter(year < YEAR_DISSOLVE_DONE) %>%
+      select(-area_code) %>%
+      right_join(
+        .DF1 %>% filter(year %in% c(YEAR_DISSOLVE_DONE:(YEAR_DISSOLVE_DONE + YEAR_AFTER_DISSOLVE_ACCOUNT))) %>%
+          dplyr::group_by_at(dplyr::vars(-year, -value)) %>%
+          replace_na(list(value = 0)) %>%
+          summarise(value = sum(value), .groups = "drop") %>%
+          dplyr::group_by_at(dplyr::vars(-value, -area_code)) %>%
+          mutate(Share = value/sum(value)) %>%
+          # use an equal share if data after the dissolution do not exist
+          mutate(NODATA = if_else(sum(value) == 0, T, F)) %>%
+          mutate(Share = if_else(NODATA == T, 1/Number_of_Regions_After_Dissolution, Share)) %>%
+          ungroup() %>% select(-value, -NODATA),
+        by = names(.)
%>% setdiff(c("year", "value", "area_code"))
+      ) %>% mutate(value = value * Share) %>% select(-Share)
+
+  }
+
+#' FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL
+#'
+#' @param .DF Input data frame
+#' @param SUDAN2012_BREAK If TRUE, break Sudan (former) before 2012 based on 2013-2016 data
+#' @param SUDAN2012_MERGE If TRUE, merge South Sudan into Sudan in all years
+#' @param .FAO_AREA_CODE_COL Name of the FAO area code column (default "area_code")
+#' @param .AREA_COL Name of the area name column (default "area")
+#'
+#' @return Data with the historical periods of dissolved regions disaggregated into their successor regions
+
+FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL <- function(.DF,
+                                                       .FAO_AREA_CODE_COL = "area_code",
+                                                       .AREA_COL = "area",
+                                                       SUDAN2012_BREAK = F,
+                                                       SUDAN2012_MERGE = T){
+
+  assertthat::assert_that(all(.FAO_AREA_CODE_COL %in% names(.DF)),
+                          msg = "A data frame with an area_code col is required")
+
+  # Remove the area column if it exists
+  .DF0 <- .DF
+  if (.AREA_COL %in% names(.DF)) {
+    .DF %>% select(-all_of(.AREA_COL)) -> .DF }
+
+  if(.FAO_AREA_CODE_COL != "area_code"){
+    # Check if "area_code" exists; rename it to area_code_TEMP
+    if ("area_code" %in% names(.DF)) {
+      assertthat::assert_that("area_code_TEMP" %in% names(.DF) == F)
+      .DF %>% rename(area_code_TEMP = area_code) -> .DF
+    }
+
+    # rename .FAO_AREA_CODE_COL to area_code
+    .DF %>% rename(area_code = .FAO_AREA_CODE_COL) -> .DF
+
+    # Will need to replace back later
+  }
+
+  # Define area codes based on FAO ----
+  # the first one is the dissolved area
+  # In 1991 USSR (228) collapsed into 15 countries
+  area_code_USSR = c(228, 1, 52, 57, 63, 73, 108, 113, 119, 126, 146, 185, 208, 213, 230, 235)
+  # first one is Russia
+
+  # In 1992 Yugoslav SFR dissolved into 5 countries
+  # Yugoslav SFR (248)
+  # Croatia (98)
+  # North Macedonia (154)
+  # Slovenia (198)
+  # Bosnia and Herzegovina (80)
+  # Serbia and Montenegro (186)
+  # In 2006 it further broke into 2:
+  # Montenegro (273)
+  # Serbia (272)
+  # These regions will be merged for all years in the data as most models aggregate them into a single region
+  area_code_Yugoslav <- c(248, 98, 154, 198, 80, 186)
+  area_code_SerbiaandMontenegro <- c(186, 273, 272)
+  # In 1999/2000 Belgium-Luxembourg (15) partitioned into 255 (Belgium) and 256 (Luxembourg)
+  area_code_Belgium_Luxembourg <- c(15, 255, 256)
+  # In 1993 Czechoslovakia (51) split into Czechia (167) and Slovakia (199)
+  area_code_Czechoslovakia <- c(51, 167, 199)
+  # In 2011 Sudan (former) (206) broke into South Sudan (277) and Sudan (276)
+  area_code_Sudan <- c(206, 276, 277)
+  # Ethiopia PDR (62) dissolved into Ethiopia (238) and Eritrea (178) in 1993
+  area_code_Ethiopia <- c(62, 238, 178)
+
+  .DF %>%
+    # remove pre-dissolution Yugoslav first; area_code_SerbiaandMontenegro is handled later
+    filter(!(area_code %in% area_code_Yugoslav[1])) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF, area_code_Yugoslav, 1992, 3)) ->
+    .DF1
+
+  FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_USSR, 1992, 3) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_SerbiaandMontenegro, 2006, 3)) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Belgium_Luxembourg, 2000, 3)) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Czechoslovakia, 1993, 3)) %>%
+    bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Ethiopia, 1993, 3)) ->
+    DF_FAO_AREA_DISAGGREGATE_HIST
+
+  .DF1 %>%
+    # remove pre-dissolution USSR (disaggregated above)
+    filter(!(area_code %in% area_code_USSR[1])) %>%
+    # remove pre-dissolution Serbia & Montenegro
+    filter(!(area_code %in% area_code_SerbiaandMontenegro[1])) %>%
+    # remove pre-dissolution Belgium-Luxembourg
+    filter(!(area_code %in% area_code_Belgium_Luxembourg[1])) %>%
+    # remove pre-dissolution Czechoslovakia
+    filter(!(area_code %in% area_code_Czechoslovakia[1])) %>%
+    # remove pre-dissolution Ethiopia PDR
+    filter(!(area_code %in% area_code_Ethiopia[1])) %>%
+    bind_rows(DF_FAO_AREA_DISAGGREGATE_HIST) ->
+    .DF2
+
+  if (SUDAN2012_BREAK == T) {
+    .DF2 %>%
+      # remove pre-dissolution Sudan (former)
+      filter(!(area_code %in% area_code_Sudan[1])) %>%
+      bind_rows(FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION(.DF1, area_code_Sudan, 2012, 3)) ->
+      .DF2
+  }
+
+  if (SUDAN2012_MERGE == T) {
+
+    .DF2 %>%
+      mutate(area_code = replace(area_code, area_code %in% area_code_Sudan, area_code_Sudan[1])) %>%
+      dplyr::group_by_at(dplyr::vars(-value)) %>%
+      summarise(value = sum(value, na.rm = T), .groups = "drop") %>%
+      ungroup() -> .DF2
+  }
+
+  if(.FAO_AREA_CODE_COL != "area_code"){
+
+    # rename area_code back to .FAO_AREA_CODE_COL
+    .DF2[[.FAO_AREA_CODE_COL]] <- .DF2[["area_code"]]
+    .DF2 <- .DF2[, !names(.DF2) %in% "area_code"]
+
+    # Check if "area_code_TEMP" exists; rename it back to area_code
+    if ("area_code_TEMP" %in% names(.DF2)) {
+      assertthat::assert_that("area_code" %in% names(.DF2) == F)
+      .DF2 %>% rename(area_code = area_code_TEMP) -> .DF2
+    }
+  }
+
+  if (.AREA_COL %in% names(.DF0)) {
+    .DF2 %>% # Get area back
+      left_join(.DF0 %>% distinct_at(vars(all_of(.AREA_COL), .FAO_AREA_CODE_COL)),
+                by = .FAO_AREA_CODE_COL) -> .DF2
+  }
+
+  return(.DF2)
+
+}
+
+# Count item_code and area_code by year
+FAOSTAT_check_count_plot <- function(.DF, .ELEMENT = c()){
+  if (.ELEMENT %>% length() == 0) {
+    .DF %>% distinct(element) %>% pull -> .ELEMENT
+  }
+  .DF %>% group_by(year, element) %>%
+    summarise(Country = length(unique(area_code)),
+              Item = length(unique(item_code)), .groups = "drop") %>%
+    gather(header, count, -year, -element) %>%
+    filter(element %in% .ELEMENT) %>%
+    ggplot() + facet_wrap(~header, scales = "free") +
+    geom_line(aes(x = year, y = count, color = element)) +
+    theme_bw()
+}
+
+# decimal places in ggplot
+scaleFUN <- function(x) sprintf("%.0f", x)
+
+lookup <- function(.lookupvalue, .lookup_df, .lookup_col, .target_col){
+
+  assert_that(is.character(.lookupvalue))
+  assert_that(is.data.frame(.lookup_df))
+  assert_that(.lookup_col %in% colnames(.lookup_df))
+  assert_that(.target_col %in% colnames(.lookup_df))
+
+  .lookup_df[grep(paste0("^", .lookupvalue, "$"),
+                  .lookup_df[, .lookup_col]), .target_col]
+}
+
+# Note: `data_map` (used to look up the default `code`) is expected to exist in the calling environment
+output_csv_data <- function(gcam_dataset, col_type_nonyear,
+                            title, unit, description = NA,
+                            code,
+                            out_dir,
+                            GZIP = F){
+
+  if (missing(code)) {
+    code = lookup(gcam_dataset, data_map, "name", "FAO_domain_code")
+  }
+
+  col_type = paste0(col_type_nonyear, paste0(rep("n", ncol(get(gcam_dataset)) - nchar(col_type_nonyear)), collapse = ""))
+
+  cmnts <- c(
+    paste0("File: ", gcam_dataset, ifelse(GZIP, ".csv.gz", ".csv")),
+    paste0("Title: ", title),
+    paste0("Units: ", unit),
+    paste0("Description: ", description),
+    paste0("Data source: FAOSTAT (main domain: ", code, " FAO.update: ", FAOSTAT_metadata(code = code)$dateupdate, ")"),
+    paste0("Date of CSV last update: ", Sys.Date()),
+    paste0("Column types: ", col_type),
+    "----------"
+  )
+  fqfn <- file.path(out_dir, paste0(gcam_dataset, ".csv"))
+  suppressWarnings(file.remove(fqfn))
+
+  if (GZIP == F) {
+    cat(paste("#", cmnts), file = fqfn, sep = "\n", append = TRUE)
+    readr::write_csv(get(gcam_dataset), fqfn, append = TRUE, col_names = TRUE, na = "")
+  } else {
+    cat(paste("#", cmnts), file =
gzfile(paste0(fqfn, ".gz")), sep = "\n", append = TRUE) + readr::write_csv(get(gcam_dataset), gzfile(paste0(fqfn, ".gz")), append = TRUE, col_names = TRUE, na = "") + } +} + diff --git a/R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R b/R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R new file mode 100644 index 00000000..5930fbed --- /dev/null +++ b/R/zaglu_L100.FAO_SUA_PrimaryEquivalent.R @@ -0,0 +1,1108 @@ +# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. + +#' module_aglu_L100.FAO_SUA_PrimaryEquivalent +#' +#' Generate supply utilization balance in primary equivalent +#' +#' @param command API command to execute +#' @param ... other optional parameters, depending on command +#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if +#' \code{command} is "MAKE") all the generated outputs: \code{GCAM_AgLU_SUA_APE_1973_2019}, +#' \code{FAO_AgProd_Kt_All},\code{FAO_AgArea_Kha_All},\code{FAO_Food_Macronutrient_All_2010_2019}, +#' \code{FAO_Food_MacronutrientRate_2010_2019_MaxValue} +#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. +#' A method to generate primary equivalent is created for the new FAOSTAT supply utilization data (2010 to 2019). +#' New SUA balance is connected to the old one (before 2010). Production and harvested area data with FAO region and item +#' for primary production are provided. For FAO food items, macronutrient values are calculated at SUA item level. +#' Data processing was consistent across scales. Note that GCAM regions and commodities in aggregation mapping can +#' be changed in corresponding mappings. The output data is not averaged over time. +#' @importFrom assertthat assert_that +#' @importFrom dplyr summarize bind_rows filter if_else inner_join left_join mutate rename select n group_by_at +#' first case_when vars +#' @importFrom tibble tibble +#' @importFrom tidyr complete drop_na gather nesting spread replace_na +#' @author XZ 2022 +module_aglu_L100.FAO_SUA_PrimaryEquivalent <- function(command, ...) { + + MODULE_INPUTS <- + c(FILE = "aglu/AGLU_ctry", + FILE = "common/iso_GCAM_regID", + FILE = "common/GCAM_region_names", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020", + FILE = "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + FILE = "aglu/FAO/SUA_item_code_map", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009", + FILE = "aglu/FAO/Mapping_item_FBS_GCAM", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020", + FILE = "aglu/FAO/FAO_ag_items_PRODSTAT", + FILE = "aglu/FAO/FAO_an_items_PRODSTAT", + FILE = "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean" + ) + + MODULE_OUTPUTS <- + c("GCAM_AgLU_SUA_APE_1973_2019", + "FAO_AgProd_Kt_All", + "FAO_AgArea_Kha_All", + "FAO_Food_Macronutrient_All_2010_2019", + "FAO_Food_MacronutrientRate_2010_2019_MaxValue") + + if(command == driver.DECLARE_INPUTS) { + return(MODULE_INPUTS) + } else if(command == driver.DECLARE_OUTPUTS) { + return(MODULE_OUTPUTS) + } else if(command == driver.MAKE) { + + year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. 
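+
+    # Note (editorial sketch): left_join_error_no_match() is gcamdata's strict join;
+    # unlike dplyr::left_join() it fails if any left-hand row fails to match or
+    # matches more than once. Using it below means an FAO area missing from
+    # AGLU_ctry aborts the build instead of silently becoming NA, e.g.
+    #   tibble(area = "Narnia") %>%
+    #     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area")
+    #   # -> error (unmatched rows), rather than iso = NA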
+
+    all_data <- list(...)[[1]]
+
+    # Load required inputs ----
+
+    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)
+
+    # Get Supply-utilization account (SUA) elements and use as factor
+    All_Bal_element <- levels(GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019$element)
+    All_Bal_element <- factor(All_Bal_element, levels = All_Bal_element)
+
+    # Section1: [2010-2019] Region aggregation of supply-utilization-accounting data ----
+
+    # Note: the volume of data in this processing is quite large. Therefore we took
+    # extra care to be cognizant of processing speed and memory usage through sections 1 and 2.
+    # In particular we rely on ID codes and factors as much as possible to speed up joins.
+    # In addition, we have filtered zero rows from the raw data to significantly reduce
+    # the overall volume. Unfortunately, this change makes the processing riddled with
+    # trap doors where we need to be extra careful to complete / refill zeros or risk losing
+    # rows of legitimate data.
+
+    # create a complete area / iso / GCAM region mapping
+    GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020 %>%
+      select(area_code, area) %>%
+      distinct() %>%
+      left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
+      left_join(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
+      left_join(GCAM_region_names, by = "GCAM_region_ID") ->
+      Area_Region_Map
+
+    # 1.1 Helper functions for regional aggregation ----
+
+    # Aggregate SUA to GCAM regions
+    # Intra-regional trade is removed when bilateral trade data are available
+    SUA_Reg_Agg <- function() {
+
+      GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 %>%
+        left_join_error_no_match(Area_Region_Map %>% select(area_code, GCAM_region_ID), by = "area_code") %>%
+        group_by(GCAM_region_ID, item_code, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() ->
+        DF_SUA_Agg
+
+      # Calculate intra-regional trade
+      GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020 %>%
+        left_join_error_no_match(Area_Region_Map %>% select(area_code, GCAM_region_ID), by = "area_code") %>%
+        left_join_error_no_match(Area_Region_Map %>% select(source_code = area_code, source_GCAM_region_ID = GCAM_region_ID), by = "source_code") %>%
+        filter(GCAM_region_ID == source_GCAM_region_ID) %>%
+        group_by(GCAM_region_ID, item_code, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        mutate(value = -value / 1000.0) ->
+        DF_INTRA_REG_TRADE
+
+      # SUA has fewer items and years than the bilateral data set, and in addition
+      # there are some small discrepancies: zero import/export in SUA vs. tiny amounts
+      # of trade in the bilateral data. Doing a left join here will drop these
+      # discrepancies, which is what we want.
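+      # Editorial sketch (hypothetical numbers): DF_INTRA_REG_TRADE holds intra-regional
+      # flows negated and converted to kt (value = -value / 1000), so the `value + TCL`
+      # below subtracts them; e.g., a regional Export of 10 kt that includes 4 kt of
+      # intra-regional trade becomes 10 + (-4) = 6 kt.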
+      bind_rows(DF_INTRA_REG_TRADE %>% mutate(element = All_Bal_element[All_Bal_element == "Export"]),
+                DF_INTRA_REG_TRADE %>% mutate(element = All_Bal_element[All_Bal_element == "Import"])) %>%
+        rename(TCL = value) %>%
+        right_join(DF_SUA_Agg, by = c("GCAM_region_ID", "item_code", "year", "element")) %>%
+        # equivalent to
+        #left_join(DF_SUA_Agg, ., by = c("GCAM_region_ID", "item_code", "year", "element")) %>%
+        mutate(value = if_else(is.na(TCL), value, value + TCL)) %>%
+        select(-TCL) %>%
+        filter(value != 0.0) ->
+        DF_SUA_Agg_TradeAdj
+
+      # Need to remove gross trade when export > production
+      # to maintain the triangle inequality rule
+      # Note that Prod < Export is still possible due to "Residuals"
+      DF_SUA_Agg_TradeAdj %>%
+        filter(element %in% c("Production", "Import", "Export")) %>%
+        spread(element, value, fill = 0.0) %>%
+        mutate(value = pmax(Production - Export, -Import)) %>%
+        filter(value < 0) %>%
+        select(-Production, -Import, -Export) ->
+        GrossTradeRM
+
+      bind_rows(GrossTradeRM %>% mutate(element = All_Bal_element[All_Bal_element == "Export"]),
+                GrossTradeRM %>% mutate(element = All_Bal_element[All_Bal_element == "Import"]),
+                DF_SUA_Agg_TradeAdj) %>%
+        group_by(GCAM_region_ID, item_code, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() ->
+        DF_SUA_Agg_TradeAdj_TriagAdj
+
+      return(DF_SUA_Agg_TradeAdj_TriagAdj)
+    }
+
+    # 1.2. Execution: regional aggregation ----
+    # Get SUA data ready
+    FAO_SUA_Kt_2010to2019_R <- SUA_Reg_Agg()
+
+    Min_SUA_Year <- min(FAO_SUA_Kt_2010to2019_R$year)
+    FAO_SUA_Kt_2010to2019 <- GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019
+    ## Clean up
+    rm(GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019)
+    rm(GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020)
+    ## Done Section1 ----
+    #****************************----
+
+    # Section2: [2010-2019] Primary equivalent aggregation to GCAM commodities ----
+
+    Mapping_SUA_PrimaryEquivalent %>%
+      left_join_error_no_match(SUA_item_code_map %>% rename(sink_item_code = item_code), by = c("sink_item" = "item")) %>%
+      left_join_error_no_match(SUA_item_code_map %>% rename(source_item_code = item_code), by = c("source_item" = "item")) %>%
+      mutate(APE_comm = as.factor(APE_comm)) ->
+      Mapping_SUA_PrimaryEquivalent_ID
+
+    #Mapping_SUA_PrimaryEquivalent_ID[Mapping_SUA_PrimaryEquivalent_ID$sink_item_code == 235, "source_primary"] = FALSE
+
+    Mapping_SUA_PrimaryEquivalent_ID %>%
+      select(item_code = sink_item_code, output_specific_extraction_rate) %>%
+      filter(!is.na(output_specific_extraction_rate)) ->
+      OUTPUT_SPECIFIC_EXTRACTION_RATE
+
+    # 2.1 Helper functions for SUA primary equivalent aggregation ----
+
+    #' Get extraction rate
+    #' @description Gross extraction rate is calculated for domestic, traded, and lagged values.
+    #' By gross, it means sink items are aggregated.
+    #' The function is used in Proc_primarize.
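+    #' For example (hypothetical numbers): if a region processes 100 kt of a source
+    #' item (e.g., wheat) into 75 kt of a sink item (e.g., flour), its regional
+    #' extraction rate is 75 / 100 = 0.75, and 1 kt of the sink item counts as
+    #' 1 / 0.75 ~ 1.33 kt of the source item in primary equivalent.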
+    #' @param DF_CURR_NEST Input supply-utilization accounting data frame with one tier of processing
+    #' @param DF_ALL Input supply-utilization accounting data frame with ALL the data
+    #' @return A data frame including regional, traded, and world extraction rates of a processing
+
+    Get_GROSS_EXTRACTION_RATE <- function(DF_CURR_NEST, DF_ALL) {
+      curr_sink_items = unique(DF_CURR_NEST$item_code)
+      Mapping_SUA_PrimaryEquivalent_ID %>%
+        filter(sink_item_code %in% curr_sink_items) ->
+        Curr_Sink_Mapping
+      curr_source_items = unique(Curr_Sink_Mapping$source_item_code)
+      Mapping_SUA_PrimaryEquivalent_ID %>%
+        filter(source_item_code %in% curr_source_items) ->
+        Curr_Source_Mapping
+      Curr_Source_Mapping %>%
+        group_by(APE_comm) %>%
+        mutate(minimum_extraction_rate = if_else(Q25asMin, extraction_rate_Q25, 0)) %>%
+        select(APE_comm, minimum_extraction_rate) %>%
+        distinct() ->
+        MIN_EXTRACTION_RATE
+
+      DF_ALL %>%
+        # Prepare data to calculate regional, traded, and world average extraction rates
+        tidyr::unnest(c(data)) %>%
+        filter(element == "Processed", item_code %in% curr_source_items) %>%
+        select(-nest_level) %>%
+        bind_rows(DF_CURR_NEST %>% filter(element == "Production" | element == "Export")) %>%
+        dplyr::group_by_at(vars(-item_code, -value)) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        complete(GCAM_region_ID = GCAM_region_names$GCAM_region_ID, nesting(element, year, APE_comm), fill = list(value = 0)) %>%
+        spread(element, value, fill = 0.0) %>%
+        left_join_error_no_match(MIN_EXTRACTION_RATE, by = c("APE_comm")) %>%
+        group_by(APE_comm, year) %>%
+        mutate(extraction_rate_world = sum(Production) / sum(Processed),
+               # in case sum(Processed) or sum(Production) == 0
+               extraction_rate_world = if_else(extraction_rate_world != 0 & is.finite(extraction_rate_world),
+                                               extraction_rate_world, 1),
+               extraction_rate = Production / Processed,
+               # Regional extraction rate = production of an aggregated processed (sink) item /
+               # processed use of an aggregated primary (source) item;
+               # use the world average to fill in NA or zero
+               extraction_rate = if_else(is.na(extraction_rate) | extraction_rate == 0, extraction_rate_world, extraction_rate),
+               # Apply the minimum extraction rate here
+               extraction_rate = pmax(extraction_rate, minimum_extraction_rate),
+               extraction_rate_trade = sum(Export) / sum(Export / extraction_rate),
+               extraction_rate_trade = if_else(is.na(extraction_rate_trade), extraction_rate, extraction_rate_trade),
+               # both processed and production > 0
+               positive_prod = Production > 0 & Processed > 0) %>%
+        ungroup() %>%
+        group_by(APE_comm, GCAM_region_ID) %>%
+        # Calculate the lagged extraction_rate but replace NA with the current rate (first period)
+        mutate(extraction_rate_lag = lag(extraction_rate, default = extraction_rate[1])) %>%
+        ungroup() %>%
+        select(APE_comm, GCAM_region_ID, year, bal_import = extraction_rate_trade, bal_domestic_lag = extraction_rate_lag, bal_domestic_current = extraction_rate) %>%
+        gather(bal_source, extraction_rate, bal_import, bal_domestic_lag, bal_domestic_current, factor_key = TRUE)
+    }
+
+    #' Separate the SUA balance into domestic and imported balances for sink items
+    #' @description The function is used in Proc_primarize.
+    #' @param DF_CURR_NEST Input supply-utilization accounting data frame with one tier of processing
+    #' @return SUA DF
+
+    Get_ARMINGTON_BALANCE <- function(DF_CURR_NEST) {
+      Import_Demand_Item <- factor(c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss"), levels = All_Bal_element)
+
+      DF_CURR_NEST %>%
+        # Calculate the imported consumption share
+        # The assumption is
that a portion of Import_Demand_Items was imported,
+        # so they need to be scaled by an international extraction rate.
+        # Note that stock variation is not included in import consumption to maintain stock balance,
+        # so additional adjustment may be needed
+        filter(element == "Import" | element %in% Import_Demand_Item) %>%
+        mutate(is_import = element == "Import") %>%
+        spread(is_import, value, fill = 0.0) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code) %>%
+        summarize(import = sum(`TRUE`),
+                  import_demand = sum(`FALSE`)) %>%
+        ungroup() %>%
+        mutate(Import_Demand_Share = import / import_demand,
+               # replace NA and Inf
+               Import_Demand_Share = if_else(is.finite(Import_Demand_Share), Import_Demand_Share, 0),
+               # The share should be smaller than 1, though outlier regions may import for storage
+               Import_Demand_Share = pmin(Import_Demand_Share, 1),
+               residual = import - import_demand * Import_Demand_Share) %>%
+        ungroup() %>%
+        select(APE_comm, GCAM_region_ID, item_code, year, Import_Demand_Share, residual) %>%
+        left_join(DF_CURR_NEST, ., by = c("APE_comm", "GCAM_region_ID", "item_code", "year")) %>%
+        # When Import_Demand_Item consumption < Import, those elements are used to share out
+        # import consumption; otherwise, Residuals is used for the adjustment
+        mutate(bal_import = case_when(element == "Import" ~ value,
+                                      element %in% Import_Demand_Item ~ value * Import_Demand_Share,
+                                      element == "Residuals" ~ residual,
+                                      TRUE ~ 0),
+               # Calculate domestic balance
+               bal_domestic = value - bal_import) %>%
+        select(-value, -Import_Demand_Share, -residual) %>%
+        gather(bal_source, value, bal_import, bal_domestic, factor_key = TRUE) ->
+        TradeBal_Data
+
+      Regional_supply_elements <- factor(c("Opening stocks", "Production", "Import"), levels = All_Bal_element)
+      Regional_demand_elements <- factor(c("Export", "Feed", "Food", "Loss", "Processed", "Seed", "Other uses", "Closing stocks"), levels = All_Bal_element)
+      TradeBal_Data %>%
+        mutate(is_supply = element %in% Regional_supply_elements,
+               is_demand = element %in% Regional_demand_elements) %>%
+        filter(is_supply | is_demand) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code, bal_source) %>%
+        # Clean the bal items
+        summarize(`Regional supply` = sum(value[is_supply]),
+                  `Regional demand` = sum(value[is_demand])) %>%
+        ungroup() %>%
+        mutate(`Residuals` = `Regional supply` - `Regional demand`) %>%
+        gather(element, value, `Regional supply`, `Regional demand`, `Residuals`) %>%
+        mutate(element = factor(element, levels = All_Bal_element)) %>%
+        bind_rows(TradeBal_Data %>% filter(!element %in% c("Regional supply", "Regional demand", "Residuals")), .)
+    }
+
+    #' Separate the domestic SUA balance into current and lagged balances for sink items
+    #' @description The function is used in Proc_primarize.
+    #' @param DF_CURR_NEST_TradeAdj Output from Get_ARMINGTON_BALANCE.
+
+
+    #' Separate the domestic SUA balance into current and lagged balances for sink items
+    #' @description The function is used in Proc_primarize.
+    #' @param DF_CURR_NEST_TradeAdj Output from Get_ARMINGTON_BALANCE: an input supply-utilization accounting data frame with one tier of processing
+    #' @return A SUA data frame with the domestic balance split into current and lagged (opening-stock) components
+
+    Get_STOCK_BALANCE <- function(DF_CURR_NEST_TradeAdj) {
+      Opening_Stock_Item <- factor(c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss"), levels = All_Bal_element)
+
+      get_bal_source_data <- function(data, bal_source_key) {
+        data[data$bal_source == bal_source_key, "data", drop = TRUE][[1]]
+      }
+
+      DF_CURR_NEST_TradeAdj %>%
+        tidyr::nest(data = -bal_source) ->
+        StockCalcNested
+
+      StockCalcNested %>%
+        get_bal_source_data("bal_domestic") %>%
+        filter(element == "Opening stocks" | element %in% Opening_Stock_Item) %>%
+        mutate(is_opening = element == "Opening stocks") %>%
+        spread(is_opening, value, fill = 0.0) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code) %>%
+        summarize(Ostock = sum(`TRUE`),
+                  Ostock_demand = sum(`FALSE`)) %>%
+        ungroup() %>%
+        mutate(Ostock_Demand_Share = Ostock / Ostock_demand,
+               # The share should be smaller than 1;
+               # other elements will be adjusted if not
+               Ostock_Demand_Share = if_else(is.finite(Ostock_Demand_Share), Ostock_Demand_Share, 0),
+               Ostock_Demand_Share = pmin(Ostock_Demand_Share, 1),
+               residual = Ostock - Ostock_demand * Ostock_Demand_Share) %>%
+        ungroup() %>%
+        select(APE_comm, GCAM_region_ID, item_code, year, Ostock_Demand_Share, residual) %>%
+        left_join(StockCalcNested %>% get_bal_source_data("bal_domestic"), ., by = c("APE_comm", "GCAM_region_ID", "item_code", "year")) %>%
+        mutate(bal_domestic_lag = case_when(element == "Opening stocks" ~ value,
+                                            element %in% Opening_Stock_Item ~ value * Ostock_Demand_Share,
+                                            element == "Residuals" ~ residual,
+                                            TRUE ~ 0),
+               # Calculate the current domestic balance
+               bal_domestic_current = value - bal_domestic_lag) %>%
+        select(-value, -Ostock_Demand_Share, -residual) %>%
+        gather(bal_source, value, bal_domestic_lag, bal_domestic_current, factor_key = TRUE) ->
+        StockBal_Data
+
+      Regional_supply_elements <- factor(c("Opening stocks", "Production", "Import"), levels = All_Bal_element)
+      Regional_demand_elements <- factor(c("Export", "Feed", "Food", "Loss", "Processed", "Seed", "Other uses", "Closing stocks"), levels = All_Bal_element)
+      Bal_types = c("bal_import", "bal_domestic_lag", "bal_domestic_current")
+      Bal_types = factor(Bal_types, levels = Bal_types)
+      StockBal_Data %>%
+        complete(year = unique(StockBal_Data$year), nesting(GCAM_region_ID, item_code, element, APE_comm, bal_source), fill = list(value = 0)) %>%
+        mutate(is_supply = element %in% Regional_supply_elements,
+               is_demand = element %in% Regional_demand_elements) %>%
+        group_by(APE_comm, GCAM_region_ID, year, item_code, bal_source) %>%
+        # Clean the bal items
+        summarize(`Regional supply` = sum(value[is_supply]),
+                  `Regional demand` = sum(value[is_demand]),
+                  # using max to guard against a missing Closing stocks row
+                  `Stock Variation` = max(value[element == "Closing stocks"], 0) - max(value[element == "Opening stocks"], 0)) %>%
+        ungroup() %>%
+        mutate(`Residuals` = `Regional supply` - `Regional demand`) %>%
+        gather(element, value, `Regional supply`, `Regional demand`, `Stock Variation`, `Residuals`) %>%
+        mutate(element = factor(element, levels = All_Bal_element)) %>%
+        bind_rows(StockBal_Data %>% filter(!element %in% c("Regional supply", "Regional demand", "Stock Variation", "Residuals")), .) %>%
+        tidyr::nest(data = -bal_source) %>%
+        mutate(bal_source = factor(bal_source, levels = Bal_types)) %>%
+        bind_rows(StockCalcNested %>% filter(bal_source == "bal_import") %>% mutate(bal_source = factor(bal_source, levels = Bal_types))) %>%
+        tidyr::unnest(c("data"))
+    }
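+
+    # The opening-stock split mirrors the Armington split above (hypothetical
+    # values, never executed): consumption is attributed to opening stocks in
+    # proportion to Ostock / Ostock_demand, capped at 1, so lagged supply
+    # (opening stocks) and current supply (production plus imports) each carry
+    # an internally consistent balance.
+    if(FALSE) {
+      ostock <- 10; ostock_demand <- 50
+      share <- min(ostock / ostock_demand, 1)   # 0.2
+      feed_from_stock <- 25 * share             # 5 kt of a 25 kt Feed use
+    }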
+
+
+    #' Primary equivalent aggregation
+    #' @param DF_ALL Input supply-utilization accounting data frame with all levels of data nested which need to be primarized
+    #' @return A supply-utilization accounting data frame with all levels processed and aggregated to GCAM_commodity
+
+    Proc_primarize <- function(DF_ALL){
+      MaxNest = max(DF_ALL$nest_level)
+      MinNest = 1
+      for(curr_nest in MaxNest:MinNest) {
+        # get the current tier to process
+        DF_ALL %>%
+          filter(nest_level == curr_nest) %>%
+          pull(data) %>%
+          first() ->
+          DF_CURR_NEST
+
+        # Sink items or processed items in the processing
+        curr_sink_items = unique(DF_CURR_NEST$item_code)
+        Mapping_SUA_PrimaryEquivalent_ID %>%
+          filter(sink_item_code %in% curr_sink_items) ->
+          Curr_Sink_Mapping
+        # Source items or primary items in the processing
+        curr_source_items = unique(Curr_Sink_Mapping$source_item_code)
+        Mapping_SUA_PrimaryEquivalent_ID %>%
+          filter(source_item_code %in% curr_source_items) ->
+          Curr_Source_Mapping
+
+        # OUTPUT_SPECIFIC_EXTRACTION_RATE is a data frame of item_code and output_specific_extraction_rate.
+        # In some cases, prescaling sink item SUA using output_specific_extraction_rate can improve the processing,
+        # e.g., when coproduction shares are not fixed.
+        if(nrow(OUTPUT_SPECIFIC_EXTRACTION_RATE %>% filter(item_code %in% curr_sink_items)) > 0) {
+          ## a. Pre-scale sink item data when OUTPUT_SPECIFIC_EXTRACTION_RATE is available ----
+          DF_CURR_NEST %>%
+            left_join(OUTPUT_SPECIFIC_EXTRACTION_RATE, by = c("item_code")) %>%
+            replace_na(list(output_specific_extraction_rate = 1)) %>%
+            mutate(value = value / output_specific_extraction_rate) %>%
+            select(-output_specific_extraction_rate) ->
+            DF_CURR_NEST
+        }
+
+        ## b. For the sink items of the tier, separate the balance into domestic and imported ----
+        # Note that the method here relies on Get_GROSS_EXTRACTION_RATE and Get_ARMINGTON_BALANCE
+        DF_CURR_NEST %>%
+          Get_ARMINGTON_BALANCE() %>%
+          Get_STOCK_BALANCE() %>%
+          # Get extraction rates for the domestic and traded balances.
+          # Note that a regional extraction rate could be inf,
+          # likely due to data inconsistency, e.g., zero processed in source but positive sink.
+          # No adjustments were made since 1/inf becomes zero in the scaling process, preserving the primary balance
+          left_join(Get_GROSS_EXTRACTION_RATE(DF_CURR_NEST, DF_ALL), by = c("APE_comm", "GCAM_region_ID", "year", "bal_source")) %>%
+          # Scale sink items to get the source item equivalent
+          mutate(value = value / extraction_rate) %>%
+          select(-extraction_rate) ->
+          .df1
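+
+        # Sketch of the scaling above (hypothetical numbers): 75 kt of flour at
+        # an extraction rate of 0.75 becomes 75 / 0.75 = 100 kt of wheat-primary
+        # equivalent, so the primary-equivalent supply-demand balance is preserved.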
+
+        ## c. Aggregate sink items into source items ----
+        # Production & processed are adjusted for primary aggregation;
+        # source items are bound in as well
+        DF_ALL %>%
+          filter(nest_level <= curr_nest) %>%
+          tidyr::unnest(c("data")) %>%
+          filter(element == "Processed", item_code %in% curr_source_items) %>%
+          select(-element) ->
+          .df2
+        .df2 %>%
+          complete(GCAM_region_ID = GCAM_region_names$GCAM_region_ID, nesting(APE_comm, item_code, nest_level, year), fill = list(value = 0)) %>%
+          complete(.df2 %>% distinct(APE_comm, item_code, nest_level), nesting(GCAM_region_ID, year), fill = list(value = 0)) %>%
+          group_by(APE_comm, item_code) %>%
+          mutate(value = sum(value)) %>%
+          ungroup() %>%
+          group_by(APE_comm, GCAM_region_ID, year) %>%
+          mutate(share = value / sum(value),
+                 share = if_else(is.finite(share), share, dplyr::n() / sum(dplyr::n()))) %>%
+          ungroup() %>%
+          select(-value, source_item_code = item_code) ->
+          source_share
+
+
+        ## d. Merge sink SUA into source items SUA ----
+        # Note that with multiple source items, sinks are aggregated into sources based on average processed shares across sources
+        # Prepare data to calculate the world average source share
+        .df1 %>%
+          left_join(source_share, by = c("APE_comm", "GCAM_region_ID", "year")) %>%
+          filter(!is.na(share)) %>%
+          mutate(value = value * share,
+                 item_code = source_item_code) %>%
+          select(-share) %>%
+          group_by(nest_level, APE_comm, GCAM_region_ID, year, item_code, element) %>%
+          summarize(value = sum(value)) %>%
+          ungroup() %>%
+          complete(element = All_Bal_element[All_Bal_element %in% c("Production", "Processed")], nesting(nest_level, APE_comm, GCAM_region_ID, year, item_code), fill = list(value = 0)) %>%
+          group_by(nest_level, APE_comm, GCAM_region_ID, year, item_code) %>%
+          mutate(value = if_else(element == "Production" | element == "Processed", value - value[element == "Production"], value)) %>%
+          ungroup() ->
+          .df3
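+
+        # Merge-share sketch (hypothetical): if an APE commodity has two source
+        # items whose Processed totals are 60 kt and 40 kt, the primarized sink
+        # balance is split 60/40 between them before aggregation; where shares
+        # are undefined (0/0), equal shares n()/sum(n()) are used instead.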
+
+        # Bind source item and aggregate across source & sink items based on primary equivalent.
+        # Note we will bind and aggregate by nest; ultimately it seems unlikely nesting provided
+        # any performance boost, but it certainly didn't hurt either.
+        .df3 %>%
+          tidyr::nest(data = -nest_level) ->
+          df3_nested
+
+        for(nest_i in df3_nested$nest_level) {
+          bind_rows(
+            DF_ALL[DF_ALL$nest_level == nest_i, "data", drop = TRUE][[1]],
+            df3_nested[df3_nested$nest_level == nest_i, "data", drop = TRUE][[1]]) %>%
+            group_by(APE_comm, GCAM_region_ID, year, item_code, element) %>%
+            summarize(value = sum(value)) %>%
+            ungroup() ->
+            AGG
+          DF_ALL %>%
+            filter(nest_level != nest_i) %>%
+            bind_rows(tibble(nest_level = nest_i, data = list(AGG))) ->
+            DF_ALL
+        }
+        # drop the processed tier as the data has now been aggregated and thus
+        # is no longer needed
+        DF_ALL %>% filter(nest_level != curr_nest) ->
+          DF_ALL
+      }
+
+      # Combine the remaining items by APE_comm
+      DF_ALL %>%
+        tidyr::unnest(c("data")) %>%
+        group_by(GCAM_region_ID, APE_comm, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        spread(element, value, fill = 0.0) %>%
+        # Do a final balance cleaning
+        mutate(`Regional supply` = `Opening stocks` + Production + `Import`,
+               `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Closing stocks`,
+               Residuals = `Regional supply` - `Regional demand`) %>%
+        gather(element, value, -GCAM_region_ID, -APE_comm, -year) ->
+        APE_AGG
+
+      # Aggregate by GCAM_commodity
+      # At this point we ditch the ID codes and factors as we return the data and
+      # make it available for the rest of the original processing
+      APE_AGG %>%
+        left_join_error_no_match(Mapping_SUA_PrimaryEquivalent %>% select(GCAM_commodity, APE_comm) %>% distinct(),
+                                 by = c("APE_comm")) %>%
+        group_by(GCAM_region_ID, GCAM_commodity, element, year) %>%
+        summarize(value = sum(value)) %>%
+        ungroup() %>%
+        left_join_error_no_match(GCAM_region_names, by = c("GCAM_region_ID")) %>%
+        mutate(element = as.character(element)) %>%
+        select(region, year, GCAM_commodity, element, value) ->
+        GCAM_APE_after2010
+
+      return(GCAM_APE_after2010)
+    }
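+
+    # Shape sketch of the nested input Proc_primarize expects (hypothetical
+    # rows, never executed): each nest_level keeps its SUA rows in a list-column,
+    # so tiers can be primarized from the deepest level up without repeated
+    # filtering of one large table.
+    if(FALSE) {
+      tibble(nest_level = c(-1L, 1L, 2L),
+             item = c("Wheat", "Flour of wheat", "Bread")) %>%
+        tidyr::nest(data = -nest_level)
+    }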
+
+    # 2.2. Execution: process data into APE ----
+
+
+    ## Loop through all GCAM_commodity with available data ----
+
+    FAO_SUA_Kt_2010to2019_R %>%
+      left_join(Mapping_SUA_PrimaryEquivalent_ID %>% select(APE_comm, item_code = sink_item_code, nest_level) %>% distinct(), by = c("item_code")) %>%
+      left_join(Mapping_SUA_PrimaryEquivalent_ID %>% select(APE_comm_source = APE_comm, item_code = source_item_code) %>% distinct(), by = c("item_code")) %>%
+      # find SUA items which are truly not mapped to anything and filter them out
+      mutate(APE_comm = if_else(is.na(APE_comm), APE_comm_source, APE_comm)) %>%
+      select(-APE_comm_source) %>%
+      filter(!is.na(APE_comm)) %>%
+      # the remaining rows with NA nest_level are primary; we need to keep them
+      # around for processing even though they don't need to be aggregated themselves,
+      # so we give them a nest level of -1
+      mutate(nest_level = if_else(is.na(nest_level), -1L, nest_level)) %>%
+      # we will literally nest by nest level to avoid constant subsetting,
+      # although we end up needing to unnest at times as well, so ultimately
+      # it likely makes little difference in performance
+      tidyr::nest(data = -nest_level) %>%
+      # we are now ready to recursively primarize APE commodities then aggregate
+      # to GCAM commodities
+      Proc_primarize() ->
+      GCAM_APE_after2010
+
+    rm(FAO_SUA_Kt_2010to2019_R)
+
+    ## Done Section2 ----
+    #****************************----
+
+    # Section3 [1970-2009] Food balance sheet (original) aggregation to GCAM regions and commodities ----
+
+    # 3.1. Helper functions ----
+
+    #' Balance gross trade
+    #' @description Scale gross export and import in all regions to make them equal at the world level.
+    #' @param .DF An input data frame with an element column including Import, Export, and Production
+    #' @param .MIN_TRADE_PROD_RATIO Trade will be removed if world total export or import over production is smaller than .MIN_TRADE_PROD_RATIO (1% default value)
+    #' @param .Reg_VAR Region variable name; default is ("area_code")
+    #' @param .GROUP_VAR Group variable; default is ("item_code", "year")
+    #' @return The same data frame with balanced world export and import.
+
+    GROSS_TRADE_ADJUST <- function(.DF,
+                                   .MIN_TRADE_PROD_RATIO = 0.01,
+                                   .Reg_VAR = 'area_code',
+                                   .GROUP_VAR = c("item_code", "year")){
+
+      # assert .DF structure
+      assertthat::assert_that(all(c("element", .GROUP_VAR) %in% names(.DF)))
+      assertthat::assert_that(dplyr::is.grouped_df(.DF) == F)
+      assertthat::assert_that(all(c("Import", "Export", "Production") %in%
+                                    c(.DF %>% distinct(element) %>% pull)))
+
+      .DF %>%
+        # Join ExportScaler and ImportScaler
+        left_join(
+          .DF %>%
+            spread(element, value) %>%
+            dplyr::group_by_at(vars(dplyr::all_of(.GROUP_VAR))) %>%
+            # filter out items with zero world trade or production
+            # and replace NA with zero later for the scalers
+            replace_na(list(Export = 0, Import = 0, Production = 0)) %>%
+            filter(sum(Export) != 0, sum(Import) != 0, sum(Production) != 0) %>%
+            # world trade should be larger than .MIN_TRADE_PROD_RATIO to have meaningful data;
+            # depending on the item group, .MIN_TRADE_PROD_RATIO can be set differently
+            filter(sum(Export) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
+            filter(sum(Import) / sum(Production) > .MIN_TRADE_PROD_RATIO) %>%
+            # finally,
+            # use the average gross trade value to calculate the trade scalers;
+            # the trade scalers will be applied to all regions
+            mutate(ExportScaler = (sum(Export) + sum(Import)) / 2 / sum(Export),
+                   ImportScaler = (sum(Export) + sum(Import)) / 2 / sum(Import)) %>%
+            select(dplyr::all_of(c(.Reg_VAR, .GROUP_VAR)), ExportScaler, ImportScaler) %>%
+            ungroup(),
+          by = c(dplyr::all_of(c(.Reg_VAR, .GROUP_VAR)))) %>%
+        replace_na(list(ExportScaler = 0, ImportScaler = 0)) %>%
+        # If world export, import, or prod is 0, trade will be zero
+        mutate(value = case_when(
+          element %in% c("Export") ~ value * ExportScaler,
+          element %in% c("Import") ~ value * ImportScaler,
+          TRUE ~ value)) %>%
+        select(-ExportScaler, -ImportScaler)
+
+    }
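+
+    # Scaler sketch (hypothetical world totals, never executed): with world
+    # Export = 90 and Import = 110 for an item-year, both sides are scaled to
+    # their mean, (90 + 110) / 2 = 100, so ExportScaler = 100/90 and
+    # ImportScaler = 100/110 apply uniformly across regions and gross trade
+    # balances exactly.
+    if(FALSE) {
+      Export <- 90; Import <- 110
+      ExportScaler <- (Export + Import) / 2 / Export    # ~1.11
+      ImportScaler <- (Export + Import) / 2 / Import    # ~0.91
+      all.equal(Export * ExportScaler, Import * ImportScaler)   # TRUE, both 100
+    }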
+
+    # 3.2. Execution ----
+    ## a. Aggregate FBSH_CB to GCAM commodity and region ----
+
+    GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009 %>%
+      gather_years() %>%
+      filter(year < Min_SUA_Year) %>%
+      filter(!is.na(value)) ->
+      FBSH_CB
+
+    # assert mapping is good
+    assertthat::assert_that(
+      Mapping_item_FBS_GCAM %>% filter(!is.na(GCAM_commodity)) %>%
+        distinct(item_code) %>% pull() %in%
+        c(FBSH_CB %>% distinct(item_code) %>% pull) %>% all)
+
+    Mapping_item_FBS_GCAM %>%
+      select(item_code, GCAM_commodity) %>%
+      filter(!is.na(GCAM_commodity)) %>%
+      left_join(FBSH_CB, by = "item_code") %>%
+      gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
+      gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
+      gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
+      dplyr::group_by_at(vars(area = region, year, GCAM_commodity, element)) %>%
+      summarise(value = sum(value), .groups = "drop") %>%
+      # complete element
+      complete(nesting(area, year, GCAM_commodity), element,
+               fill = list(value = 0)) ->
+      FBSH_CB_GCAM
+
+
+    ## b. Get primary production in GCAM region and sector ----
+
+    GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020 %>%
+      gather_years() %>%
+      filter(year < Min_SUA_Year) %>%
+      filter(!is.na(value), element == "Production") %>%
+      inner_join(
+        Mapping_SUA_PrimaryEquivalent %>% filter(source_primary == T) %>%
+          distinct(GCAM_commodity, item = source_item), by = "item") %>%
+      gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
+      gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
+      gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
+      dplyr::group_by_at(vars(area = region, year, GCAM_commodity, element)) %>%
+      summarise(value = sum(value), .groups = "drop") ->
+      QCL_PROD_GCAM
+
+    ## c. 
QCL_PROD_GCAM and FBSH_CB_GCAM: merge, scale, and connect---- + # Connect SUA (FBSH_CB) to primary production (QCL_PROD_GCAM) + # Primary production could be different due to aggregation or inconsistency + + QCL_PROD_GCAM %>% + # Complete elements in QCL_PROD + # also GCAM_commodity because no pork production in Pakistan + complete(area, year, GCAM_commodity, + element = unique(FBSH_CB_GCAM$element), fill = list(value = 0)) %>% + left_join_error_no_match( + FBSH_CB_GCAM %>% + # complete element to add zero productions + complete(area, year, GCAM_commodity, element, fill = list(value = 0)) %>% + rename(FBSH_CB = value), + by = c("area", "year", "GCAM_commodity", "element") + ) %>% + # mapping + group_by(area, GCAM_commodity, year) %>% + # When production in FBSH_CB > primary production (QCL_PROD_GCAM), adjust processed + # to ensure production <= primary production when processed is enough; (will scale later) + # calculate Prod_diff which is the diff in production b/t the two data sets + mutate(Prod_diff = FBSH_CB[element == "Production"] - value[element == "Production"] ) %>% + # adjust processed when prod_diff > 0 by canceling off production and processed + mutate(Prod_diff = if_else(Prod_diff > 0 & element %in% c("Production", "Processed"), Prod_diff, 0), + Processed = if_else(Prod_diff > 0 & element %in% c("Production", "Processed"), + FBSH_CB[element == "Processed"], 0), + FBSH_CB = FBSH_CB - pmin(Prod_diff, Processed), + # When production in FBSH_CB = 0 set to production in QCL so scaling will be consistent + FBSH_CB = if_else(FBSH_CB[element == "Production"] == 0 & element == "Production", value, FBSH_CB), + # After the above adjustments, re-scale SUA to match production in value + value = FBSH_CB * value[element == "Production"]/FBSH_CB[element == "Production"], + # fix NA + value = if_else(!is.finite(value), FBSH_CB, value)) %>% + ungroup() %>% + select(area, GCAM_commodity, element, year, value) %>% + # adjust gross trade + GROSS_TRADE_ADJUST(.MIN_TRADE_PROD_RATIO = 0.001, + .Reg_VAR = "area", + .GROUP_VAR = c("GCAM_commodity", "year")) %>% + spread(element, value) %>% + # Note that stock variation here was = opening - ending + # reversed here so the variation is a demand + mutate(`Stock Variation` = - `Stock Variation`, + `Regional supply` = Production + `Import`, + `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` + `Stock Variation`, + Residuals = `Regional supply` - `Regional demand`) %>% + tidyr::gather(element, value, -area, -GCAM_commodity, -year) %>% + ungroup() %>% + rename(region = area) -> + GCAM_APE_before2010 + + rm(FBSH_CB, FBSH_CB_GCAM) + + ## Done Section3 ---- + #****************************---- + + # Section4 [1970-2019] GCAM_APE SUA ---- + + # 4.1. Helper functions ---- + Check_Balance_SUA <- function(.DF){ + + assertthat::assert_that(all(c("element") %in% names(.DF))) + assertthat::assert_that(all(c("Import", "Export", "Production", + "Food", "Feed", "Other uses") %in% + c(.DF %>% distinct(element) %>% pull))) + # 0. Check NA + if (.DF %>% filter(is.na(value)) %>% nrow() > 0) { + warning("NA values in SUA Balance") + } + + + # 1. Positive value except stock variation and residues + if (isFALSE(.DF %>% filter(!element %in% c("Stock Variation", "Other uses")) %>% + summarise(min = min(value, na.rm = T)) %>% pull(min) >= -0.001)) { + warning("Negative values in key elements (not including stock variation and other uses)") + } + + # 2. 
Trade balance in all year and items + if (isFALSE(.DF %>% filter(element %in% c("Import", "Export")) %>% + group_by(year, GCAM_commodity, element) %>% + summarise(value = sum(value), .groups = "drop") %>% + spread(element, value) %>% filter(abs(Import - Export) > 0.0001) %>% nrow() == 0)) { + warning("Gross trade imbalance") + } + + # 3. SUA balance check + if (isFALSE(.DF %>% + spread(element, value) %>% + mutate(`Regional supply` = Production + `Import`, + `Regional demand` = `Export` + Feed + Food + `Other uses`, + bal = abs(`Regional supply` - `Regional demand`)) %>% + filter(bal > 0.0001) %>% nrow() == 0)) { + warning("Regional supply != Regional demand + Residuals") + } + + # 4. Balanced in all dimensions + assertthat::assert_that(.DF %>% nrow() == + .DF %>% distinct(year) %>% nrow * + .DF %>% distinct(GCAM_commodity) %>% nrow * + .DF %>% distinct(element) %>% nrow * + .DF %>% distinct(region) %>% nrow) + + } + + # 4.2. Connect and bind data from two periods ---- + + GCAM_AgLU_SUA_APE_1973_2019 <- + GCAM_APE_before2010 %>% + bind_rows(GCAM_APE_after2010) %>% + mutate(unit = "1000 tonnes") %>% + # clean and aggregate elements not using + filter(!element %in% c("Regional demand", "Regional supply", + "Opening stocks", "Closing stocks")) %>% + mutate(element = replace(element, + element %in% c("Stock Variation", "Processed", + "Seed", "Residuals", "Loss"), + "Other uses")) %>% + dplyr::group_by_at(dplyr::vars(-value)) %>% + summarise(value = sum(value), .groups = "drop") + + ## Check balance + GCAM_AgLU_SUA_APE_1973_2019 %>% Check_Balance_SUA + rm(GCAM_APE_before2010, GCAM_APE_after2010) + + + ## Done Section4 ---- + #****************************---- + + # Section5 [1970-2019] Connect production and area data ---- + + # This section gets crop and livestock production before aggregation (FAO region and items) + # For both before 2010 and after 2010 + # They are also aggregated to GCAM region and commodities to assert consistency + # The processing includes all crops (including fodder crops) and livestock items + + # 5.1. Get all mapping straight ---- + + Primary_Item_CROP <- + FAO_ag_items_PRODSTAT %>% + select(item, GCAM_commodity, GCAM_subsector) %>% + filter(!is.na(item), !is.na(GCAM_commodity)) %>% + # Fodder grass has a duplicate as it mapped to different GTAP crops + distinct %>% + mutate(CropMeat = if_else(GCAM_commodity %in% c("FodderGrass", "FodderHerb"), + "Crop_Fodder", "Crop_NonFodder")) + assertthat::assert_that( + all(Primary_Item_CROP %>% filter(CropMeat == "Crop_NonFodder") %>% pull(item) %in% + c(Mapping_SUA_PrimaryEquivalent %>% filter(source_primary == T) %>% + distinct(item = source_item) %>% pull)), + msg = "Inconsistent mapping of primary crops between FAO_ag_items_PRODSTAT and Mapping_SUA_PrimaryEquivalent" ) + + Primary_Item_MEAT <- + Mapping_SUA_PrimaryEquivalent %>% + # animal meat Eq since they are included as primary production after 2010 + filter(source_primary == T | grepl("MeatEq", APE_comm)) %>% + distinct(GCAM_commodity, item = source_item) %>% + filter(GCAM_commodity %in% + c(FAO_an_items_PRODSTAT %>% + filter(!is.na(GCAM_commodity)) %>% + distinct(GCAM_commodity) %>% pull))%>% + mutate(CropMeat = "Meat") + + # 5.2. 
Get primary production for all ----
+    # Connecting, mapping, arranging, and asserting
+
+    ## Bind production and area data for both fodder and nonfodder ----
+    FAO_AgProd_Kt_Area_Kha <-
+      GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020 %>%
+      mutate(item_set = "QCL_COMM_CROP_PRIMARY_FODDER") %>%
+      bind_rows(GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020) %>%
+      gather_years() %>%
+      filter(!is.na(value))
+
+    # Assert no infinite yields
+    assertthat::assert_that(
+      # only a safeguard here as the data was cleaned and area and prod are matched
+      FAO_AgProd_Kt_Area_Kha %>%
+        # filter only primary crop items (all crops with area)
+        filter(item_set %in% c("QCL_COMM_CROP_PRIMARY",
+                               "QCL_COMM_CROP_PRIMARY_FODDER")) %>%
+        select(area_code, item_code, year, element, value) %>%
+        spread(element, value) %>%
+        filter(is.infinite(Production / `Area harvested`) |
+                 is.infinite(`Area harvested` / Production)) %>%
+        nrow == 0,
+      msg = "Check region/item for prod > 0 & area = 0 or prod = 0 & area > 0" )
+
+    # Assert primary production in the two sources are consistent
+    assertthat::assert_that(
+      FAO_SUA_Kt_2010to2019 %>% filter(element == "Production") %>%
+        inner_join(FAO_AgProd_Kt_Area_Kha %>%
+                     filter(item_set != "QCL_COMM_OTHERPROC") %>%
+                     filter(element == "Production") %>% rename(value1 = value),
+                   by = c("area_code", "item_code", "element", "year")) %>%
+        mutate(diff = abs(value1 - value)) %>%
+        filter(diff > 0.0001) %>% nrow() == 0,
+      msg = "Primary production in SUA (FAO_SUA_Kt_2010to2019) and QCL (FAO_AgProd_Kt_Area_Kha) are inconsistent"
+    )
+
+    ## a. All production ----
+    # SUA meat production includes more items than (QCL) FAO_AgProd_Kt_Area_Kha after 2010;
+    # production in FAO_AgProd_Kt_Area_Kha before 2010 was used
+    FAO_AgProd_Kt_Area_Kha %>%
+      filter(element == "Production") %>%
+      filter(year < Min_SUA_Year) %>%
+      select(c(names(FAO_SUA_Kt_2010to2019), "item")) %>%
+      bind_rows(
+        # For after 2010
+        # Note that not all meat items came from QCL_PROD (unlike primary crops)
+        # E.g., meat Eq, offals (livers chicken), etc. 
were from derivation or SCL + # But all items should exist in Bal_new_all + # And Bal_new_all is identical to QCL_PROD for primary productions + FAO_SUA_Kt_2010to2019 %>% + filter(element == "Production") %>% + # ensure we at least have a complete series across time otherwise it may + # throw off moving avg calculations + complete(area_code = Area_Region_Map$area_code, year = pull(., year) %>% unique(), nesting(item_code, element), fill=list(value=0)) %>% + left_join_error_no_match(SUA_item_code_map, by = c("item_code"))) %>% + bind_rows( + # bind fodder crops for after 2010 + FAO_AgProd_Kt_Area_Kha %>% + filter(item_set == "QCL_COMM_CROP_PRIMARY_FODDER", + element == "Production") %>% + filter(year >= Min_SUA_Year) %>% + select(c(names(FAO_SUA_Kt_2010to2019), "item")) + ) %>% + # Inner join works as filter here + # Keep subsector info for crops + inner_join(Primary_Item_CROP %>% + bind_rows(Primary_Item_MEAT %>% + mutate(GCAM_subsector = GCAM_commodity)), + by = "item") %>% + # add in iso and gcam regions ID + left_join_error_no_match(Area_Region_Map, by = "area_code") -> + FAO_AgProd_Kt_All + + FAO_AgProd_Kt_All %>% + dplyr::group_by_at(vars(region, year, GCAM_commodity, element, CropMeat)) %>% + summarise(value = sum(value), .groups = "drop") -> + QCL_PROD_GCAM + + FAO_AgProd_Kt_All %>% + select(-region) -> + FAO_AgProd_Kt_All + + assertthat::assert_that( + GCAM_AgLU_SUA_APE_1973_2019 %>% + filter(element == "Production") %>% + left_join_error_no_match( + QCL_PROD_GCAM %>% filter(CropMeat != "Crop_Fodder") %>% + select(-CropMeat) %>% + rename(value1 = value) %>% + complete(nesting(region, year, element), GCAM_commodity, fill = list(value1 = 0)), + by = c("region", "GCAM_commodity", "year", "element")) %>% + mutate(diff = abs(value1 - value)) %>% + filter(diff > 0.0001) %>% nrow() == 0, + msg = "Primary production from two sources + (GCAM_AgLU_SUA_APE_1973_2019 and FAO_AgProd_Kt_Area_Kha) are inconsistent." ) + + ## b. All area harvested ---- + + assertthat::assert_that( + all(Primary_Item_CROP %>% pull(item) %in% + c(FAO_AgProd_Kt_Area_Kha %>% + filter(item_set %in% c("QCL_COMM_CROP_PRIMARY", + "QCL_COMM_CROP_PRIMARY_FODDER")) %>% + pull(item)) ), + msg = "Not all required primary crop items included in FAO_AgProd_Kt_Area_Kha" ) + + FAO_AgProd_Kt_Area_Kha %>% + filter(element == "Area harvested") %>% + select(c(names(FAO_SUA_Kt_2010to2019), "item")) %>% + # Keep subsector info for crops + inner_join(Primary_Item_CROP, by = "item") %>% + # add in iso and gcam regions ID + left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") -> + FAO_AgArea_Kha_All + + #****************************---- + #Section6 Connect food items and macronutrient rates ---- + + # 6.1 Separate FAO food items into GCAM food items and NEC for macronutrient ---- + # GCAM included most of the food items + # All food item with available macronutrient info from FAOSTAT are included + + # a. 
Get all GCAM SUA items from the mapping by binding both source and sink items + # about 486 items (out of 530) used in GCAM + + Mapping_SUA_PrimaryEquivalent %>% + select(GCAM_commodity, item = source_item) %>% + bind_rows(Mapping_SUA_PrimaryEquivalent %>% + select(GCAM_commodity, item = sink_item)) %>% + distinct() -> + SUA_Items_GCAM + + assertthat::assert_that( + SUA_Items_GCAM %>% distinct(item) %>% nrow() == SUA_Items_GCAM %>% nrow(), + msg = "Check duplicates in Mapping_SUA_PrimaryEquivalent SUA items" + ) + + # highly processed products or other products are not included in GCAM + # (e.g., wine, infant food, or other nonfood items etc.) + + SUA_item_code_map %>% + filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM + + # b. There are 426 FAO food items, all included in FAO_SUA_Kt_2010to2019 (530 items) + # SUA_Items_Food includes both GCAM and NonGCAM(NEC) + SUA_item_code_map %>% + filter(item_code %in% unique(GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean$item_code)) %>% + left_join(SUA_Items_GCAM, by = "item") %>% + # For NA GCAM_commodity: not elsewhere classified (NEC) + # So we would know % of food calories not included in GCAM commodities + mutate(GCAM_commodity = if_else(is.na(GCAM_commodity), "NEC", GCAM_commodity)) -> + SUA_Items_Food + + + # 6.2 Get macronutrient values ---- + + ### a. Get world average macronutrient ---- + # For filling in missing values + + GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>% + tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>% + group_by(item, item_code, macronutrient) %>% + summarise(macronutrient_value_World = mean(macronutrient_value), .groups = "drop") %>% + ungroup() -> + SUA_food_macronutrient_rate_World + + + ### b. 
Calculate SUA food Calories consumption by joining macronutrient rates and SUA food ---- + + FAO_SUA_Kt_2010to2019 %>% + filter(element == "Food", item_code %in% SUA_Items_Food$item_code) %>% + # ensure we at least have a complete series across time otherwise it may + # throw off moving avg calculations + complete(area_code = Area_Region_Map$area_code, year = pull(., year) %>% unique(), nesting(item_code, element), fill=list(value=0)) %>% + rename(Food_Kt = value) %>% + select(-element) %>% + left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>% + repeat_add_columns( + tibble(macronutrient = c("calperg", "fatperc", "proteinperc"))) %>% + left_join( + GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>% + tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc), + by = c("area_code", "item_code", "item", "macronutrient") + ) %>% + left_join_error_no_match(SUA_food_macronutrient_rate_World, + by = c("item_code", "item", "macronutrient")) %>% + mutate(macronutrient_value = if_else(is.na(macronutrient_value), + macronutrient_value_World, + macronutrient_value), + # calculate total Cal, protein and fat in food + # value was in 1000 ton or 10^ 9 g + value = macronutrient_value * Food_Kt, + value = if_else(macronutrient %in% c("fatperc", "proteinperc"), + value / 100 /1000, value)) %>% # unit from perc to Mt + select(-macronutrient_value, -macronutrient_value_World, -Food_Kt) %>% + # rename element with units + mutate(macronutrient = case_when( + macronutrient == "calperg" ~ "MKcal", + macronutrient == "fatperc" ~ "MtFat", + macronutrient == "proteinperc" ~ "MtProtein" )) %>% + left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") -> + FAO_Food_Macronutrient_All_2010_2019 + + ### c. Get the max values of macronutrient conversion rate (per GCAM_commodity) ---- + # This will be used later as an upper bound to improve the data + GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>% + tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>% + left_join_error_no_match(SUA_Items_Food, + by = c("item_code", "item")) %>% + group_by(GCAM_commodity, macronutrient) %>% + summarise(max_macronutrient_value = max(macronutrient_value), .groups = "drop") -> + FAO_Food_MacronutrientRate_2010_2019_MaxValue + + + #****************************---- + # Produce outputs ---- + #******************************* + + GCAM_AgLU_SUA_APE_1973_2019 %>% + add_title("GCAM_AgLU_SUA_APE_1973_2019") %>% + add_units("kton") %>% + add_comments("Supply utilization balance for GCAM commodities and regions in primary equivalent") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "common/GCAM_region_names", + "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + "aglu/FAO/SUA_item_code_map", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020", + "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009", + "aglu/FAO/Mapping_item_FBS_GCAM") -> + GCAM_AgLU_SUA_APE_1973_2019 + + FAO_AgProd_Kt_All %>% + add_title("FAO_AgProd_Kt_All") %>% + add_units("1000 tonnes") %>% + add_comments("Supply utilization balance for GCAM commodities and regions in primary equivalent") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/FAO_ag_items_PRODSTAT", + "aglu/FAO/FAO_an_items_PRODSTAT", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + 
"aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") -> + FAO_AgProd_Kt_All + + FAO_AgArea_Kha_All %>% + add_title("FAO_AgArea_Kha_All") %>% + add_units("1000 ha") %>% + add_comments("Harvested area") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/FAO_ag_items_PRODSTAT", + "aglu/FAO/FAO_an_items_PRODSTAT", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_96Regs_16FodderItems_1973to2020", + "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") -> + FAO_AgArea_Kha_All + + FAO_Food_Macronutrient_All_2010_2019 %>% + add_title("GCAM_AgLU_SUA_APE_1973_2019") %>% + add_units("MKcal, MtFat, MtProtein") %>% + add_comments("Macronutrient consumption values connected to food consumption in GCAM_AgLU_SUA_APE_1973_2019") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> + FAO_Food_Macronutrient_All_2010_2019 + + FAO_Food_MacronutrientRate_2010_2019_MaxValue %>% + add_title("FAO_Food_MacronutrientRate_2010_2019_MaxValue") %>% + add_units("cal per g, fat perc. , protein perc.") %>% + add_comments("The max value of macronutrient conversion rate across region, year, and SUA items (per GCAM_commodity") %>% + add_precursors("aglu/AGLU_ctry", + "common/iso_GCAM_regID", + "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", + "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", + "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> + FAO_Food_MacronutrientRate_2010_2019_MaxValue + + return_data(MODULE_OUTPUTS) + + + } else { + stop("Unknown command") + } +} diff --git a/R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R b/R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R deleted file mode 100644 index 8dd7d67d..00000000 --- a/R/zchunk_LA100.FAO_SUA_PrimaryEquivalent.R +++ /dev/null @@ -1,1034 +0,0 @@ -# Copyright 2019 Battelle Memorial Institute; see the LICENSE file. - -#' module_aglu_L100_FAOSTAT_SUA_PrimaryEquivalent -#' -#' Generate supply utilization balance in primary equivalent -#' -#' @param command API command to execute -#' @param ... other optional parameters, depending on command -#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if -#' \code{command} is "MAKE") all the generated outputs: \code{GCAM_AgLU_SUA_APE_2010_2019}, -#' \code{FAO_AgProd_Kt_All},\code{FAO_AgArea_Kha_All},\code{FAO_Food_Macronutrient_All_2010_2019}, -#' \code{FAO_Food_MacronutrientRate_2010_2019_MaxValue} -#' @details This chunk compiles balanced supply utilization data in primary equivalent in GCAM region and commodities. -#' A method to generate primary equivalent is created for the new FAOSTAT supply utilization data (2010 to 2019). -#' New SUA balance is connected to the old one (before 2010). Production and harvested area data with FAO region and item -#' for primary production are provided. For FAO food items, macronutrient values are calculated at SUA item level. -#' Data processing was consistent across scales. Note that GCAM regions and commodities in aggregation mapping can -#' be changed in corresponding mappings. The output data is not averaged over time. 
-#' @importFrom assertthat assert_that -#' @importFrom dplyr bind_rows filter if_else inner_join left_join mutate rename select n group_by_at vars case_when arrange -#' @importFrom tidyr complete drop_na gather nesting spread replace_na -#' @importFrom tibble tibble -#' @author XZ 2022 -module_aglu_L100_FAOSTAT_SUA_PrimaryEquivalent <- function(command, ...) { - - MODULE_INPUTS <- - c(FILE = "aglu/AGLU_ctry", - FILE = "common/iso_GCAM_regID", - FILE = "common/GCAM_region_names", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020", - FILE = "aglu/FAO/Mapping_SUA_PrimaryEquivalent", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009", - FILE = "aglu/FAO/Mapping_item_FBS_GCAM", - FILE = "aglu/FAO/FAO_ag_items_PRODSTAT", - FILE = "aglu/FAO/FAO_an_items_PRODSTAT", - FILE = "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean" - ) - - MODULE_OUTPUTS <- - c("GCAM_AgLU_SUA_APE_2010_2019", - "FAO_AgProd_Kt_All", - "FAO_AgArea_Kha_All", - "FAO_Food_Macronutrient_All_2010_2019", - "FAO_Food_MacronutrientRate_2010_2019_MaxValue") - - if(command == driver.DECLARE_INPUTS) { - return(MODULE_INPUTS) - } else if(command == driver.DECLARE_OUTPUTS) { - return(MODULE_OUTPUTS) - } else if(command == driver.MAKE) { - - year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check. - - all_data <- list(...)[[1]] - - # Load required inputs ---- - - lapply(MODULE_INPUTS, function(d){ - # get name as the char after last / - nm <- tail(strsplit(d, "/")[[1]], n = 1) - # get data and assign - assign(nm, get_data(all_data, d, strip_attributes = T), - envir = parent.env(environment())) }) - - # Section1: [2010-2019] Region aggregation of supply-utilization-accounting data ---- - - # 1.1. Helper functions for SUA regional aggregation ---- - - #' Adjust gross trade in SUA data to ensure regional export is smaller than production for an SUA item - #' @description The function is used as an option in SUA_REG_Agg - #' @param .DF input supply-utilization accounting data frame - #' @return Trade-adjusted supply-utilization accounting data frame - - RM_GTOSS_TRADE_TRIANGLE_INEQUALITY <- function(.DF){ - - # need to remove gross trade when export > production - # to maintain triangle the inequality rule - .DF %>% group_by(area, year, item) %>% - # Calculate GrossTradeRM when prod - export < 0 and element in export and import - # Note that when import < export, e.g., export from stock or discrepancy, import is used for adjustments - mutate(GrossTradeRM = pmax(value[element == "Production"] - value[element == "Export"], - -value[element == "Import"])) %>% - mutate(GrossTradeRM = ifelse(GrossTradeRM < 0 & element %in% c("Export", "Import"), - GrossTradeRM, 0)) %>% - # Remove both import and export - mutate(value = value + GrossTradeRM) %>% select(-GrossTradeRM) %>% - ungroup() - } - - - #' Regional SUA aggregation (e.g., 32 in GCAM) with an option of removing intra-regional trade - #' @description intra-regional trade, by default, removed only for the SUA items with bilateral trade data available. - #' E.g., for 32 regions, if removed, global trade becomes inter-32-region trade. 
- #' @param .DF_SUA input supply-utilization accounting data frame - #' @param .RM_IntraRegTrade If TRUE, intra-regional trade is removed after aggregation - #' @param .TM_Bilateral A data frame of bilateral trade data - #' @param .RM_TRIANGLE_INEQUALITY If TRUE, adjust gross trade in SUA data to ensure regional export is smaller than production for an SUA item - #' @return - - SUA_REG_Agg <- function(.DF_SUA, - .RM_IntraRegTrade = TRUE, - .TM_Bilateral, - .RM_TRIANGLE_INEQUALITY = TRUE) { - # Assertion - # iso_GCAM_regID specified regional aggregation mapping - # Need to get intra-regional area-source pairs based on the info - assertthat::assert_that(is.data.frame(AGLU_ctry)) - assertthat::assert_that(is.data.frame(iso_GCAM_regID)) - assertthat::assert_that(is.data.frame(GCAM_region_names)) - if (.RM_IntraRegTrade == TRUE) { - assertthat::assert_that(is.data.frame(.TM_Bilateral)) - } - if (.RM_TRIANGLE_INEQUALITY == TRUE) { - assertthat::assert_that(is.function(RM_GTOSS_TRADE_TRIANGLE_INEQUALITY)) - } - - - # Aggregation based on region mapping ---- - .DF_SUA %>% - # Join GCAM region mappings - gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>% - gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>% - dplyr::group_by_at(vars(area = region, year, item, element)) %>% - summarise(value = sum(value), .groups = "drop") -> - DF_SUA_Agg - - if (.RM_IntraRegTrade == TRUE) { - ## If removing intra-regional trade after aggregation ---- - - ### Get bilateral pairs that need subtraction ---- - .TM_Bilateral %>% distinct(area, source) %>% - gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>% - select(-iso) %>% - gcamdata::left_join_error_no_match(AGLU_ctry %>% select(source = FAO_country, iso), by = "source") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, Source_GCAM_region_ID = GCAM_region_ID), - by = "iso") %>% - filter(GCAM_region_ID == Source_GCAM_region_ID) %>% - select(-GCAM_region_ID, -Source_GCAM_region_ID, -iso) -> - TM_intra_REG_pair - - ### Get trade flow and aggregate to gross and aggregated region---- - .TM_Bilateral %>% inner_join(TM_intra_REG_pair, by = c("area", "source")) -> TM_bilateral_rm - - # gross trade agg. - TM_bilateral_rm %>% # Trade is balanced if aggregating bilateral - mutate(element = "Import") %>% - group_by(area_code, area, item_code, item, element, year) %>% - summarise(TCL = sum(value, na.rm = T), .groups = "drop") %>% - bind_rows( - TM_bilateral_rm %>% - mutate(element = "Export") %>% - group_by( - area_code = source_code, - area = source, - item_code, - item, - element, - year - ) %>% - summarise(TCL = sum(value, na.rm = T), .groups = "drop") - ) %>% - # regional agg. 
- gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>% - gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>% - left_join(GCAM_region_names, by = "GCAM_region_ID") %>% - dplyr::group_by_at(vars(area = region, year, item, element)) %>% - summarise(TCL = sum(TCL), .groups = "drop") -> - TCL_rm - - ### Join aggregated data and subtract intra-regional trade ---- - ## be careful with the unit - DF_SUA_Agg %>% left_join(TCL_rm, by = c("area", "year", "item", "element")) %>% - mutate(value = if_else(is.na(TCL), value, value - TCL / 1000)) %>% - select(-TCL) -> - DF_SUA_Agg - } - - if (.RM_TRIANGLE_INEQUALITY == TRUE) { - ### Remove gross trade to comply with TRIANGLE_INEQUALITY ---- - ## Remove gross trade to ensure prod > export - DF_SUA_Agg %>% - RM_GTOSS_TRADE_TRIANGLE_INEQUALITY -> - DF_SUA_Agg1 - return(DF_SUA_Agg1) - } else { - return(DF_SUA_Agg) - } - } - - - # 1.2. Execution: regional aggregation ---- - - # Get SUA data ready - FAO_SUA_Kt_2010to2019 <- - GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019 %>% - gather_years() %>% - filter(!is.na(value)) # filter out nonexist regions years due to gather e.g., USSR after 1991 - # Get bilateral trade data ready - FAO_BiTrade_Kt_2010to2019 <- - GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020 %>% - gather_years() %>% - filter(!is.na(value)) - ## Aggregate to *FAO_SUA_Kt_2010to2019_R ---- - FAO_SUA_Kt_2010to2019_R <- - SUA_REG_Agg( - .DF_SUA = FAO_SUA_Kt_2010to2019, - .RM_IntraRegTrade = TRUE, - .TM_Bilateral = FAO_BiTrade_Kt_2010to2019 - ) - ## Clean up - rm(FAO_BiTrade_Kt_2010to2019) - ## Done Section1 ---- - #****************************---- - - # Section2: [2010-2019] Primary equivalent aggregation to GCAM commodities ---- - - # 2.1 Helper functions for SUA primary equivalent aggregation ---- - - #' Check and assert SUA data frame structure - #' @param .DF Input supply-utilization accounting data frame - #' @return Logical T or F - - is.SUA <- function(.DF){ - # Generate SUA class - - assertthat::assert_that(is.data.frame(.DF)) - - # Data col names check - assertthat::assert_that(all( c("element", "value")%in% names(.DF))) - assertthat::assert_that(any( c("item", "item_code", "GCAM_commodity")%in% names(.DF))) - - # Has all elements - c("Opening stocks", "Production", "Import", - "Export", "Processed", "Food", "Feed", "Seed", "Other uses", "Loss", "Closing stocks", - "Stock Variation") -> - Bal_element - - assertthat::assert_that( all(Bal_element %in% unique(.DF$element))) - - } - - #' Get extraction rate - #' @description Gross extraction rate is calculated for domestic, traded, and lagged values. - #' By gross, it means sink items are aggregated. - #' The function is used in Proc_primarize. 
- #' @param .DF Input supply-utilization accounting data frame with one tier of processing - #' @param .source_item Source items or primary items in the processing - #' @param .sink_item Sink items or processed items in the processing - #' @return A data frame including regional, traded, and world extraction rates of a processing - - Get_GROSS_EXTRACTION_RATE <- function(.DF, .source_item, .sink_item){ - - assertthat::assert_that(is.SUA(.DF)) - assertthat::assert_that(is.data.frame(Mapping_SUA_PrimaryEquivalent)) - - Mapping_SUA_PrimaryEquivalent %>% filter(source_item %in% .source_item) %>% - distinct(extraction_rate_Q25, Q25asMin) %>% - mutate(minimium_extraction_rate = if_else(Q25asMin == T, extraction_rate_Q25, 0)) %>% - pull(minimium_extraction_rate) -> MIN_EXTRACTION_RATE - - .DF %>% - #Prepare data to calculate regional, traded, and world average extraction rates - filter((element %in% c("Production", "Export") & item %in% .sink_item) | - (element == "Processed" & item %in% .source_item)) %>% - group_by(area, year, element) %>% - # sum across year and also sink or source items - summarise(value = sum(value), .groups = "drop") %>% - spread(element, value) %>% - group_by(year) %>% - mutate(extraction_rate_world = sum(Production) / sum(Processed)) %>% - # in case sum(Processed) or sum(Production) == 0 - mutate(extraction_rate_world = if_else(extraction_rate_world != 0 & is.finite(extraction_rate_world), - extraction_rate_world, 1)) %>% - # Regional extraction rate = prod of an aggregated processed item / Processed use of an aggregated primary item - # Use world average to fill in NA or zero - mutate(extraction_rate = if_else(is.na(Production / Processed), extraction_rate_world, Production / Processed)) %>% - mutate(extraction_rate = if_else(extraction_rate == 0, extraction_rate_world, extraction_rate)) %>% - # Using minimum extraction rate here - mutate(minimium_extraction_rate = MIN_EXTRACTION_RATE) %>% - mutate(extraction_rate = pmax(MIN_EXTRACTION_RATE, extraction_rate)) %>% - # Calculate traded extraction rate - mutate(extraction_rate_trade = sum(Export) / sum(Export / extraction_rate), - extraction_rate_trade = if_else(is.na(extraction_rate_trade), extraction_rate, extraction_rate_trade), - # both processed and production > 0 - positive_prod = if_else(Production >0 & Processed > 0 , T, F) ) %>% - ungroup() %>% - # Calculate lagged extraction_rate but replace NA with current rate (first period) - group_by(area) %>% - mutate(extraction_rate_lag = lag(extraction_rate)) %>% - mutate(extraction_rate_lag = if_else(is.na(extraction_rate_lag), extraction_rate, extraction_rate_lag)) %>% - ungroup() %>% - select(area, year, extraction_rate, extraction_rate_lag, extraction_rate_trade, extraction_rate_world, positive_prod) - } - - - #' Separate the SUA balance into domestic and imported balanced for sink_item - #' @description The function is used in Proc_primarize - #' @param .DF Input supply-utilization accounting data frame with one tier of processing - #' @param .SINK_ITEM Sink items or processed items in the processing - #' @return SUA DF - - Get_ARMINGTON_BALANCE <- function(.DF, .SINK_ITEM){ - - assertthat::assert_that(is.SUA(.DF)) - Import_Demand_Item <- c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss") - - .DF %>% filter(item %in% .SINK_ITEM) %>% - group_by(area, year, item) %>% - # Calculate imported consumption share - # The assumption is that a portion of Import_Demand_Items was imported - # so they need to be scaled by an international extraction rate - # Note that 
stock variation is not included in import consumption to maintain stock balance - # so additional adjustment may be needed - mutate(Import_Demand_Share = value[element == "Import"] / - sum(value[element %in% Import_Demand_Item])) %>% - # replace NA and inf - mutate(Import_Demand_Share = if_else(!is.finite(Import_Demand_Share), 0, - Import_Demand_Share)) %>% - # The share should be small than 1 though outlier regions may import for storage - mutate(Import_Demand_Share = pmin(Import_Demand_Share, 1)) %>% - # when Import_Demand_Item consumption < Import they are used to share out Import consumptions - # otherwise, Residuals is used for adjustments - mutate(bal_import = case_when( - element %in% c("Import") ~ value, - element %in% Import_Demand_Item ~ Import_Demand_Share * value, - TRUE ~ 0) ) %>% - mutate(bal_import = if_else(element %in% c("Residuals"), - bal_import[element == "Import"] - sum(bal_import[element %in% Import_Demand_Item]), - bal_import )) %>% - select(-Import_Demand_Share) %>% - # Calculate domestic balance - mutate(bal_domestic = value - bal_import) %>% - select(-value) %>% ungroup() %>% - tidyr::gather(bal_source, value, bal_import, bal_domestic) %>% - # Clean the bal items - spread(element, value) %>% - mutate(`Regional supply` = `Opening stocks` + Production + `Import`, - `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` +`Closing stocks`, - Residuals = `Regional supply` - `Regional demand`) %>% - tidyr::gather(element, value, -area, -item, -year, -bal_source) - - } - - - #' Separate the domestic SUA balance into current and lagged balanced for sink_item - #' @description The function is used in Proc_primarize - #' @param .DF Output from Get_ARMINGTON_BALANCE. Input supply-utilization accounting data frame with one tier of processing and - #' @param .SINK_ITEM Sink items or processed items in the processing - #' @return SUA DF - - Get_STOCK_BALANCE <- function(.DF, .SINK_ITEM){ - - assertthat::assert_that(is.SUA(.DF)) - Opening_Stock_Item <- c("Food", "Feed", "Processed", "Other uses", "Seed", "Loss") - - .DF %>% filter(item %in% .SINK_ITEM, - bal_source == "bal_domestic") %>% - select(-bal_source) %>% - group_by(area, year, item) %>% - mutate(Ostock = value[element == "Opening stocks"] ) %>% - mutate(Opening_Stock_Demand_Share = Ostock / - sum(value[element %in% Opening_Stock_Item])) %>% - mutate(Opening_Stock_Demand_Share = if_else(Ostock == 0, 0, - Opening_Stock_Demand_Share)) %>% - replace_na(list(Opening_Stock_Demand_Share = 1)) %>% - # The share should be small than 1 - # Other elements will be adjusted if not - mutate(Opening_Stock_Demand_Share = pmin(Opening_Stock_Demand_Share, 1)) %>% - mutate(bal_domestic_lag = case_when( - element %in% c("Opening stocks") ~ value, - element %in% Opening_Stock_Item ~ Opening_Stock_Demand_Share * value, - TRUE ~ 0) ) %>% - mutate(bal_domestic_lag = if_else(element %in% c("Residuals"), - bal_domestic_lag[element == "Opening stocks"] - sum(bal_domestic_lag[element %in% Opening_Stock_Item]), - bal_domestic_lag )) %>% - - select(-Opening_Stock_Demand_Share, -Ostock) %>% - # Calculate domestic balance - mutate(bal_domestic_current = value - bal_domestic_lag) %>% - select(-value) %>% ungroup() %>% - tidyr::gather(bal_source, value, bal_domestic_lag, bal_domestic_current) %>% - # Clean the bal items - spread(element, value) %>% - mutate(`Regional supply` = `Opening stocks` + Production + `Import`, - `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` +`Closing stocks`, - 
`Stock Variation` = `Closing stocks` - `Opening stocks`, - Residuals = `Regional supply` - `Regional demand`) %>% - tidyr::gather(element, value, -area, -item, -year, -bal_source) %>% - bind_rows( - .DF %>% filter(item %in% .SINK_ITEM, - bal_source != "bal_domestic") ) - } - - - #' Primary equivalent aggregation - #' @param .DF Input supply-utilization accounting data frame with one tier of processing - #' @param .SOURCE_ITEM Source items or primary items in the processing - #' @param .SINK_ITEM Sink items or processed items in the processing - #' @param .OUTPUT_SPECIFIC_EXTRACTION_RATE A data frame with item and output_specific_extraction_rate. - #' # In some cases, prescale sink item SUA using output_specific_extraction_rate can improve the processing. - #' # e.g., when coproduction shares are not fixed. - #' @return A supply-utilization accounting data frame of a data tier in source item equivalent - - Proc_primarize <- function(.DF, .SOURCE_ITEM, .SINK_ITEM, - .OUTPUT_SPECIFIC_EXTRACTION_RATE = NA){ - - ## Assertion - assertthat::assert_that(is.SUA(.DF)) - assertthat::assert_that(is.character(.SOURCE_ITEM)) - assertthat::assert_that(is.character(.SINK_ITEM)) - # .DF has all items needed - assertthat::assert_that(all(c(.SOURCE_ITEM, .SINK_ITEM) %in% c(.DF %>% ungroup() %>% distinct(item) %>% pull))) - assertthat::assert_that(is.function(Get_ARMINGTON_BALANCE)) - assertthat::assert_that(is.function(Get_STOCK_BALANCE)) - assertthat::assert_that(is.function(Get_GROSS_EXTRACTION_RATE)) - - ## a. Pre-scale sink item data when .OUTPUT_SPECIFIC_EXTRACTION_RATE is available ---- - if (is.data.frame(.OUTPUT_SPECIFIC_EXTRACTION_RATE)) { - .DF %>% left_join(.OUTPUT_SPECIFIC_EXTRACTION_RATE, by = "item") %>% - replace_na(list(output_specific_extraction_rate = 1)) %>% - mutate(value = value / output_specific_extraction_rate) %>% - select(-output_specific_extraction_rate) -> - .DF - } - - ## b. For the sink items of the tier, separate balance into domestic and imported ---- - # Note that the method here relies on Get_GROSS_EXTRACTION_RATE and Get_ARMINGTON_BALANCE - .DF %>% Get_ARMINGTON_BALANCE(.SINK_ITEM) %>% - Get_STOCK_BALANCE(.SINK_ITEM) %>% - # Join extraction rate for the tier - gcamdata::left_join_error_no_match( - # Get extraction rate for domestic and traded - # Note that extraction rates are mean values across time - # Note that regional extraction rate could be inf - # It is likely due to data inconsistency, e.g., zero processed in source but positive sink - # No adjustments were made since 1/inf become zero in the scaling process, preserving primary balance - .DF %>% Get_GROSS_EXTRACTION_RATE(.SOURCE_ITEM, .SINK_ITEM) %>% - select(area, year, - bal_domestic_current = extraction_rate, - bal_domestic_lag = extraction_rate_lag, - bal_import = extraction_rate_trade) %>% - tidyr::gather(bal_source, extraction_rate, bal_domestic_current, bal_domestic_lag, bal_import), - by = c("area", "year", "bal_source") - ) %>% - # Scale sink items to get source item equivalent - mutate(value = if_else(item %in% .SINK_ITEM, value / extraction_rate, value)) %>% - select(-extraction_rate) -> .df1 - - ## c. 
Aggregate sink_items are aggregated into "sink_item" ---- - # And production & processed are adjusted for primary aggregation - # Bind source items as well - .df1 %>% mutate(item = replace(item, item %in% .SINK_ITEM, "sink_item")) %>% - group_by(area, year, item, element) %>% - summarise(value = sum(value), .groups = "drop") %>% - # Move production to negative processed for aggregation - group_by(area, year, item) %>% - mutate(value = if_else(item == "sink_item" & element %in% c("Production", "Processed"), - value - value[element == "Production"], value)) %>% - ungroup() %>% - # Bind source items and other items not used - bind_rows(.DF %>% filter(!item %in% .SINK_ITEM))-> .df2 - - ## d. Merge sink SUA into source items SUA ---- - # Note that with multiple source items, sinks are aggregated into sources based on average processed shares across sources - # Prepare data to calculate world average source share - .df2 %>% filter(element == "Processed", item %in% .SOURCE_ITEM) %>% - group_by(item) %>% mutate(value = sum(value)) %>% ungroup() %>% - group_by(area, year) %>% - mutate(share = value/sum(value)) %>% - # in case of inf. use simple share - mutate(share = if_else(is.finite(share), share, dplyr::n()/sum(dplyr::n()))) %>% - ungroup() %>% - select(-element, - value) %>% - # full join is needed since item (left) and element(right) are needed - full_join(.df2 %>% filter(item == "sink_item") %>% - select(-item), by = c("area", "year")) %>% - mutate(value = share * value) %>% select(-share) %>% - # Bind source item and aggregated across source & sink items based on primary equivalent - bind_rows(.df2 %>% filter(item != "sink_item")) %>% - group_by(area, year, item, element) %>% - summarise(value = sum(value), .groups = "drop") -> .df3 - - .df3 %>% - spread(element, value) %>% - mutate(`Regional supply` = `Opening stocks` + Production + `Import`, - `Regional demand` = `Export` + Feed + Food + Loss + Processed + Seed + `Other uses` +`Closing stocks`, - Residuals = `Regional supply` - `Regional demand`) %>% - tidyr::gather(element, value, -area, -item, -year) -> - .df4 - - ## e. Return data - return(.df4) - - ## Done Proc_primarize function - - } - - - ### FN for SUA primary equivalent aggregation - - #' Recursively aggregate nest into primary equivalent using from bottom to top - #' @description The function is a recursive wrapper of Proc_primarize - #' @param .COMM Primary commodity in the FAO item mapping (APE_comm). - #' @param .MAPPING Mapping of SUA items to primary products by level of nestings. Mapping_SUA_PrimaryEquivalent was constructed. - #' @param .DF_SUA Input SUA data frame - #' @param .ITEM_SUFFIX Adding a suffix (_APE) for item in primary equivalent in output. Note that an aggregated primary commodity is also kept. - #' @param .RECUR_TOP_NEST Bottom nest_level in mapping. If 1, all the way through APE but if 2 only run to top but 1 nest - #' @return SUA data frame in primary equivalent - - recursively_primarize <- function(.COMM, - .MAPPING = Mapping_SUA_PrimaryEquivalent, - .DF_SUA = Bal_new_all, - .ITEM_SUFFIX = "_APE", - .RECUR_TOP_NEST = 1 - ) { - - - ## Assertion - # Need Proc_primarize exist - assertthat::assert_that(is.function(Proc_primarize)) - assertthat::assert_that(is.character(.COMM)) - # Required format for .MAPPING - assertthat::assert_that(is.data.frame(.MAPPING)) - assertthat::assert_that(all(c("APE_comm", "nest_level", - "source_item", "sink_item", - "output_specific_extraction_rate") %in% names(.MAPPING))) - - ## a. 
-     ## a. Get data for all commodities needed, both primary and processed ----
-     COMM_ALL <-
-       .MAPPING %>%
-       filter(APE_comm == .COMM) %>%
-       select(source_item, sink_item) %>% unlist() %>%
-       unique()
-     assertthat::assert_that(length(COMM_ALL) > 0, msg = "Item does not exist; check mapping")
-     # Subset relevant data only and assert that all needed items exist
-     assertthat::assert_that(is.data.frame(.DF_SUA))
-     DF_BAL <-
-       .DF_SUA %>% filter(item %in% COMM_ALL)
-     assertthat::assert_that(length(setdiff(COMM_ALL, unique(DF_BAL$item))) == 0)
-
-
-     ## b. Get total number of nests based on nest_level in .MAPPING ----
-     .MAPPING %>%
-       filter(APE_comm == .COMM) %>% pull(nest_level) %>% max ->
-       nNests
-
-     ## c. Get primary commodities set ----
-     .MAPPING %>% filter(source_primary == TRUE) %>%
-       distinct(source_item) %>% pull %>%
-       intersect(COMM_ALL) ->
-       COMM_primary
-
-     ## d. Wrapper function of Proc_primarize to use nest_level and mapping inputs ----
-     Proc_primarize1 <- function(.DF, .COMM, .NEST_LEVEL){
-
-       .MAPPING %>%
-         filter(APE_comm == .COMM, nest_level == .NEST_LEVEL) ->
-         .SUA
-
-       ### Get OUTPUT_SPECIFIC_EXTRACTION_RATE ----
-       .SUA %>%
-         filter(!is.na(output_specific_extraction_rate)) %>%
-         select(item = sink_item, output_specific_extraction_rate) ->
-         OUTPUT_SPECIFIC_EXTRACTION_RATE
-
-       .DF %>% Proc_primarize(.SOURCE_ITEM = unique(.SUA$source_item),
-                              .SINK_ITEM = unique(.SUA$sink_item),
-                              .OUTPUT_SPECIFIC_EXTRACTION_RATE = OUTPUT_SPECIFIC_EXTRACTION_RATE)
-     }
-
-     ## e. A recursive helper function ----
-     recursively_repeat <- function(.x, .reps, Proc_primarize1) {
-       if (.reps == .RECUR_TOP_NEST - 1) {
-         .x
-       } else {
-         recursively_repeat(Proc_primarize1(.x, .COMM,
-                                            .NEST_LEVEL = .reps),
-                            .reps - 1, Proc_primarize1)
-       }
-     }
-
-     ## f. Execute the recursive function to realize recursively_primarize ----
-     # through all nests
-     recursively_repeat(DF_BAL, .reps = nNests, Proc_primarize1) -> DF_BAL_PE
-
-     ## g. Return data ----
-     # If .RECUR_TOP_NEST == 1, rename items to the APE item and
-     # also bind the primary commodity balance;
-     # otherwise, keep item names
-     if (.RECUR_TOP_NEST == 1) {
-       return(DF_BAL_PE %>%
-                # Update APE item name
-                mutate(item = paste0(.COMM, .ITEM_SUFFIX)) %>%
-                # Aggregate APE items when needed
-                group_by(area, year, item, element) %>%
-                summarise(value = sum(value), .groups = "drop") %>%
-                ungroup() %>%
-                # Keep an aggregated primary commodity
-                bind_rows(DF_BAL %>%
-                            filter(item %in% COMM_primary) %>%
-                            mutate(item = .COMM) %>%
-                            group_by(area, year, item, element) %>%
-                            summarise(value = sum(value), .groups = "drop") %>%
-                            ungroup()) )
-     } else {
-       return(DF_BAL_PE)
-     }
-     ## Done recursively_primarize function
-   }
-
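-   # Illustrative call (hypothetical APE commodity name; not part of the original code):
-   # recursively_primarize("Wheat", .MAPPING = Mapping_SUA_PrimaryEquivalent,
-   #                       .DF_SUA = Bal_new_all, .RECUR_TOP_NEST = 1)
-   # would return the SUA balance aggregated into "Wheat_APE", plus an aggregated
-   # primary "Wheat" balance
-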
-   # 2.2. Execution: process data into APE ----
-
-
-   ## Loop through all GCAM_commodity values with available data ----
-   Mapping_SUA_PrimaryEquivalent %>% distinct(GCAM_commodity) %>% pull ->
-     GCAM_commodity
-
-   lapply(GCAM_commodity, function(.GCAM_COMM){
-
-     ### Assert that needed items are included in the data
-     assertthat::assert_that(
-       Mapping_SUA_PrimaryEquivalent %>%
-         filter(GCAM_commodity == .GCAM_COMM) %>%
-         distinct(sink_item) %>% pull %>%
-         setdiff(FAO_SUA_Kt_2010to2019_R %>% pull(item)) %>% length() == 0
-     )
-     ### Loop and bind
-     Mapping_SUA_PrimaryEquivalent %>%
-       filter(GCAM_commodity == .GCAM_COMM) %>%
-       distinct(APE_comm) %>% pull %>%
-       # Get the APE_comm and enter them into recursively_primarize
-       lapply(FUN = recursively_primarize,
-              .DF_SUA = FAO_SUA_Kt_2010to2019_R) %>%
-       bind_rows() %>%
-       # Aggregate APE_comm into GCAM_commodity
-       mutate(GCAM_commodity = if_else(grepl("_APE", item), .GCAM_COMM,
-                                       paste0(.GCAM_COMM, "_Primary"))) %>%
-       # Keep APE balance only
-       filter(grepl("_APE", item)) %>%
-       group_by(region = area, year, GCAM_commodity, element) %>%
-       summarise(value = sum(value), .groups = "drop")
-   }) %>% bind_rows() ->
-     GCAM_APE_after2010
-
-   rm(FAO_SUA_Kt_2010to2019_R)
-
-   ## Done Section2 ----
-   #****************************----
-
-
-   # Section4 [1970-2019] GCAM_APE SUA ----
-
-   # 4.1. Helper functions ----
-   Check_Balance_SUA <- function(.DF){
-
-     assertthat::assert_that(all(c("element") %in% names(.DF)))
-     assertthat::assert_that(all(c("Import", "Export", "Production",
-                                   "Food", "Feed", "Other uses") %in%
-                                   c(.DF %>% distinct(element) %>% pull)))
-     # 0. Check NA
-     if (isTRUE(.DF %>% filter(is.na(value)) %>% nrow == 0)) {
-       message("Good! No NA in value") } else {
-         warning("NA in value")
-       }
-
-
-     # 1. Positive values except stock variation and other uses
-     if (isTRUE(.DF %>% filter(!element %in% c("Stock Variation", "Other uses")) %>%
-                summarise(min = min(value, na.rm = T)) %>% pull(min) >= -0.001)) {
-       message("Good! Signs checked") } else {
-         warning("Negative values in key elements (not including stock variation and other uses)")
-       }
-
-     # 2. Trade balance in all years and items
-     if (isTRUE(.DF %>% filter(element %in% c("Import", "Export")) %>%
-                group_by(year, GCAM_commodity, element) %>%
-                summarise(value = sum(value), .groups = "drop") %>%
-                spread(element, value) %>% filter(abs(Import - Export) > 0.0001) %>% nrow() == 0)) {
-       message("Good! Gross trade in balance") } else {
-         warning("Gross trade imbalance")
-       }
-
-     # 3. SUA balance check
-     if (isTRUE(.DF %>%
-                spread(element, value) %>%
-                mutate(`Regional supply` = Production + `Import`,
-                       `Regional demand` = `Export` + Feed + Food + `Other uses`,
-                       bal = abs(`Regional supply` - `Regional demand`)) %>%
-                filter(bal > 0.0001) %>% nrow() == 0)) {
-       message("Good! Regional supply = Regional demand") } else {
-         warning("Regional supply != Regional demand")
-       }
-
-     # 4. Balanced in all dimensions
-     assertthat::assert_that(.DF %>% nrow() ==
-                               .DF %>% distinct(year) %>% nrow *
-                               .DF %>% distinct(GCAM_commodity) %>% nrow *
-                               .DF %>% distinct(element) %>% nrow *
-                               .DF %>% distinct(region) %>% nrow)
-
-   }
-
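-   # Illustrative check on a minimal hypothetical balance (not part of the original code):
-   # tibble(region = "R1", year = 2015, GCAM_commodity = "Wheat",
-   #        element = c("Production", "Import", "Export", "Food", "Feed", "Other uses"),
-   #        value = c(10, 3, 3, 6, 2, 2)) %>%
-   #   Check_Balance_SUA
-   # Supply (10 + 3) equals demand (3 + 6 + 2 + 2), so all four checks should pass
-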
-   # 4.2. Connect and bind data from two periods ----
-
-   GCAM_AgLU_SUA_APE_2010_2019 <-
-     GCAM_APE_after2010 %>%
-     mutate(unit = "1000 tonnes") %>%
-     # clean and aggregate elements not used
-     filter(!element %in% c("Regional demand", "Regional supply",
-                            "Opening stocks", "Closing stocks")) %>%
-     mutate(element = replace(element,
-                              element %in% c("Stock Variation", "Processed",
-                                             "Seed", "Residuals", "Loss"),
-                              "Other uses")) %>%
-     dplyr::group_by_at(dplyr::vars(-value)) %>%
-     summarise(value = sum(value), .groups = "drop")
-
-   ## Check balance
-   GCAM_AgLU_SUA_APE_2010_2019 %>% Check_Balance_SUA
-   rm(GCAM_APE_after2010)
-
-
-   ## Done Section4 ----
-   #****************************----
-
-   # Section5 [1970-2019] Connect production and area data ----
-
-   # This section gets crop and livestock production before aggregation (FAO region and items)
-   # For both before 2010 and after 2010
-   # They are also aggregated to GCAM region and commodities to assert consistency
-   # The processing includes all crops (including fodder crops) and livestock items
-
-   # 5.1. Get all mapping straight ----
-
-   Primary_Item_CROP <-
-     FAO_ag_items_PRODSTAT %>%
-     select(item, GCAM_commodity, GCAM_subsector) %>%
-     filter(!is.na(item), !is.na(GCAM_commodity)) %>%
-     # Fodder grass has a duplicate as it is mapped to different GTAP crops
-     distinct %>%
-     mutate(CropMeat = if_else(GCAM_commodity %in% c("FodderGrass", "FodderHerb"),
-                               "Crop_Fodder", "Crop_NonFodder"))
-   assertthat::assert_that(
-     all(Primary_Item_CROP %>% filter(CropMeat == "Crop_NonFodder") %>% pull(item) %in%
-           c(Mapping_SUA_PrimaryEquivalent %>% filter(source_primary == T) %>%
-               distinct(item = source_item) %>% pull)),
-     msg = "Inconsistent mapping of primary crops between FAO_ag_items_PRODSTAT and Mapping_SUA_PrimaryEquivalent" )
-
-   Primary_Item_MEAT <-
-     Mapping_SUA_PrimaryEquivalent %>%
-     # include animal MeatEq items since they are treated as primary production after 2010
-     filter(source_primary == T | grepl("MeatEq", APE_comm)) %>%
-     distinct(GCAM_commodity, item = source_item) %>%
-     filter(GCAM_commodity %in%
-              c(FAO_an_items_PRODSTAT %>%
-                  filter(!is.na(GCAM_commodity)) %>%
-                  distinct(GCAM_commodity) %>% pull)) %>%
-     mutate(CropMeat = "Meat")
-
-   # 5.2. Get primary production for all ----
-   # Connecting, mapping, arranging, and asserting
-
-   ## Bind production and area data for both fodder and nonfodder ----
-   FAO_AgProd_Kt_Area_Kha <-
-     GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020 %>%
-     gather_years() %>%
-     filter(!is.na(value))
-
-   # Assert that yields contain no Inf values
-   assertthat::assert_that(
-     # only a safeguard here, as data was cleaned and area and prod are matched
-     FAO_AgProd_Kt_Area_Kha %>%
-       # filter only primary crop items (all crops with area)
-       filter(item_set %in% c("QCL_COMM_CROP_PRIMARY",
-                              "QCL_COMM_CROP_PRIMARY_FODDER")) %>%
-       select(-unit) %>% spread(element, value) %>%
-       filter(is.infinite(Production / `Area harvested`) |
-                is.infinite(`Area harvested` / Production)) %>%
-       nrow == 0,
-     msg = "Check region/item for prod > 0 & area = 0 or prod = 0 & area > 0" )
-
-   # Assert that primary production in the two sources is consistent
-   assertthat::assert_that(
-     FAO_SUA_Kt_2010to2019 %>% filter(element == "Production") %>%
-       inner_join(FAO_AgProd_Kt_Area_Kha %>%
-                    filter(item_set != "QCL_COMM_OTHERPROC") %>%
-                    filter(element == "Production") %>% rename(value1 = value),
-                  by = c("area_code", "item_code", "element", "item", "area", "year")) %>%
-       mutate(diff = abs(value1 - value)) %>%
-       filter(diff > 0.0001) %>% nrow() == 0,
-     msg = "Primary production in SUA (FAO_SUA_Kt_2010to2019) and
-            QCL (FAO_AgProd_Kt_Area_Kha) are inconsistent "
-   )
-
-   ## a. All production ----
-   # After 2010, meat production covers more items than (QCL) FAO_AgProd_Kt_Area_Kha
-   # Production in FAO_AgProd_Kt_Area_Kha before 2010 was used
-   FAO_AgProd_Kt_Area_Kha %>%
-     filter(element == "Production") %>%
-     filter(year < min(FAO_SUA_Kt_2010to2019$year)) %>%
-     select(names(FAO_SUA_Kt_2010to2019)) %>%
-     bind_rows(
-       # For after 2010
-       # Note that not all meat items came from QCL_PROD (unlike primary crops)
-       # E.g., meat Eq, offals (chicken livers), etc. were from derivation or SCL
-       # But all items should exist in Bal_new_all
-       # And Bal_new_all is identical to QCL_PROD for primary production
-       FAO_SUA_Kt_2010to2019 %>%
-         filter(element == "Production") ) %>%
-     bind_rows(
-       # bind fodder crops for after 2010
-       FAO_AgProd_Kt_Area_Kha %>%
-         filter(item_set == "QCL_COMM_CROP_PRIMARY_FODDER",
-                element == "Production") %>%
-         filter(year >= min(FAO_SUA_Kt_2010to2019$year)) %>%
-         select(names(FAO_SUA_Kt_2010to2019))
-     ) %>%
-     # Inner join works as a filter here
-     # Keep subsector info for crops
-     inner_join(Primary_Item_CROP %>%
-                  bind_rows(Primary_Item_MEAT %>%
-                              mutate(GCAM_subsector = GCAM_commodity)),
-                by = "item") %>%
-     # add in iso and GCAM region IDs
-     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
-     left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") ->
-     FAO_AgProd_Kt_All
-
-   FAO_AgProd_Kt_All %>%
-     left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
-     dplyr::group_by_at(vars(region, year, GCAM_commodity, element, CropMeat)) %>%
-     summarise(value = sum(value), .groups = "drop") ->
-     QCL_PROD_GCAM
-
-   assertthat::assert_that(
-     GCAM_AgLU_SUA_APE_2010_2019 %>%
-       filter(element == "Production") %>%
-       left_join_error_no_match(
-         QCL_PROD_GCAM %>% filter(CropMeat != "Crop_Fodder") %>%
-           select(-CropMeat) %>%
-           rename(value1 = value) %>%
-           complete(nesting(region, year, element), GCAM_commodity, fill = list(value1 = 0)),
-         by = c("region", "GCAM_commodity", "year", "element")) %>%
-       mutate(diff = abs(value1 - value)) %>%
-       filter(diff > 0.0001) %>% nrow() == 0,
-     msg = "Primary production from two sources
-            (GCAM_AgLU_SUA_APE_2010_2019 and FAO_AgProd_Kt_Area_Kha) are inconsistent." )
-
-   ## b. All area harvested ----
-
-
-   FAO_AgProd_Kt_Area_Kha %>%
-     filter(element == "Area harvested") %>%
-     select(names(FAO_SUA_Kt_2010to2019)) %>%
-     # Keep subsector info for crops
-     inner_join(Primary_Item_CROP, by = "item") %>%
-     # add in iso and GCAM region IDs
-     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
-     left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") ->
-     FAO_AgArea_Kha_All
-
-   #****************************----
-   # Section6 Connect food items and macronutrient rates ----
-
-   # 6.1 Separate FAO food items into GCAM food items and NEC for macronutrients ----
-   # GCAM includes most of the food items
-   # All food items with available macronutrient info from FAOSTAT are included
-
-   # a. Get all GCAM SUA items from the mapping by binding both source and sink items
-   # about 486 items (out of 530) used in GCAM
-
-   Mapping_SUA_PrimaryEquivalent %>%
-     select(GCAM_commodity, item = source_item) %>%
-     bind_rows(Mapping_SUA_PrimaryEquivalent %>%
-                 select(GCAM_commodity, item = sink_item)) %>%
-     distinct() %>% arrange(GCAM_commodity) ->
-     SUA_Items_GCAM
-
-   assertthat::assert_that(
-     SUA_Items_GCAM %>% distinct(item) %>% nrow() == SUA_Items_GCAM %>% nrow(),
-     msg = "Check duplicates in Mapping_SUA_PrimaryEquivalent SUA items"
-   )
-
-   # Highly processed products or other products are not included in GCAM
-   # (e.g., wine, infant food, and other nonfood items)
-
-   FAO_SUA_Kt_2010to2019 %>% distinct(item) %>%
-     filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM
-
-   # b. There are 426 FAO food items, all included in FAO_SUA_Kt_2010to2019 (530 items)
-   # SUA_Items_Food includes both GCAM and NonGCAM (NEC)
-   FAO_SUA_Kt_2010to2019 %>% distinct(item, item_code) %>%
-     filter(item %in% unique(GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean$item)) %>%
-     left_join(SUA_Items_GCAM, by = "item") %>%
-     # For NA GCAM_commodity: not elsewhere classified (NEC)
-     # So we would know the % of food calories not included in GCAM commodities
-     mutate(GCAM_commodity = if_else(is.na(GCAM_commodity), "NEC", GCAM_commodity)) ->
-     SUA_Items_Food
-
-
-   # 6.2 Get macronutrient values ----
-
-   ### a. Get world average macronutrient rates ----
-   # For filling in missing values
-
-   GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>%
-     tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>%
-     group_by(item, item_code, macronutrient) %>%
-     summarise(macronutrient_value_World = mean(macronutrient_value), .groups = "drop") %>%
-     ungroup() ->
-     SUA_food_macronutrient_rate_World
-
-   ### b. Calculate SUA food calorie consumption by joining macronutrient rates and SUA food ----
-
-   FAO_SUA_Kt_2010to2019 %>%
-     filter(element == "Food", item_code %in% SUA_Items_Food$item_code) %>%
-     rename(Food_Kt = value) %>%
-     select(-element) %>%
-     left_join_error_no_match(SUA_Items_Food, by = c("item_code", "item")) %>%
-     repeat_add_columns(
-       tibble(macronutrient = c("calperg", "fatperc", "proteinperc"))) %>%
-     left_join(
-       GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>%
-         tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc),
-       by = c("area_code", "item_code", "item", "macronutrient")
-     ) %>%
-     left_join_error_no_match(SUA_food_macronutrient_rate_World,
-                              by = c("item_code", "item", "macronutrient")) %>%
-     mutate(macronutrient_value = if_else(is.na(macronutrient_value),
-                                          macronutrient_value_World,
-                                          macronutrient_value)) %>%
-     # calculate total Cal, protein, and fat in food
-     # value was in 1000 tonnes, i.e., 10^9 g
-     # e.g., 100 Kt of food at 5% fat: 100 * 5 / 100 / 1000 = 0.005 Mt of fat
-     mutate(value = macronutrient_value * Food_Kt,
-            value = if_else(macronutrient %in% c("fatperc", "proteinperc"),
-                            value / 100 / 1000, value)) %>% # unit from perc to Mt
-     select(-macronutrient_value, -macronutrient_value_World, -Food_Kt) %>%
-     # rename element with units
-     mutate(macronutrient = case_when(
-       macronutrient == "calperg" ~ "MKcal",
-       macronutrient == "fatperc" ~ "MtFat",
-       macronutrient == "proteinperc" ~ "MtProtein" )) %>%
-     left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
-     left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") ->
-     FAO_Food_Macronutrient_All_2010_2019
-
-   ### c. Get the max values of macronutrient conversion rates (per GCAM_commodity) ----
-   # This will be used later as an upper bound to improve the data
-   GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean %>%
-     tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>%
-     left_join_error_no_match(SUA_Items_Food,
-                              by = c("item_code", "item")) %>%
-     group_by(GCAM_commodity, macronutrient) %>%
-     summarise(max_macronutrient_value = max(macronutrient_value), .groups = "drop") ->
-     FAO_Food_MacronutrientRate_2010_2019_MaxValue
-
-
-   #****************************----
-   # Produce outputs ----
-   #*******************************
-
-   GCAM_AgLU_SUA_APE_2010_2019 %>%
-     add_title("GCAM_AgLU_SUA_APE_2010_2019") %>%
-     add_units("kton") %>%
-     add_comments("Supply utilization balance for GCAM commodities and regions in primary equivalent") %>%
-     add_precursors("aglu/AGLU_ctry",
-                    "common/iso_GCAM_regID",
-                    "common/GCAM_region_names",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_BiTrade_194Regs_400Items_2010to2020",
-                    "aglu/FAO/Mapping_SUA_PrimaryEquivalent",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009",
-                    "aglu/FAO/Mapping_item_FBS_GCAM") ->
-     GCAM_AgLU_SUA_APE_2010_2019
-
-   FAO_AgProd_Kt_All %>%
-     add_title("FAO_AgProd_Kt_All") %>%
-     add_units("1000 tonnes") %>%
-     add_comments("FAO crop (incl. fodder) and livestock production for FAO regions and items, with GCAM mappings") %>%
-     add_precursors("aglu/AGLU_ctry",
-                    "common/iso_GCAM_regID",
-                    "aglu/FAO/FAO_ag_items_PRODSTAT",
-                    "aglu/FAO/FAO_an_items_PRODSTAT",
-                    "aglu/FAO/Mapping_SUA_PrimaryEquivalent",
-                    "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") ->
-     FAO_AgProd_Kt_All
-
-   FAO_AgArea_Kha_All %>%
-     add_title("FAO_AgArea_Kha_All") %>%
-     add_units("1000 ha") %>%
add_comments("Harvested area") %>% - add_precursors("aglu/AGLU_ctry", - "common/iso_GCAM_regID", - "aglu/FAO/FAO_ag_items_PRODSTAT", - "aglu/FAO/FAO_an_items_PRODSTAT", - "aglu/FAO/Mapping_SUA_PrimaryEquivalent", - "aglu/FAO/GCAMDATA_FAOSTAT_ProdArea_195Regs_271Prod160AreaItems_1973to2020") -> - FAO_AgArea_Kha_All - - FAO_Food_Macronutrient_All_2010_2019 %>% - add_title("FAO_Food_Macronutrient_All_2010_2019") %>% - add_units("MKcal, MtFat, MtProtein") %>% - add_comments("Macronutrient consumption values connected to food consumption in GCAM_AgLU_SUA_APE_1973_2019") %>% - add_precursors("aglu/AGLU_ctry", - "common/iso_GCAM_regID", - "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", - "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", - "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> - FAO_Food_Macronutrient_All_2010_2019 - - FAO_Food_MacronutrientRate_2010_2019_MaxValue %>% - add_title("FAO_Food_MacronutrientRate_2010_2019_MaxValue") %>% - add_units("cal per g, fat perc. , protein perc.") %>% - add_comments("The max value of macronutrient conversion rate across region, year, and SUA items (per GCAM_commodity") %>% - add_precursors("aglu/AGLU_ctry", - "common/iso_GCAM_regID", - "aglu/FAO/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019", - "aglu/FAO/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean", - "aglu/FAO/Mapping_SUA_PrimaryEquivalent") -> - FAO_Food_MacronutrientRate_2010_2019_MaxValue - - return_data(MODULE_OUTPUTS) - - - } else { - stop("Unknown command") - } -} diff --git a/data-raw/generate_package_data_faostat_helper_funcs.R b/data-raw/generate_package_data_faostat_helper_funcs.R index a851903a..5d6e37cf 100644 --- a/data-raw/generate_package_data_faostat_helper_funcs.R +++ b/data-raw/generate_package_data_faostat_helper_funcs.R @@ -68,9 +68,13 @@ rm_accent <- function(.df, ...){ length(intersect(c(...), names(.df))) == length(c(...)), msg = "Columns listed not included in the data frame") + # .df %>% + # mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>% + # mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "") + .df %>% - mutate_at(c(...), iconv, to = 'ASCII//TRANSLIT') %>% - mutate_at(c(...), .funs = gsub, pattern = "\\'", replacement = "") + mutate(dplyr::across(c(...), iconv, to = 'ASCII//TRANSLIT')) %>% + mutate(dplyr::across(c(...), gsub, pattern = "\\'", replacement = "")) } diff --git a/inst/extdata/aglu/FAO/SUA_item_code_map.csv b/inst/extdata/aglu/FAO/SUA_item_code_map.csv new file mode 100644 index 00000000..980a4b3b --- /dev/null +++ b/inst/extdata/aglu/FAO/SUA_item_code_map.csv @@ -0,0 +1,540 @@ +# File: SUA_item_code_map.csv +# Title: Mapping of SUA items to their item_code +# Units: NA +# Description: Ideally we just use SUA (supply utilization accounts) item names directly. However, given the volume of data +# it helps to use item_code to reduce the size of the input data as well as speed up processing. We will then need this mapping +# to ultimately translate back to item names. 
+# Last update: 8-15-2022 (XZ) +# Column types: ci +# ---------- +item,item_code +Wheat,15 +"Rice, paddy",27 +Barley,44 +Beer of barley,51 +Maize,56 +"Oil, maize",60 +Rye,71 +Oats,75 +Millet,79 +Sorghum,83 +Buckwheat,89 +Quinoa,92 +Fonio,94 +Triticale,97 +Canary seed,101 +"Grain, mixed",103 +Potatoes,116 +Sweet potatoes,122 +Cassava,125 +Roots and tubers nes,149 +Sugar beet,157 +Sugar crops nes,161 +Sugar Raw Centrifugal,162 +Molasses,165 +"Beans, dry",176 +"Broad beans, horse beans, dry",181 +"Peas, dry",187 +Chick peas,191 +Lentils,201 +Bambara beans,203 +Vetches,205 +"Cashew nuts, with shell",217 +Chestnut,220 +"Almonds, with shell",221 +"Walnuts, with shell",222 +Pistachios,223 +Kola nuts,224 +"Hazelnuts, with shell",225 +Areca nuts,226 +Nuts nes,234 +Soybeans,236 +"Oil, groundnut",244 +Coconuts,249 +"Oil, coconut (copra)",252 +"Oil, palm",257 +"Oil, palm kernel",258 +Olives,260 +"Oil, olive, virgin",261 +Karite nuts (sheanuts),263 +Sunflower seed,267 +"Oil, sunflower",268 +Rapeseed,270 +"Oil, rapeseed",271 +"Oil, safflower",281 +Sesame seed,289 +"Oil, sesame",290 +Mustard seed,292 +Poppy seed,296 +Cottonseed,329 +"Oil, cottonseed",331 +Linseed,333 +"Oil, linseed",334 +Oilseeds nes,339 +Cabbages and other brassicas,358 +Artichokes,366 +Asparagus,367 +Lettuce and chicory,372 +Spinach,373 +Tomatoes,388 +Cauliflowers and broccoli,393 +"Pumpkins, squash and gourds",394 +Cucumbers and gherkins,397 +Eggplants (aubergines),399 +"Chillies and peppers, green",401 +"Onions, shallots, green",402 +"Onions, dry",403 +Garlic,406 +"Leeks, other alliaceous vegetables",407 +"Beans, green",414 +"Peas, green",417 +Carrots and turnips,426 +"Maize, green",446 +Mushrooms and truffles,449 +"Vegetables, fresh nes",463 +Bananas,486 +Plantains and others,489 +Oranges,490 +"Tangerines, mandarins, clementines, satsumas",495 +Lemons and limes,497 +Grapefruit (inc. 
pomelos),507 +Apples,515 +Pears,521 +Quinces,523 +Apricots,526 +"Cherries, sour",530 +Cherries,531 +Peaches and nectarines,534 +Plums and sloes,536 +Strawberries,544 +Gooseberries,549 +Currants,550 +Blueberries,552 +Cranberries,554 +Grapes,560 +Wine,564 +Watermelons,567 +"Melons, other (inc.cantaloupes)",568 +Figs,569 +"Mangoes, mangosteens, guavas",571 +Avocados,572 +Pineapples,574 +Dates,577 +Persimmons,587 +Cashewapple,591 +Kiwi fruit,592 +Papayas,600 +"Fruit, tropical fresh nes",603 +"Fruit, fresh nes",619 +"Coffee, green",656 +"Cocoa, beans",661 +Tea,667 +Mate,671 +Pepper (piper spp.),687 +"Chillies and peppers, dry",689 +Vanilla,692 +Cinnamon (cannella),693 +Cloves,698 +"Nutmeg, mace and cardamoms",702 +"Anise, badian, fennel, coriander",711 +Ginger,720 +Spices nes,723 +"Meat, cattle",867 +"Offals, edible, cattle",868 +"Fat, cattle",869 +"Milk, whole fresh cow",882 +Cream fresh,885 +"Butter, cow milk",886 +"Milk, skimmed cow",888 +"Milk, whole condensed",889 +"Whey, condensed",890 +Yoghurt,891 +"Milk, whole evaporated",894 +"Milk, whole dried",897 +"Milk, skimmed dried",898 +"Whey, dry",900 +"Cheese, whole cow milk",901 +"Meat, sheep",977 +"Offals, sheep,edible",978 +"Cheese, sheep milk",984 +"Meat, goat",1017 +"Offals, edible, goats",1018 +"Meat, pig",1035 +"Offals, pigs, edible",1036 +"Fat, pigs",1037 +Lard,1043 +"Meat, chicken",1058 +"Eggs, hen, in shell",1062 +"Meat, duck",1069 +"Meat, goose and guinea fowl",1073 +"Meat, turkey",1080 +"Eggs, other bird, in shell",1091 +"Meat, horse",1097 +"Fat, camels",1129 +"Meat, rabbit",1141 +"Meat, game",1163 +Meat nes,1166 +"Honey, natural",1182 +Tallow,1225 +"Margarine, short",1242 +"Flour, wheat",16 +"Bran, wheat",17 +Macaroni,18 +Bread,20 +Bulgur,21 +Pastry,22 +"Rice, husked",28 +"Rice, milled/husked",29 +"Rice, milled",31 +"Rice, broken",32 +"Oil, rice bran",36 +"Flour, rice",38 +"Beverages, fermented rice",39 +"Cereals, breakfast",41 +"Barley, pearled",46 +Malt,49 +"Germ, maize",57 +"Flour, maize",58 +"Bran, maize",59 +Oats rolled,76 +"Bran, millet",81 +"Bran, sorghum",85 +"Flour, mixed grain",104 +Infant food,109 +Wafers,110 +"Flour, cereals",111 +Cereal preparations nes,113 +Mixes and doughs,114 +"Food preparations, flour, malt extract",115 +"Flour, potatoes",117 +"Potatoes, frozen",118 +"Flour, cassava",126 +Cassava dried,128 +"Starch, cassava",129 +"Flour, roots and tubers nes",150 +Maple sugar and syrups,160 +Sugar non-centrifugal,163 +Sugar refined,164 +"Fructose and syrup, other",166 +Sugar nes,167 +Sugar confectionery,168 +Glucose and dextrose,172 +Lactose,173 +"Flour, pulses",212 +"Brazil nuts, shelled",229 +"Cashew nuts, shelled",230 +Almonds shelled,231 +"Walnuts, shelled",232 +"Hazelnuts, shelled",233 +"Nuts, prepared (exc. 
groundnuts)",235 +"Oil, soybean",237 +Soya sauce,239 +Soya paste,240 +"Groundnuts, shelled",243 +"Groundnuts, prepared",246 +Peanut butter,247 +"Coconuts, desiccated",250 +Copra,251 +Olives preserved,262 +Butter of karite nuts,264 +"Oil, castor beans",266 +"Oil, olive residues",274 +"Flour, mustard",295 +"Oil, vegetable origin nes",340 +"Juice, tomato",390 +"Tomatoes, paste",391 +"Tomatoes, peeled",392 +Sweet corn frozen,447 +Sweet corn prep or preserved,448 +"Mushrooms, canned",451 +"Vegetables, dehydrated",469 +Vegetables in vinegar,471 +"Vegetables, preserved nes",472 +"Vegetables, frozen",473 +"Vegetables, temporarily preserved",474 +"Vegetables, preserved, frozen",475 +"Vegetables, homogenized preparations",476 +"Juice, orange, single strength",491 +"Juice, orange, concentrated",492 +"Juice, lemon, single strength",498 +"Juice, lemon, concentrated",499 +"Juice, grapefruit",509 +"Juice, grapefruit, concentrated",510 +"Juice, citrus, single strength",513 +"Juice, citrus, concentrated",514 +Cider etc,517 +"Juice, apple, single strength",518 +"Juice, apple, concentrated",519 +"Apricots, dry",527 +Plums dried (prunes),537 +Raisins,561 +"Juice, grape",562 +Vermouths & similar,565 +Figs dried,570 +Pineapples canned,575 +"Juice, pineapple",576 +"Juice, pineapple, concentrated",580 +"Fruit, dried nes",620 +"Juice, fruit nes",622 +"Fruit, prepared nes",623 +"Fruit, cooked, homogenized preparations",626 +"Beverages, distilled alcoholic",634 +"Coffee, roasted",657 +"Coffee, substitutes containing coffee",658 +"Coffee, extracts",659 +"Cocoa, paste",662 +"Cocoa, butter",664 +"Cocoa, powder & cake",665 +Chocolate products nes,666 +"Tea, mate extracts",672 +"Feed and meal, gluten",846 +"Meat, cattle, boneless (beef & veal)",870 +"Meat, beef and veal sausages",874 +"Meat, beef, preparations",875 +"Yoghurt, concentrated or not",892 +"Buttermilk, curdled, acidified milk",893 +"Cheese, processed",907 +"Milk, products of natural constituents nes",909 +Ice cream and edible ice,910 +Grease incl. 
lanolin wool,994 +"Meat, pork",1038 +Bacon and ham,1039 +"Meat, pig sausages",1041 +"Meat, pig, preparations",1042 +"Offals, liver chicken",1059 +"Fat, liver prepared (foie gras)",1060 +"Meat, chicken, canned",1061 +"Eggs, liquid",1063 +"Eggs, dried",1064 +"Offals, liver geese",1074 +"Offals, liver duck",1075 +"Meat, dried nes",1164 +"Oils, fats of animal nes",1168 +Food prep nes,1232 +"Margarine, liquid",1241 +"Fat nes, prepared",1243 +"Oil, boiled etc",1274 +Fatty acids,1276 +Fatty substance residues,1277 +Cereals nes,108 +Yautia (cocoyam),135 +Taro (cocoyam),136 +Yams,137 +Sugar cane,156 +"Cow peas, dry",195 +Pigeon peas,197 +Lupins,210 +Pulses nes,211 +"Brazil nuts, with shell",216 +"Groundnuts, with shell",242 +Oil palm fruit,254 +Palm kernels,256 +Castor oil seed,265 +Tung nuts,275 +Jojoba seed,277 +Safflower seed,280 +Melonseed,299 +Tallowtree seed,305 +Kapok fruit,310 +Kapokseed in shell,311 +Hempseed,336 +Cassava leaves,378 +"Vegetables, leguminous nes",420 +String beans,423 +Okra,430 +Chicory roots,459 +Carobs,461 +"Fruit, citrus nes",512 +"Fruit, stone nes",541 +"Fruit, pome nes",542 +Raspberries,547 +Berries nes,558 +"Ghee, butteroil of cow milk",887 +"Milk, skimmed evaporated",895 +"Milk, skimmed condensed",896 +"Milk, dry buttermilk",899 +"Cheese, skimmed cow milk",904 +"Meat, buffalo",947 +"Offals, edible, buffaloes",948 +"Fat, buffaloes",949 +"Milk, whole fresh buffalo",951 +"Butter, buffalo milk",952 +"Ghee, buffalo milk",953 +"Cheese, buffalo milk",955 +"Fat, sheep",979 +"Milk, whole fresh sheep",982 +"Butter and ghee, sheep milk",983 +"Fat, goats",1019 +"Milk, whole fresh goat",1020 +"Cheese, goat milk",1021 +"Meat, bird nes",1089 +"Offals, horses",1098 +"Meat, ass",1108 +"Meat, mule",1111 +"Meat, camel",1127 +"Offals, edible, camels",1128 +"Milk, whole fresh camel",1130 +"Meat, other rodents",1151 +"Snails, not sea",1176 +"Germ, wheat",19 +"Starch, wheat",23 +"Gluten, wheat",24 +"Beverages, fermented wheat",26 +"Gluten, rice",33 +"Starch, rice",34 +"Bran, rice",35 +"Barley, pot",45 +"Bran, barley",47 +"Flour, barley and grits",48 +Malt extract,50 +"Gluten, maize",63 +"Starch, maize",64 +Beer of maize,66 +"Flour, rye",72 +"Bran, rye",73 +"Bran, oats",77 +"Flour, millet",80 +Beer of millet,82 +"Flour, sorghum",84 +Beer of sorghum,86 +"Flour, buckwheat",90 +"Bran, buckwheat",91 +"Flour, fonio",95 +"Bran, fonio",96 +"Flour, triticale",98 +"Bran, triticale",99 +"Bran, mixed grains",105 +"Bran, cereals nes",112 +"Starch, potatoes",119 +"Tapioca, potatoes",121 +"Tapioca, cassava",127 +Roots and tubers dried,151 +Fructose chemically pure,154 +Maltose chemically pure,155 +"Sugar, cane, raw, centrifugal",158 +Isoglucose,175 +"Bran, pulses",213 +Soya curd,241 +"Oil, tung nuts",276 +"Oil, jojoba",278 +"Oil, mustard",293 +"Oil, poppy",297 +Vegetable tallow,306 +Kapokseed shelled,312 +"Oil, kapok",313 +"Oil, hempseed",337 +"Flour, oilseeds",343 +"Mushrooms, dried",450 +"Juice, vegetables nes",466 +"Juice, tangerine",496 +"Juice, plum, single strength",538 +"Juice, plum, concentrated",539 +"Grapes, must",563 +"Juice, mango",583 +"Fruit, tropical dried nes",604 +"Flour, fruit",624 +"Fruits, nuts, peel, sugar preserved",625 +Alcohol non food,632 +"Fat, cattle butcher",871 +"Meat, beef, dried, salted, smoked",872 +"Meat, extracts",873 +"Meat, homogenized preparations",877 +Liver prep.,878 +"Whey, fresh",903 +"Whey, cheese",905 +"Milk, reconstituted",908 +Egg albumine,916 +Casein,917 +"Milk, skimmed buffalo",954 +"Milk, skimmed sheep",985 +"Butter, goat milk",1022 +"Milk, skimmed 
goat",1023 +"Fat, pig butcher",1040 +"Fat, poultry",1065 +"Fat, poultry, rendered",1066 +"Offals, liver turkeys",1081 +"Fat, other camelids",1160 +Offals nes,1167 +"Meat nes, preparations",1172 +Lard stearine oil,1221 +Degras,1222 +"Castor oil, hydrogenated (opal wax)",1273 +"Oil, hydrogenated",1275 +"Cake, soybeans",238 +"Cake, groundnuts",245 +"Cake, copra",253 +"Cake, palm kernel",259 +"Cake, sunflower",269 +"Cake, rapeseed",272 +"Cake, sesame seed",291 +"Cake, mustard",294 +"Cake, cottonseed",332 +"Cake, others",341 +"Rice, paddy (rice milled equivalent)",30 +Seed cotton,328 +Hops,677 +Peppermint,748 +"Pyrethrum, dried",754 +Cotton lint,767 +Flax fibre and tow,773 +Hemp tow waste,777 +Kapok fibre,778 +Jute,780 +"Bastfibres, other",782 +Ramie,788 +Sisal,789 +Agave fibres nes,800 +Manila fibre (abaca),809 +Coir,813 +Fibre crops nes,821 +"Tobacco, unmanufactured",826 +"Rubber, natural",836 +"Gums, natural",839 +"Hides, cattle, fresh",919 +"Hides, buffalo, fresh",957 +"Wool, greasy",987 +"Skins, sheep, fresh",995 +"Skins, goat, fresh",1025 +"Meat, other camelids",1158 +Beeswax,1183 +"Silk-worm cocoons, reelable",1185 +"Silk, raw",1186 +Freshwater Fish,2761 +Demersal Fish,2762 +Pelagic Fish,2763 +"Marine Fish, Other",2764 +Crustaceans,2765 +Cephalopods,2766 +"Molluscs, Other",2767 +"Meat, Aquatic Mammals",2768 +"Aquatic Animals, Others",2769 +Aquatic Plants,2775 +"Fish, Body Oil",2781 +"Fish, Liver Oil",2782 +"Cake, rice bran",360000 +"Cake, maize",600000 +"Cake, olive residues",2740000 +"Cake, jojoba",2780000 +"Cake, safflower",2810000 +"Cake, poppy",2970000 +"Cake, kapok",3130000 +"Fiber, cottonseed",3320000 +"Cake, linseed",3340000 +"Cake, hempseed",3370000 +"Cake, vegetable origin nes",3400000 +"AnMeatEq, cattle",8670000 +"AnMeatEq, buffalo",9470000 +"AnMeatEq, sheep",9770000 +"AnMeatEq, goat",10170000 +"AnMeatEq, pig",10350000 +"AnMeatEq, chicken",10580000 +"AnMeatEq, duck",10690000 +"AnMeatEq, goose and guinea fowl",10730000 +"AnMeatEq, turkey",10800000 +"AnMeatEq, horse",10970000 +"AnMeatEq, ass",11080000 +"AnMeatEq, mule",11110000 +"AnMeatEq, camel",11270000 +"AnMeatEq, rabbit",11410000 +"AnMeatEq, other rodents",11510000 +"AnMeatEq, other camelids",11580000 diff --git a/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd b/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd index 473a9257..ae84e6ad 100644 --- a/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd +++ b/man/FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL.Rd @@ -6,6 +6,8 @@ \usage{ FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL( .DF, + .FAO_AREA_CODE_COL = "area_code", + .AREA_COL = "area", SUDAN2012_BREAK = F, SUDAN2012_MERGE = T ) @@ -13,6 +15,8 @@ FAO_AREA_DISAGGREGATE_HIST_DISSOLUTION_ALL( \arguments{ \item{.DF}{Input data frame} +\item{.AREA_COL}{} + \item{SUDAN2012_BREAK}{If T break Sudan before 2012 based on 2013- 2016 data} \item{SUDAN2012_MERGE}{If T merge South Sudan into Sudan} diff --git a/man/get_data_list.Rd b/man/get_data_list.Rd new file mode 100644 index 00000000..b1b3aaac --- /dev/null +++ b/man/get_data_list.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-data.R +\name{get_data_list} +\alias{get_data_list} +\title{get_data_list} +\usage{ +get_data_list(all_data, data_list, strip_attributes = FALSE, environ = NULL) +} +\arguments{ +\item{all_data}{Data structure} + +\item{data_list}{A character vector of data to load into the given environment} + +\item{strip_attributes}{A logical vector which will be passed on to \code{get_data}. 
The length
+must be 1 (use the same logical for all values of data_list) or match the length of
+\code{data_list} (one logical value for each data_list item).}
+
+\item{environ}{The environment into which the data should be loaded. If NULL (the default)
+the caller's environment will be used.}
+}
+\description{
+This function calls \code{get_data} for each data name in \code{data_list} and assigns
+the resulting data into the given \code{environ} with the data name being used as the
+variable name. Note: for values in data_list "named" FILE the "basename" of the string
+will be used as the variable name.
+}
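+% Illustrative usage (hypothetical data names; assumes a populated \code{all_data}
+% object from a driver run, so this is a sketch rather than an official example)
+\examples{
+\dontrun{
+# The "FILE" entry loads under its basename, i.e., GCAM_region_names
+get_data_list(all_data, c(FILE = "common/GCAM_region_names", "L100.example_data"),
+              strip_attributes = TRUE)
+}
+}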