Skip to content

Commit

Permalink
Quick source data, deflator and folder, updates; more code improving
Browse files Browse the repository at this point in the history
update Taiwan deflator data format
separating RawDataPreProc by fao dataset code
  • Loading branch information
realxinzhao committed Aug 8, 2023
1 parent 9ffae91 commit 6bff741
Show file tree
Hide file tree
Showing 12 changed files with 44,958 additions and 255 deletions.
91 changes: 91 additions & 0 deletions R/xfaostat_L101_RawDataPreProc1_QCL.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

#' module_xfaostat_L101_RawDataPreProc1_QCL
#'
#' Preprocess raw faostat data part 1 QCL data
#'
#' @param command API command to execute
#' @param ... other optional parameters, depending on command
#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
#' \code{command} is "MAKE") all the generated outputs
#' @details This chunk loads the raw FAOSTAT QCL (production and area) dataset, removes aggregated
#' areas and items, drops element 5314 and missing values, and strips accents from item and area names.
#' It also derives \code{QCL_area_code_map}, the distinct area code / area name pairs found in the cleaned data.
#' @importFrom assertthat assert_that
#' @importFrom dplyr summarize bind_rows distinct filter if_else inner_join left_join mutate rename select n
#' group_by_at first case_when vars
#' @importFrom tibble tibble
#' @importFrom tidyr complete drop_na gather nesting spread replace_na
#' @author XZ 2023
module_xfaostat_L101_RawDataPreProc1_QCL <- function(command, ...) {

  MODULE_INPUTS <-
    c(FILE = "aglu/AGLU_ctry")

  MODULE_OUTPUTS <-
    c("QCL",                # Ag production quantity and harvested area
      "QCL_area_code_map")  # Country code

  if(command == driver.DECLARE_INPUTS) {
    return(MODULE_INPUTS)
  } else if(command == driver.DECLARE_OUTPUTS) {
    return(MODULE_OUTPUTS)
  } else if(command == driver.MAKE) {

    # Column names used through non-standard evaluation; silence package check.
    area_code <- area <- item_code <- item <- element_code <- element <-
      year <- value <- unit <- NULL

    all_data <- list(...)[[1]]

    # Load required inputs ----

    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)


    # *[QCL] FAOSTAT Production and area ----

    ## Load raw data
    # NOTE(review): the loader is defined elsewhere; it presumably makes a
    # data frame named `QCL` available in this scope -- confirm.
    FAOSTAT_load_raw_data(DATASETCODE = "QCL", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

    QCL1 <- QCL %>%
      # Remove aggregated areas and items
      filter(area_code < 350, item_code < 1700) %>%
      select(area_code, area, item_code, item,
             element_code, element, year, value, unit) %>%
      # When dealing with animal/livestock data, units are important
      # Prod Population (5314) for beeswax and honey is removed since data is
      # only available before 1990
      filter(element_code != 5314) %>%
      # Remove NA for simplicity for now; expand.grid later
      # All Coir (coconut fiber) is filtered out due to NA
      filter(!is.na(value)) %>%
      # remove accent
      rm_accent("item", "area")

    ### output QCL_area_code_map ----
    # Other datasets use the QCL areas for consistency
    QCL_area_code_map <- QCL1 %>%
      distinct(area_code, area) %>%
      add_title("FAO primary production country and code") %>%
      add_units("NA") %>%
      add_comments("FAO Country and code")

    ### output QCL ----
    # QCL mixes several elements (production, area, ...), so no single unit
    # applies; the per-row `unit` column is kept in the data.
    QCL <- QCL1 %>%
      add_title("FAO primary production") %>%
      add_units("various") %>%
      add_comments("Preprocessed FAOSTAT primary production")

    return_data(MODULE_OUTPUTS)

  } else {
    stop("Unknown command")
  }
}
201 changes: 201 additions & 0 deletions R/xfaostat_L101_RawDataPreProc2_PP_PD_OA.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

#' module_xfaostat_L101_RawDataPreProc2_PP_PD_OA
#'
#' Preprocess raw faostat data part 2 prices deflators and population
#'
#' @param command API command to execute
#' @param ... other optional parameters, depending on command
#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
#' \code{command} is "MAKE") all the generated outputs
#' @details This chunk loads the raw FAOSTAT PP (producer prices), PD (GDP deflators) and OA (population)
#' datasets and keeps only the areas listed in \code{QCL_area_code_map}. Missing producer prices are filled
#' using the producer price index relative to the 2015 price, and a Taiwan GDP deflator series (rebased so
#' that 2015 = 100) is appended to the FAOSTAT deflators.
#' @importFrom assertthat assert_that
#' @importFrom dplyr summarize bind_rows distinct filter if_else inner_join left_join mutate pull rename select n
#' group_by_at first case_when vars
#' @importFrom tibble tibble
#' @importFrom tidyr complete drop_na gather nesting spread replace_na
#' @author XZ 2023
module_xfaostat_L101_RawDataPreProc2_PP_PD_OA <- function(command, ...) {

  MODULE_INPUTS <-
    c(FILE = "aglu/fao/FAOSTAT/Other_supplementary/GDP_deflator_Taiwan",
      "QCL_area_code_map")

  MODULE_OUTPUTS <-
    c("PP",   # Producer prices
      "PD",   # GDP deflator
      "OA")   # Population

  if(command == driver.DECLARE_INPUTS) {
    return(MODULE_INPUTS)
  } else if(command == driver.DECLARE_OUTPUTS) {
    return(MODULE_OUTPUTS)
  } else if(command == driver.MAKE) {

    # Column names used through non-standard evaluation; silence package check.
    area_code <- area <- item_code <- item <- element_code <- element <-
      year <- value <- unit <- pp_base <- pp_baseindex <- NULL

    all_data <- list(...)[[1]]

    # Load required inputs ----
    # This provides both `GDP_deflator_Taiwan` and `QCL_area_code_map`.

    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)

    # Get area code ----
    QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()


    # *[PP] Producer price ----

    # NOTE(review): the loader is defined elsewhere; it presumably makes a
    # data frame named after the dataset code available in this scope -- confirm.
    FAOSTAT_load_raw_data(DATASETCODE = "PP", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

    PP1 <- PP %>%
      filter(
        # rm aggregated regions
        area_code < 350,
        # rm aggregated items
        item_code < 1700,
        # only keep regions with production
        area_code %in% QCL_area_code,
        # keep USD/tonne and index
        element_code %in% c(5532, 5539)
      ) %>%
      rm_accent("item", "area")

    # One column per element (price and price index).
    # Not completing year and area here
    PP_wide <- PP1 %>%
      select(area_code, area, item_code, item, element, year, value) %>%
      spread(element, value)

    # 2015 price and index per area and item, used as the anchor for filling.
    PP_base <- PP_wide %>%
      rename(pp_base = `Producer Price (USD/tonne)`,
             pp_baseindex = `Producer Price Index (2014-2016 = 100)`) %>%
      filter(!is.na(pp_base), year == 2015) %>%
      select(-year)

    # Using index to fill in missing across years:
    # price(t) = price(2015) * index(t) / index(2015)
    PP2 <- PP_wide %>%
      left_join(PP_base, by = c("area_code", "area", "item_code", "item")) %>%
      mutate(
        `Producer Price (USD/tonne)` = if_else(
          is.na(`Producer Price (USD/tonne)`),
          pp_base * `Producer Price Index (2014-2016 = 100)` / pp_baseindex,
          `Producer Price (USD/tonne)`
        )
      ) %>%
      select(area_code, area, item_code, item, year,
             `Producer Price (USD/tonne)`) %>%
      gather(element, value, `Producer Price (USD/tonne)`) %>%
      mutate(element_code = 5532)


    ### output PP ----
    PP <- PP2 %>%
      add_title("FAO producer prices") %>%
      add_units("USD/tonne") %>%
      add_comments("Preprocessed FAOSTAT producer prices") %>%
      add_precursors("QCL_area_code_map")


    # [PD] FAO_GDP_deflators ----
    #**************************************

    FAOSTAT_load_raw_data(DATASETCODE = "PD", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

    # Taiwan values are supplied separately as FAO does not have Taiwan price
    # data. `GDP_deflator_Taiwan` comes from the declared module input loaded
    # above; it must not be re-read from a hard-coded relative path, which
    # would depend on the working directory and bypass the declared input.

    PD1 <- PD %>%
      filter(
        year %in% Hist_Year,
        area_code < 350,
        # only keep regions with production
        area_code %in% QCL_area_code,
        item_code == 22024,
        # keep US$
        element_code == 6179
      ) %>%
      rm_accent("item", "area")

    PD2 <- PD1 %>%
      filter(item == "GDP Deflator", grepl("US\\$", element)) %>%
      select(area, area_code, item, item_code,
             element, element_code, year, value) %>%
      bind_rows(
        GDP_deflator_Taiwan %>%
          mutate(
            area = "China, Taiwan Province of",
            area_code = 214,
            item = "GDP Deflator",
            item_code = 22024,
            element = "Value US$, 2015 prices",
            element_code = 6179,
            # rebase the series so that 2015 = 100
            value = round(100 * value / value[year == 2015], 2)
          )
      )

    ### output PD ----
    PD <- PD2 %>%
      add_title("FAO GDP deflators by country (2015 = 100)") %>%
      add_units("Unitless") %>%
      add_comments("Preprocessed FAOSTAT regional gdp deflators") %>%
      add_precursors("QCL_area_code_map",
                     "aglu/fao/FAOSTAT/Other_supplementary/GDP_deflator_Taiwan")



    # *[OA]: Population ----

    FAOSTAT_load_raw_data(DATASETCODE = "OA", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

    OA1 <- OA %>%
      filter(
        # only keep regions with production
        area_code %in% QCL_area_code,
        element_code == 511,
        item_code == 3010
      ) %>%
      select(area_code, area, item_code, item,
             element_code, element, year, value, unit) %>%
      rm_accent("item", "area")

    ### output OA ----
    # NOTE(review): FAOSTAT reports population in thousands of persons; the
    # per-row `unit` column is kept -- confirm against the raw data release.
    OA <- OA1 %>%
      add_title("FAO population") %>%
      add_units("1000 persons") %>%
      add_comments("Preprocessed FAO OA") %>%
      add_precursors("QCL_area_code_map")

    return_data(MODULE_OUTPUTS)

  } else {
    stop("Unknown command")
  }
}
104 changes: 104 additions & 0 deletions R/xfaostat_L101_RawDataPreProc3_SCL_FBS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

#' module_xfaostat_L101_RawDataPreProc3_SCL_FBS
#'
#' Preprocess raw faostat data part 3 SCL and FBS
#'
#' @param command API command to execute
#' @param ... other optional parameters, depending on command
#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
#' \code{command} is "MAKE") all the generated outputs
#' @details This chunk loads the raw FAOSTAT SCL (supply utilization accounts) and FBS (new food balance
#' sheet) datasets, drops selected items and elements, keeps only the areas listed in
#' \code{QCL_area_code_map}, and strips accents from item and area names.
#' @importFrom assertthat assert_that
#' @importFrom dplyr summarize bind_rows distinct filter if_else inner_join left_join mutate pull rename select n
#' group_by_at first case_when vars
#' @importFrom tibble tibble
#' @importFrom tidyr complete drop_na gather nesting spread replace_na
#' @author XZ 2023
module_xfaostat_L101_RawDataPreProc3_SCL_FBS <- function(command, ...) {

  MODULE_INPUTS <-
    c("QCL_area_code_map")

  MODULE_OUTPUTS <-
    c("SCL",   # Supply utilization accounting
      "FBS")   # New food balance sheet


  if(command == driver.DECLARE_INPUTS) {
    return(MODULE_INPUTS)
  } else if(command == driver.DECLARE_OUTPUTS) {
    return(MODULE_OUTPUTS)
  } else if(command == driver.MAKE) {

    # Column names used through non-standard evaluation; silence package check.
    area_code <- area <- item_code <- item <- element_code <- element <-
      year <- value <- unit <- NULL

    all_data <- list(...)[[1]]

    # Load required inputs ----

    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)

    # Get area code ----
    QCL_area_code <- QCL_area_code_map %>% distinct(area_code) %>% pull()

    ## *[SCL] SUA: supply utilization accounting ----

    # SUA 2010+
    # NOTE(review): the loader is defined elsewhere; it presumably makes a
    # data frame named after the dataset code available in this scope -- confirm.
    # The data folder is passed explicitly, as in the other preprocessing chunks.
    FAOSTAT_load_raw_data(DATASETCODE = "SCL", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

    # The item filter is only applied when item codes were read as numbers.
    if (is.numeric(SCL$item_code)) {
      SCL <- SCL %>% filter(item_code <= 1700, item_code != 1)
    }

    SCL1 <- SCL %>%
      filter(
        # it is not useful to calculate cal/g using
        # `Food supply (kcal/capita/day)` / `Food supply quantity (g/capita/day)`:
        # the unit is too small, so those elements are removed here.
        # `Calories/Year` / `Food supply quantity (tonnes)` is more accurate!
        # Similarly for protein and fat.
        # Use annual value in SUA to calculate the conversion rate!
        !element_code %in% c(664, 665, 674, 684, 511),
        # only keep regions with production
        area_code %in% QCL_area_code
      ) %>%
      select(area_code, area, item_code, item,
             element_code, element, year, value, unit) %>%
      rm_accent("item", "area")


    ### output SCL ----
    SCL <- SCL1 %>%
      add_title("FAO SCL") %>%
      add_units("tonne") %>%
      add_comments("Preprocessed FAOSTAT SCL") %>%
      add_precursors("QCL_area_code_map")
    rm(SCL1)


    # Food balance and Supply-Utilization-Account

    ## *[FBS] new food balance sheet (2010-) ----

    ## Load raw data
    # New FBS 2010+
    FAOSTAT_load_raw_data(DATASETCODE = "FBS", DATA_FOLDER = DIR_RAW_DATA_FAOSTAT)

    FBS1 <- FBS %>%
      filter(
        # NOTE(review): presumably drops aggregated items (codes >= 2901) and
        # the population item (2501) -- confirm against the FBS item list.
        item_code < 2901, item_code != 2501,
        !element_code %in% c(511, 5301),
        # only keep regions with production
        area_code %in% QCL_area_code
      ) %>%
      select(area_code, area, item_code, item,
             element_code, element, year, value, unit) %>%
      rm_accent("item", "area")


    ### output FBS ----
    FBS <- FBS1 %>%
      add_title("FAO FBS") %>%
      add_units("tonne") %>%
      add_comments("Preprocessed FAOSTAT FBS") %>%
      add_precursors("QCL_area_code_map")


    return_data(MODULE_OUTPUTS)

  } else {
    stop("Unknown command")
  }
}
Loading

0 comments on commit 6bff741

Please sign in to comment.