Skip to content

Commit

Permalink
dataset documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Ludwigm6 committed Mar 13, 2024
1 parent d7f5bcc commit e09c880
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 0 deletions.
27 changes: 27 additions & 0 deletions R/cookfarm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# NOTE(review): the location, the SOURCEID description and the @source entry
# were rewritten because the previous text looked copied from the sPlotOpen
# dataset documentation. Please confirm them, and fill in the EPSG code,
# against the data provider.
#' Cookfarm soil logger data
#'
#' Spatio-temporal data of soil properties and associated predictors for the
#' Cook Agronomy Farm (Cookfarm) near Pullman, Washington, USA.
#' @format
#' A sf data.frame with 128545 rows and 17 columns:
#' \describe{
#' \item{SOURCEID}{Identifier of the soil logger}
#' \item{VW}{Response Variable - Soil Moisture}
#' \item{altitude}{Measurement depth of VW}
#' \item{Date, cdata}{Measurement Date, Cumulative Date}
#' \item{Easting, Northing}{Location coordinates (EPSG code still to be documented)}
#' \item{DEM, TWI, NDRE.M, NDRE.Sd, Precip_wrcc, MaxT_wrcc, MinT_wrcc, Precip_cum}{Predictor variables}
#' }
#' @source \itemize{
#' \item{Cook Agronomy Farm data set as described in Gasch et al. 2015 (see References)}
#' }
#'
#' @references \itemize{
#' \item{Gasch et al. 2015 - Spatio-temporal interpolation of soil water, temperature, and electrical conductivity in 3D + T: The Cook Agronomy Farm data set \doi{10.1016/j.spasta.2015.04.001}}
#' \item{Meyer et al. 2018 - Improving performance of spatio-temporal machine learning models using forward feature selection and target-oriented validation \doi{10.1016/j.envsoft.2017.12.001}}
#'
#' }
#' @usage data(cookfarm)
#'
"cookfarm"

6 changes: 6 additions & 0 deletions data-raw/create-cookfarm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Build script for the packaged `cookfarm` dataset.
#
# Reads the serialized Cookfarm object stored under inst/extdata and writes it
# to data/ as an .rda file containing a single object named `cookfarm`.

cookfarm <- readRDS(file = "inst/extdata/Cookfarm.RDS")
save(list = "cookfarm", file = "data/cookfarm.rda")
93 changes: 93 additions & 0 deletions data-raw/create-splotdata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
## This script creates the example dataset "splotdata" of the CAST package.
## It downloads splotopen data points and associated worldclim predictors for South America.
## A lower resolution predictor stack (terra rast) is also created for Chile.
## For more information, please check out the Book Chapter and Repository CAST4Ecology

library(geodata)
library(rnaturalearth)
library(terra)
library(sf)
library(tidyverse)
library(geodata)


##### Download Predictors --------------------------------
## Warning: This downloads ~ 1 GB of data
## Folder that receives every raw download of this script.
## `recursive = TRUE` also creates a missing parent folder and
## `showWarnings = FALSE` keeps a re-run with an existing folder quiet.
dir.create("data-raw/raw/", recursive = TRUE, showWarnings = FALSE)

## WorldClim bioclimatic variables and global elevation, each fetched twice:
## at `res = 0.5` (objects with suffix "f", full resolution) and at `res = 5`.
wcf <- geodata::worldclim_global(var = "bio", path = "data-raw/raw/", res = 0.5)
wc <- geodata::worldclim_global(var = "bio", path = "data-raw/raw/", res = 5)
elevf <- geodata::elevation_global(res = 0.5, path = "data-raw/raw/")
elev <- geodata::elevation_global(res = 5, path = "data-raw/raw/")

## Append the elevation layer to the bioclim stack of matching resolution.
wcf <- c(wcf, elevf)
wc <- c(wc, elev)

##### Download sPlotOpen -------------------------------------
## Guard on the extracted RData file itself (the file this script loads later)
## rather than on the folder, so an interrupted extraction is repaired on the
## next run instead of being skipped.
if (!file.exists("data-raw/raw/splotopen/sPlotOpen.RData")) {
  ## mode = "wb": the URL does not end in ".zip", so on Windows download.file()
  ## would otherwise transfer in text mode and corrupt the archive.
  download.file(
    "https://idata.idiv.de/ddm/Data/DownloadZip/3474?version=5779",
    destfile = "data-raw/raw/splotopen.zip",
    mode = "wb"
  )
  ## The outer archive contains a second zip that holds the RData file.
  unzip(zipfile = "data-raw/raw/splotopen.zip", exdir = "data-raw/raw/splotopen")
  unzip(
    zipfile = "data-raw/raw/splotopen/sPlotOpen.RData(2).zip",
    exdir = "data-raw/raw/splotopen"
  )
}



##### Clean up and save necessary files ----------------------------------
# Study region: country polygons of the South American continent
region <- rnaturalearth::ne_countries(
  scale = 110,
  continent = "South America",
  returnclass = "sf"
)

# Layers kept as predictors (selected bioclim variables plus elevation)
predictor_names <- c(
  "bio_1", "bio_4", "bio_5", "bio_6", "bio_8", "bio_9",
  "bio_12", "bio_13", "bio_14", "bio_15", "elev"
)

# Coarse stack: crop to the region, strip the file prefix from the layer
# names and keep only the selected layers
wc <- crop(wc, region)
names(wc) <- str_remove(names(wc), pattern = "wc2.1_5m_")
wc <- wc[[predictor_names]]

# Full resolution stack, used for extracting the training data.
# Same treatment as above, plus two helper layers holding each cell's
# y and x coordinate.
wcf <- crop(wcf, region)
names(wcf) <- str_remove(names(wcf), pattern = "wc2.1_30s_")
wcf <- wcf[[predictor_names]]
wcf$lat <- terra::init(wcf, "y")
wcf$lon <- terra::init(wcf, "x")


# Response variable: sPlotOpen species richness for South America
## see Appendix 1 of https://doi.org/10.1111/geb.13346
load("data-raw/raw/splotopen/sPlotOpen.RData")

# Species richness per plot observation, taken from the CWM_CWV.oa table
richness <- select(CWM_CWV.oa, c("PlotObservationID", "Species_richness"))

# South American plots from the header table, converted to point geometries.
# (A filter on `Resample_1 == TRUE` is intentionally not applied.)
plots_sa <- filter(header.oa, Continent == "South America")
plots_sa <- st_as_sf(plots_sa, coords = c("Longitude", "Latitude"), crs = 4326)

# Attach the richness, reduce to the needed columns and drop incomplete rows
plots_sa <- left_join(plots_sa, richness)
splot <- na.omit(
  select(
    plots_sa,
    c("PlotObservationID", "GIVD_ID", "Country", "Biome", "Species_richness")
  )
)

# Extract the predictor values at the plot locations, bind them to the
# response and drop plots without complete predictor information
splot <- na.omit(
  st_as_sf(
    terra::extract(wcf, splot, ID = FALSE, bind = TRUE)
  )
)


# only keep unique locations
## some reference sample locations are in the same predictor stack pixel
## this can lead to erroneous models and misleading validations
##
## `lat` and `lon` are the cell coordinates extracted from the predictor stack,
## so a repeated (lat, lon) pair marks plots that share one pixel. The pair is
## compared row-wise via a two-column matrix; concatenating the columns with
## c() would compare single values and return an index twice as long as the
## data.
same_pixel <- duplicated(cbind(splot$lat, splot$lon))
splotdata <- splot[!same_pixel, ]
splotdata <- na.omit(splotdata)

## the helper coordinates are not part of the published dataset
splotdata$lat <- NULL
splotdata$lon <- NULL


# Store the example dataset; unused Biome factor levels are dropped first
splotdata$Biome <- droplevels(splotdata$Biome)
save(list = "splotdata", file = "data/splotdata.rda")

## Store the coarse predictor stack, cropped to Chile, as a GeoTIFF
chile <- rnaturalearth::ne_countries(returnclass = "sf", country = "Chile")
wc <- crop(wc, chile)
writeRaster(
  wc,
  "inst/extdata/predictors_chile.tif",
  overwrite = TRUE,
  datatype = "INT2S"
)


## Delete everything that was downloaded by this script
unlink("data-raw/raw", recursive = TRUE)
Binary file added data/cookfarm.rda
Binary file not shown.
Binary file modified data/splotdata.rda
Binary file not shown.
37 changes: 37 additions & 0 deletions man/cookfarm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e09c880

Please sign in to comment.