Unit 1 Practice answers
-GEOG246-346
- - - - - - - - - -1 Module 1 practice answers
-N/A
-2 Module 2 practice answers
-N/A
-3 Module 3 practice answers
-3.1 Practice 1
-3.1.1 Questions
--
-
Class of
a
is integer.
-Because the vector is contained in a list, and using a single
[]
to pull out an element returns a 1-element list, so you have to use list notation to get the vector.
-a
is vector of integers,l
is a list of containing 2 integer vectors and 1 character vector.
-We applied a function that is the fourth element of list
l2
to the integer vectorf
that is the second element ofl2
.
-
3.1.2 Code
--
-
-
<- 20:30
- a <- letters b
-
-
-
names(a) <- b[1:length(a)]
- a
-
-
-
<- list(a = a, b = b) l
-
-
-
>= 26]
- a[a c(1, 7)]
- a[c(length(a) - 1, length(a))] a[
-
-
-
%in% c("a", "c", "g")] b[b
-
-
-
1]]
- l[[1]
- l[$a[l$a < 25]
- l"a"]][l[["a"]] == 25]
- l[[2]][l[[2]] %in% c("d", "e", "f")] l[[
3.2 Practice 2
-3.2.1 Questions
--
-
The vector is coerced to character.
-The whole
matrix
is coerced to character. Ifdata.frame
, onlym$b
is coerced to character.
--
-
- Have to use both column and row indices to attract row, col subset; -
- Can use list notation for
data.frame
(e.g.m$b
), but not formatrix
.
-
-
3.2.2 Code
--
-
-
<- cbind(1:10, 11:20, 21:30) m
-
-
-
4:5, 2:3] m[
-
-
-
colnames(m) <- letters[1:3]
-
-
-
"b"] > 14 & m[, "b"] <= 18, ] m[m[,
-
-
-
<- as.data.frame(m) d
-
-
-
$a > 4, "a"] <- -1 d[d
-
-
-
$c <- letters[1:10] d
-
-
-
<- list(m, d)
- l 2]][2:3, ] l[[
-
-
-
%>% filter(b >= 14 & b <= 18) d
3.3 Practice 3
-3.3.1 Questions
--
-
1 and 5 pulled from
b
and multiplied in sequence onm
by row then column.
-Would multiply 1st columns of each, 2nd columns of each, and 3rd columns of each.
-d
row 1 columns 1 to 3 multiplied by the value from the last row, first column ofm
.
-
3.3.2 Code
--
-
Not shown here due to length
-
-
sin(m)
-cos(m)
-
-
-
rowSums(d[, 1:3])
-colSums(d[, 1:3])
-rowMeans(d[, 1:3])
-colMeans(d[, 1:3])
3.4 Practice 4
-3.4.1 Code
--
-
-
It adds an NA to print statement for the 5th letter of the alphabet. Expand sscript
by adding another “th” to the vector, so it has 5 elements.
<- c("st", "nd", "rd", "th", "th") # vector of superscripts
- sscript for(i in 1:5) { # for loop with iterator i over vector 1:4
-<- paste0(letters[i], " is the ", i, sscript[i],
- stmnt " letter in the alphabet")
- print(stmnt) # print statement
- }
-
-
-
Turns 4 of the letters red and adds axes to plots
-<- c("st", "nd", "rd", "th") # vector of superscripts
- sscript par(mfrow = c(1, 4), mar = c(0, 0, 1, 0.5))
-for(i in 1:4) {
-<- paste0(letters[i], " is the ", i, sscript[i],
- stmnt " letter in the alphabet")
- plot(1:4, rep(3, 4), ylim = c(1, 5), pch = letters[1:4], #axes = FALSE,
- xlab = "", ylab = "", main = stmnt, cex = 2)
- # points(i, 3, pch = letters[i], col = "red", cex = 2)
- }
-
-
-
# Chunk 26
-for(i in 1:10) {
-if(i < 5) { # condition 1
- print(paste(i, "is less than", i + 1))
- else if(i >= 3 & i <= 8) { # condition 2
- } print(paste(i, "is between", i - 1, "and", i + 1))
- else { # remaining conditions
- } print(paste(i, "is greater than", i - 1))
-
- } }
-
-
-
Create a for
loop that iterates over a vector 1:20. Insert a condition into it such that it only prints out a result when the iterator’s value is 11.
for(i in 1:20) {
-if(i == 11) print(i)
- }
3.5 Practice 5
-3.5.1 Code
--
-
-
Turns the summary of “a” variables to NA.
-<- list(data.frame(a = 1:10, b = 21:30),
- dat_list data.frame(a = 31:40, b = 41:50),
- data.frame(a = 51:60, b = 61:70))
- <- function(x) {
- dat_modify 1:3, 1] <- 999
- x[return(x)
-
- }<- lapply(dat_list, dat_modify)
- dat_list
- dat_list
-<- c(dat_list, mean) # add another element to dat_list
- dat_list2 lapply(1:length(dat_list2), function(x) { # x <- 1
-<- dat_list2[[x]] # extract element of list
- d if(is.data.frame(d)) { # check if it is a data.frame
- == 999] <- NA # convert any 999 values to NA
- d[d <- c(colSums(d, na.rm = FALSE), # column sums, dropping NAs
- o "total" = sum(d, na.rm = TRUE)) # sum dropping NAs
- else { # if it is not a data.frame, make an error statement
- } <- paste("Operation not valid for a", class(d))
- o
- }return(o) # return result
- })
-
-
-
Can’t coerce list to double.
-<- list(mean, sd, range)
- flist lapply(1:3, function(x) flist[[x]](dat_list[[1]]))
-
-
-
lapply(dat_list, function(x) mean(unlist(x)))
-lapply(dat_list, function(x) sd(unlist(x)))
-
-
-
sapply(dat_list, function(x) mean(unlist(x)))
-sapply(dat_list, function(x) sd(unlist(x)))
-
-
-
lapply(dat_list, function(x) sum(x[1:2, 2]))
4 Module 4 practice answers
-4.1 Practice 1
-4.1.1 Questions
--
-
A
tibble
is an enhanceddata.frame
. Among other features, it provides more information on data types it contains when printing. It can be treated exactly like adata.frame
though, in terms of indexing and other operations.
-Base methods:
tb_a$a
;tb_a[["a"]]
;tb_a[, "a"]
. tidyverse, withtb_a %>% dplyr::select(a)
. Note that that gives back atibble
(ordata.frame
, if you have adata.frame
). To get a vector back, usetb_a %>% dplyr::pull(a)
-The data are messy, because the “column headers are values, not variable names” (see here). We would gather the data in the month rows, setting the month as the key and the mean flu cases as the value.
-inner_join
preserves just the values in x and drops non-matching rows from y.left_join
fills non-matching values in y with NAs.right_join
preserves values in y dropping non-matching values in x.full_join
preserves all non-matching rows in both x and y, filling NAs into non-matched rows.
-
4.1.2 Code
--
-
-
set.seed(1)
-<- tibble(a = sample(1:10), b = rnorm(10))
- dat <- "/path/where/you/want/to/write" # REPLACE THIS WITH YOUR OWN!!!!
- td ::write_csv(dat, path = file.path(td, "dummy.csv")) readr
-
-
-
<- "/path/where/you/want/to/write" # REPLACE THIS WITH YOUR OWN!!!!
- td ::read_csv(file.path(td, "dummy.csv")) readr
-
-
-
Recreate tibble
s first
# Chunk 13
-set.seed(1)
-<- tibble(v1 = paste0("N", 1:5), v2 = rnorm(5))
- t1 <- tibble(v1 = paste0("N", 1:5), v3 = runif(5))
- t2 <- tibble(v1 = paste0("N", 1:7), v4 = sample(1:100, 7))
- t3 # v5 = letters[sample(1:26, 7)])
- <- tibble(v1 = paste0("N", c(1:2, 4:7, 11)),
- t4 v5 = letters[sample(1:26, 7)])
Then do joins:
-left_join(t1, t2) %>% left_join(., t3) %>% left_join(., t4)
-right_join(t1, t2) %>% right_join(., t3) %>% right_join(., t4)
-
-
-
left_join(t1, t2) %>% left_join(., t3) %>% left_join(., t4) %>% arrange(v5)
-right_join(t1, t2) %>% right_join(., t3) %>% right_join(., t4) %>%
-arrange(desc(v5))
-
-
-
<- dir(system.file("extdata/", package = "geospaar"), pattern = "FAOSTAT",
- fs full.names = TRUE)
- <- lapply(fs, readr::read_csv)
- crops <- do.call(rbind, lapply(crops, function(x) {
- crops_df %>% dplyr::select(Item, Area, Element, Year, Value) %>%
- x pivot_wider(names_from = Element, values_from = Value) %>%
- rename(crop = Item, country = Area, year = Year,
- harv_area = `Area harvested`, prod = Production)
-
- }))<- crops_df %>% mutate(yield = prod / harv_area)
- crop_ylds <- crop_ylds %>%
- crop_ylds mutate(country = ifelse(country == "South Africa", "ZAF", country)) %>%
- mutate(country = ifelse(country == "Zambia", "ZMB", country)) %>%
- mutate(harv_km2 = harv_area / 100)
-
-
-
%>% rename(harv_area_km2 = harv_km2) crop_ylds
-
-
-
<- tibble(v1 = 1:10, v2 = 11:20) %>%
- my_tb rbind(., tibble(v1 = 11:20, v2 = 21:30)) %>% mutate(v3 = v2^2) %>%
- arrange(-v3)
-
-
-
%>% slice(1, 10, 17) %>% dplyr::select(v2, v3) my_tb
4.2 Practice 2: Analysis
-4.2.1 Questions
--
-
dplyr::filter
-group_by(crop, country, y2k)
is doing the splitting, on crop type, then country, and then year.summarize(...)
is doing the apply using amean
. There is no combine line, as it is implicit.
-Chunk 30 adds a
filter
for crop type (selecting out maize), and then simply groups on the y2k variable.
-It doesn’t work when the output of the analysis is not tabular/a list, as with
cor.test
andlm
. We can overcome this by 1) creating individual functions that reproduce the component outputs of the analysis (e.g. Chunk 33) and these as a list of functions usingfuns
tosummarise_all
, 2) doing the splits outside of the pipeline (e.g. Chunk 36), or 3) using functions such asdo
andbroom::tidy
within the pipeline (e.g. Chunk 39).
-
4.2.2 Code
--
-
-
%>% filter(crop == "sorghum" & country == "ZAF" & year >= 2000)
- crop_ylds $crop == "sorghum" & crop_ylds$country == "ZAF" &
- crop_ylds[crop_ylds$year >= 2000, ] crop_ylds
-
-
-
%>% filter(crop == "sorghum" & country == "ZAF" & year >= 2000) %>%
- crop_ylds select(prod, harv_area, yield) %>% summarise_all(funs(mean, sd))
-
-
-
%>% group_by(crop, country) %>% select(prod, harv_area) %>%
- crop_ylds summarise_all(funs(mean, sd))
-
-
-
%>% filter(crop == "maize" & country == "ZMB") %>%
- crop_ylds select(yield, harv_area) %>% cor()
-
-<- crop_ylds %>% filter(crop == "maize" & country == "ZMB")
- dat cor.test(dat$harv_area, dat$yield)
-
-
-
%>% filter(crop == "maize") %>% summarize(mu_yld = mean(yield))
- crop_ylds # 2.07 t/ha
-
-
-
South Africa shows larger yields gains (0.061 t/ha/yr versus 0.03 t/ha/yr)
-summary(lm(yield ~ year,
-data = crop_ylds %>% filter(crop == "maize" & country == "ZMB")))
- summary(lm(yield ~ year,
-data = crop_ylds %>% filter(crop == "maize" & country == "ZAF")))
-
-%>% filter(crop == "maize" & country == "ZMB") %>%
- crop_ylds lm(yield ~ year, data = .) %>% summary()
- %>% filter(crop == "maize" & country == "ZAF") %>%
- crop_ylds lm(yield ~ year, data = .) %>% summary()
-
-
-
%>% filter(crop != "sorghum") %>% group_by(crop, country) %>%
- crop_ylds do(prod_ha_lm = lm(yield ~ year, data = .)) %>%
- ::tidy(., prod_ha_lm) broom
4.3 Practice 3: Visualization
-4.3.1 Questions
--
-
ggplot2
is built fromgrid
graphics, and is based on an underlying visualization philosophy. It builds up graphics objects using the+
operator, and easily does splits within the data using the “color” argument withinaes
and/or thefacet_grid
function.graphics
plots can be faster to implement for exploratory analysis,ggplot2
has more attractive, presentation-grade defaults.
-Because the syntax used in
graphics
plots is used in many of the plotting functions developed for spatial packages, including newer ones such assf
andstars
.
-Each of the three plots takes the axis labels exactly as they are specified to the axis arguments (either as they were specified in the formula in the case of Chunk 40 or to the “x” and “y” arguments in Chunk 41). You can change the names using the “xlab” and “ylab” arguments.
-Using “col”, “pch”, and “cex” arguments.
-You have to add the
.
to the “data” argument ofplot
, e.g.dat %>% plot(y ~ x, data = .)
-You get just an empty grey background–
ggplot
won’t plot anything without ageom_*
function added to theggplot
object.
-
4.3.2 Code
--
-
-
%>% filter(crop == "sorghum") %>%
- crop_ylds ggplot() + geom_histogram(aes(x = yield), bins = 15) +
- ggtitle("Distribution of sorghum yields")
- %>% filter(crop == "sorghum") %>%
- crop_ylds ggplot() + geom_histogram(aes(x = yield), bins = 15, fill = "red") +
- ggtitle("Distribution of sorghum yields")
-
-
-
%>% filter(crop == "wheat" & country == "ZAF") %>%
- crop_ylds plot(harv_area ~ year, data = ., pch = 16, col = "blue",
- xlab = "", ylab = "Harvested area (ha)",
- main = "South Africa wheat (1961-2017)")
- %>% filter(crop == "wheat" & country == "ZAF") %>%
- crop_ylds plot(harv_area ~ year, data = ., pch = 16, type = "l", col = "blue",
- xlab = "", ylab = "Harvested area (ha)",
- main = "South Africa wheat (1961-2017)")
-
-
-
%>% filter(crop == "wheat" & country == "ZAF") %>%
- crop_ylds ggplot() + geom_point(aes(year, harv_area), col = "blue") +
- xlab("") + ylab("Harvested area (ha)") +
- ggtitle("South Africa wheat (1961-2017)")
- %>% filter(crop == "wheat" & country == "ZAF") %>%
- crop_ylds ggplot() + geom_line(aes(year, harv_area), col = "blue") +
- xlab("") + ylab("Harvested area (ha)") +
- ggtitle("South Africa wheat (1961-2017)")
-
-
-
%>% filter(crop == "wheat") %>%
- crop_ylds ggplot() + geom_line(aes(year, harv_area, color = country)) +
- scale_color_manual(values = c("red", "blue")) +
- xlab("") + ylab("Harvested area (ha)") +
- ggtitle("Wheat (1961-2017)")
-
-# extra
-%>% filter(crop == "wheat") %>%
- crop_ylds ggplot() + geom_line(aes(year, log10(harv_area), color = country)) +
- scale_color_manual(values = c("red", "blue")) +
- xlab("") + ylab("Harvested area (ha)") +
- ggtitle("Wheat (1961-2017)")
-
-
-
%>% filter(crop == "wheat" & country == "ZAF") %>%
- crop_ylds ggplot() + geom_point(aes(year, harv_area)) +
- geom_smooth(aes(year, harv_area)) +
- xlab("") + ylab("Harvested area (ha)") +
- ggtitle("South African wheat (1961-2017)")
-
-
-
# ggplot2
-%>% filter(crop == "wheat" & country == "ZMB") %>%
- crop_ylds ggplot() +
- geom_histogram(aes(x = harv_area), bins = 10, col = "black", fill = "blue") +
- xlab("Harvested area (ha)") + ggtitle("Zambian Wheat (1961-2017)")
-
-# hist
-# with dplyr
-%>% filter(crop == "wheat" & country == "ZMB") %>% pull(harv_area) %>%
- crop_ylds hist(., main = "Zambian Wheat (1961-2017)", xlab = "Harvested area (ha)",
- col = "blue")
- # with base subsetting
-hist(crop_ylds$harv_area[crop_ylds$crop == "wheat" &
-$country == "ZMB"],
- crop_yldsmain = "Zambian Wheat (1961-2017)", xlab = "Harvested area (ha)",
- col = "blue")
-
-
-
%>% filter(country == "ZAF") %>%
- crop_ylds ggplot() + geom_point(aes(x = year, y = harv_area)) +
- geom_smooth(aes(x = year, y = harv_area)) +
- facet_grid(cols = vars(crop)) +
- scale_color_manual(values = c("red", "blue")) +
- ylab("Yield (tons/ha)") + xlab("")
-
-
Unit 2 Practice answers
-GEOG246-346
- - - - - - - - - -1 Module 1 practice answers
-1.1 Practice 1
-1.1.1 Questions
--
-
Answers here.
-Assuming the
tibble
has x and y or lat/long coordinates, you apply the functionst_as_sf
with the “coords” argument set to specify which columns contain the x and y coordinates.
-sf::plot
by default plots one panel per variable. You can create a single panel by specifying the variable you want, or by using thest_geometry
argument to strip out the geometry from object. It also prevent distortions that sometimes occur when overlaying subsequent features on top of a base map.
-For a single point you provide an x and y coordinate, otherwise you give an input matrix containing x and y coordinates. A polygon requires that the last point pair in the matrix is the same as the first point pair, to close the polygon.
-
1.1.2 Code
--
-
-
%>% st_geometry()
- farmers st_geometry(farmers)
-
-
-
st_crs(farmers) <- st_crs(districts)
-# p <- "path/to/your/project/notebooks/data"
-<- "~/Desktop"
- p st_write(farmers, dsn = file.path(p, "farmers.sqlite"))
-rm(farmers)
-st_read(file.path(p, "farmers.sqlite"))
-
-
-
class(roads)
-str(roads)
-class(districts)
-str(districts)
-
-
-
plot(roads %>% st_geometry(), col = "blue")
-
-
-
plot(districts %>% select(distName), main = "Zambia Districts")
-
-
-
<- st_multipoint(x = cbind(x = c(27, 28, 29), y = c(-13, -14, -15)))
- pts plot(districts %>% st_geometry(), col = "grey")
-plot(pts, add = TRUE, col = "orange", pch = 16)
1.2 Practice 2
-1.2.1 Questions
--
-
At least with this example, pretty negligible–well less than 1% mean absolute error. It might matter more in other places and with other scales though. The reason
st_area
knows how to estimate areas is because it invokeslwgeom::st_geod_area
, which calculates a geodetic surface area.
-Because for the time being
sf::plot
is a fair bit faster. However, this recent twitter thread suggests that may change soon.
-By using
mutate
withcut
that has break values based on those properties. In our example, we found the breaks usingquantile
and different probabilities/percentile levels that creating tertiles of area.
-
1.2.2 Code
--
-
-
set.seed(1)
-%>% sample_n(20) %>% st_area() %>% units::set_units("ha") %>% mean() districts
-
-
-
set.seed(1)
-%>% sample_n(100) %>% st_length() %>% units::set_units("km") %>% mean() roads
-
-
-
plot(st_geometry(districts), col = "lightgrey")
-set.seed(1)
-%>% filter(season == 2) %>% sample_n(200) %>% st_geometry() %>%
- farmers plot(col = "red", pch = 20, add = TRUE)
-
-
-
%>% st_transform(st_crs(roads)) %>% st_geometry() %>%
- districts plot(col = "lightgrey")
- %>%
- roads mutate(length = as.numeric(st_length(.) / 1000)) %>%
- filter(length > 50 & length < 100) %>% st_geometry() %>%
- plot(col = "red", pch = 20, add = TRUE)
-
-
-
<- function(x) quantile(x, probs = seq(0, 1, 0.1))
- deciles <- districts %>% mutate(area = as.numeric(st_area(.)) / 10^6) %>%
- dist_deciles mutate(acls = cut(area, breaks = deciles(area), include.lowest = TRUE)) %>%
- group_by(acls) %>% summarize(sum_area = sum(area))
-
- dist_deciles#
-# #2
-<- heat.colors(10)
- cols par(mar = rep(0, 4))
-plot(st_geometry(dist_deciles), col = cols)
-legend(x = "bottomright", pch = 15, col = cols, bty = "n",
-legend = paste0(1:10, c("st", "nd", "rd", rep("th", 7))))
1.2.3 Practice 3
-1.2.3.1 Questions
--
-
It changes features from one type to another (e.g. POLYGON to MULTIPOLYGON), either one specified by the user or the simplest possible common feature, if left unspecified. Casting is sometimes necessary to avoid mixed feature types that cause failures for subsequent operations.
-st_union
runs under the hood ofsummarise.sf
, so asummarize
operation on ansf
will result in a merged/dissolved set of spatial features.
-It affects the order of the fields in the resulting unioned
sf
object–the fields from the object passed to the “x” argument appear first.
-
1.2.3.2 Code
--
-
-
<- cbind("x" = c(27, 27.5, 27.5, 27, 27),
- coords "y" = c(-13, -13, -13.5, -13.5, -13))
- <- st_polygon(x = list(coords)) %>% st_sfc %>% st_sf(ID = 1, crs = 4326)
- pol2
-par(mar = rep(0, 4))
-plot(st_geometry(districts), col = "grey")
-plot(pol2, col = "blue", add = TRUE)
-
-
-
<- st_intersection(pol2, districts)
- pol2_int_dists 1] %>% plot(col = "grey", main = NULL)
- districts[2] %>% plot(col = rainbow(nrow(pol2_int_dists)), add = TRUE) pol2_int_dists[
-
-
-
pol2
is nearly 26 hectares larger
%>% st_area() %>% as.numeric() %>% sum() / 10000 -
- pol2_int_dists %>% st_area() %>% as.numeric() %>% sum() / 10000 pol2
-
-
-
st_difference(districts, pol2)[2] %>% plot(col = "grey", main = NULL)
-
-
-
# Try compare the
-set.seed(1)
-%>% filter(season == 2) %>%
- farmers_alb sample_n(size = 5) %>% st_buffer(dist = 30000) %>% st_geometry %>% plot()
-
-# With
-set.seed(1)
-%>% filter(season == 2) %>%
- farmers_alb st_sample(size = 5) %>% st_buffer(dist = 30000) %>% st_geometry %>% plot()
-
-
-
<- roads %>% filter(as.numeric(st_length(.)) / 1000 > 400)
- roads_gt400 par(mar = rep(0, 4))
-st_transform(districts, st_crs(roads)) %>% st_geometry %>%
-plot(col = "grey", main = NULL)
- %>% st_buffer(25000) %>% plot(col = "green", add = TRUE) roads_gt400
-
-
-
par(mar = rep(0, 4))
-st_transform(districts, st_crs(roads)) %>% st_geometry %>%
-plot(col = "grey", main = NULL)
- %>% st_buffer(25000) %>% plot(col = "tan", add = TRUE)
- roads_gt400 set.seed(1)
-%>% st_buffer(25000) %>%
- roads_gt400 st_sample(size = rep(20, nrow(.)), exact = TRUE) %>%
- plot(col = "red", pch = 20, add = TRUE)
2 Module 2 practice answers
-2.1 Practice 1
-2.1.1 Questions
--
-
raster
uses the S4 object-oriented system, wheresf
uses S4. Slots in S4 objects are accessed using the@
operator.
-brick
, becauseraster
only allows you to read and write a single layer.
-stack
organizes multiple rasters that might be stored in separation locations into a single multi-layer object, whereasbrick
requires a single file on disk.bricks
take longer to create, but are faster to work with once they exist.
-The output of
raster
’s vectorization functions aresp
objects, so you have to convert them tosf
objects usingst_as_sf
.
-
2.1.2 Code
--
-
-
# recreate r, r2, r3
-<- extent(c("xmin" = 27, "xmax" = 29, "ymin" = -16, "ymax" = -14))
- e <- raster(x = e, res = 0.25, crs = crs(districts))
- r set.seed(1)
-values(r) <- sample(1:100, size = ncell(r), replace = TRUE) # 3
-<- r > 50
- r2 <- r
- r3 set.seed(1)
-values(r3) <- rnorm(n = ncell(r3), mean = 10, sd = 2)
-
-#
-<- r3
- r4 set.seed(1)
-values(r4) <- runif(n = ncell(r4), 0, 1)
-<- r4 > 0.5
- r5
-<- list(r, r2, r3, r4, r5) %>% stack
- s2 names(s2) <- c("r", "r2", "r3", "r4", "r5")
-plot(s2)
-
-
- We use
tempdir()
here, but you should use yournotebooks/data
folder.
-
<- brick(s2, file = file.path(tempdir(), "b2.tif")) b
-
-
-
<- raster(x = extent(districts), crs = crs(districts), res = 0.2)
- zamr3 values(zamr3) <- 1:ncell(zamr3)
-
-<- farmers %>% distinct(uuid, .keep_all = TRUE) %>% select(x, y) %>%
- farmersr mutate(count = 1) %>% st_as_sf(coords = c("x", "y")) %>%
- rasterize(x = ., y = zamr3, field = "count", fun = sum)
-
-par(mar = c(0, 0, 1, 4))
-%>% st_union %>% plot(col = "grey", border = "grey")
- districts plot(farmersr, add = TRUE)
-
-
-
<- projectRaster(from = zamr, res = 20000, crs = crs(roads),
- zamr_alb method = "ngb")
- <- projectRaster(from = farmersr, to = zamr_alb,
- farmersr_alb method = "bilinear")
-
-par(mar = c(0, 0, 1, 4), mfrow = c(1, 2))
-%>% st_union %>% plot(col = "grey", border = "grey")
- districts plot(farmersr, add = TRUE)
-%>% st_transform(crs(roads)) %>% st_union %>%
- districts plot(col = "grey", border = "grey")
- plot(farmersr_alb, add = TRUE)
-
-
-
par(mar = c(0, 0, 0, 0))
-%>% rasterToPolygons(dissolve = TRUE) %>% st_as_sf %>%
- farmersr plot(main = NULL)
2.2 Practice 2
-2.2.1 Questions
--
-
cellStats
provides summary statistics over an entire layer;zonal
calculates statistics within pre-defined zones;focal
calculates statistics within a moving window.
-Use
method = bilinear
.
-You don’t. You have to resample them to a common resolution and extent first. And then you have to
stack
them.
-
2.2.2 Code
--
-
-
as.Date("10-11-2017", "%m-%d-%Y")
-as.Date("10-11-17", "%m-%d-%y")
-as.Date("101117", "%m%d%y")
-as.Date("10112017", "%m%d%Y")
-::mdy("10-11-2017")
- lubridate::as_date("20171011") lubridate
-
-
-
<- farmers %>% distinct(uuid, .keep_all = TRUE) %>%
- farmersr2 mutate(count = 1) %>% select(x, y, count) %>%
- st_as_sf(coords = c("x", "y")) %>%
- rasterize(., distsr, field = "count", fun = sum)
-
-
-
zonal(farmersr2, distsr, fun = sum) %>% data.frame %>%
-subs(distsr, .) %>% plot_noaxes
-
-
-
<- matrix(1, nrow = 3, ncol = 3)
- wmat3 <- matrix(1, nrow = 5, ncol = 5)
- wmat5 <- list(sd3 = focal(x = chirpsz[[20]], w = wmat3, fun = sd),
- fstack sd5 = focal(x = chirpsz[[20]], w = wmat5, fun = sd),
- max3 = focal(x = chirpsz[[20]], w = wmat3, fun = max),
- max5 = focal(x = chirpsz[[20]], w = wmat5, fun = max)) %>% stack
- %>% plot_noaxes fstack
-
-
-
<- chirpsz[[1]] %>% crop(., extent(districts[57, ]))
- chirps_d57 <- list(d1 = disaggregate(chirps_d57, fact = 5),
- s d2 = disaggregate(chirps_d57, fact = 5, method = "bilinear")) %>%
-
- stackplot_noaxes(s)
-
-
-
<- lapply(list(mean, cv, median), function(x) {
- s calc(chirpsz, fun = x)
- %>% stack
- }) names(s) <- c("Mean", "CV", "Median")
-plot_noaxes(s, nr = 1)
2.3 Practice 3
-2.3.1 Questions
--
-
Using
cut
with vector of breakpoints (e.g. quantiles), or usereclassify
with a reclassification matrix.
-sampleRandom
,sampleStratified
. There is alsosampleRegular
.
-
2.3.2 Code
--
-
-
<- calc(chirpsz, fun = sd) chirps_sd
-
-
-
< cellStats(chirps_sd, mean)) %>% plot (chirps_sd
-
-
-
quantile(raintot, probs = seq(0, 1, 0.2)) %>% cut(raintot, .) %>%
- plot_noaxes
-
-
-
set.seed(11)
-<- districts %>% sample_n(size = 15) %>% rasterize(., raintot)
- randdistsr plot_noaxes(randdistsr)
-
-<- mask(raintot, randdistsr)
- newrandrain %>% plot_noaxes newrandrain
-
-
-
set.seed(1)
-<- sampleRandom(x = newrandrain, size = 300)
- randsamp
-set.seed(1)
-<- sampleStratified(x = randdistsr, size = 300 / 15, cells = TRUE)
- ind <- newrandrain[ind[, 1]]
- stratsamp
-<- bind_rows(
- rand_rain_stats tibble(rain = randsamp, dat = "Simple"),
- tibble(rain = stratsamp, dat = "Stratified"),
- %>% drop_na
- )
-<- theme(legend.title = element_blank(), axis.text.x = element_blank(),
- bp_theme axis.ticks.x = element_blank(),
- panel.grid.major.x = element_blank(),
- panel.grid.minor.x = element_blank(),
- panel.background = element_rect(fill = "grey95"))
-
-%>% ggplot() +
- rand_rain_stats geom_boxplot(mapping = aes(y = rain, fill = dat), position = "dodge2") +
- scale_fill_manual(values = c("lightblue", "steelblue")) +
- ggtitle("Rainfall distributions") + xlab(NULL) + ylab("mm") + bp_theme
2.4 Practice 4
-2.4.1 Questions
--
-
The
area
function is your friend for this.
-Use
expression
together combined withpaste
as needed for more complex labels.
-Make
names(predstack)
matches the predictor names used by the model.
-
2.4.2 Code
--
-
-
<- districts %>% filter(ID == 42) %>% st_transform(st_crs(roads)) %>%
- demalb42 crop(demalb, .)
- <- c("slope", "aspect", "flowdir", "tri")
- vars <- stack(lapply(1:length(vars), function(x) {
- terrvars <- terrain(x = demalb42, opt = vars[x], unit = "degrees")
- tv
- }))names(terrvars) <- vars
-
-plot_noaxes(terrvars)
-
-
-
library(gstat)
-
-# #1
-<- projectRaster(from = raintot, res = 5000, crs = crs(roads))
- raintotalb names(raintotalb) <- "rain"
-<- raster(extent(raintotalb), res = res(raintotalb), crs = crs(raintotalb), vals = 1)
- r
-# lapply approach to interpolation
-<- lapply(c(250, 500, 1000), function(x) {
- idw_list set.seed(1)
- <- sampleRandom(raintotalb, size = 1000, xy = TRUE)
- rainsamp <- as.data.frame(rainsamp)
- rainsamp <- gstat(id = "rain", formula = rain ~ 1, locations = ~x + y,
- invdist data = rainsamp)
- <- interpolate(object = r, model = invdist)
- invdistr <- mask(x = invdistr, mask = raintotalb)
- invdistrmsk
- })
-<- stack(c(raintotalb, idw_list))
- idws
-<- c("CHIRPS rainfall", "1000 pt IDW", "500 pt IDW", "250 pt IDW")
- titles plot_noaxes(idws, main = titles, zlim = c(0, 150))
-
-
-
%>% filter(ID %in% seq(15, 50, 5)) %>%
- districts st_transform(st_crs(roads)) %>% st_geometry %>% st_centroid %>%
- as_Spatial(.) %>%
- distanceFromPoints(object = raintotalb, xy = .) %>%
- mask(., raintotalb) %>% plot_noaxes
-
-
-
Pretty close to results of highest density sample. Slightly higher mean error.
-# #1
-data(zamprec)
-<- projectRaster(from = zamprec, to = raintotalb)
- zamprecalb names(zamprecalb) <- "rain"
-<- resample(aggregate(x = demalb, fact = 5), y = raintotalb)
- elev
-# #2
-set.seed(1)
-<- sampleRandom(x = zamprecalb, size = 25, sp = TRUE) %>% st_as_sf
- pts <- pts %>% mutate(elev = raster::extract(x = elev, y = .))
- pts <- bind_cols(pts %>% data.frame %>% select(-geometry) %>% as_tibble,
- pts_dat st_coordinates(pts) %>% as_tibble) %>% drop_na
-
- # #3
-<- ggplot(pts_dat) + geom_point(aes(X, rain), col = "steelblue") +
- p1 ylab("Rainfall (mm)")
- <- ggplot(pts_dat) + geom_point(aes(Y, rain), col = "blue2") + ylab("")
- p2 <- ggplot(pts_dat) + geom_point(aes(elev, rain), col = "darkblue") + ylab("")
- p3 ::plot_grid(p1, p2, p3, nrow = 1)
- cowplot
-# #4
-<- lm(rain ~ X + Y + elev, data = pts_dat)
- rain_lm summary(rain_lm)
-
-# #5
-<- xFromCell(object = raintotalb, cell = 1:ncell(raintotalb))
- xs <- yFromCell(object = raintotalb, cell = 1:ncell(raintotalb))
- ys <- Y <- raintotalb
- X values(X) <- xs
-values(Y) <- ys
-
-# #6
-<- stack(X, Y, elev)
- predst names(predst) <- c("X", "Y", "elev")
-<- predict(object = predst, model = rain_lm)
- predrainr
-# #7
-<- stack(zamprecalb, predrainr, (predrainr - zamprecalb) / zamprecalb * 100)
- s <- round(cellStats(abs(zamprecalb - predrainr), mean), 1)
- mae
-<- c("'Observed' Rainfall", "Predicted Rainfall", "% Difference")
- pnames par(mfrow = c(1, 3), mar = c(0, 0, 1, 4))
-for(i in 1:3) {
-plot_noaxes(s[[i]], main = pnames[i])
- if(i %in% 1:2) {
- %>% st_geometry %>%
- pts plot(pch = 20, cex = 0.2, col = "grey70", add = TRUE)
- else {
- } mtext(side = 1, line = -3, cex = 0.8,
- text = paste("Mean abs err =", mae, "mm"))
-
- } }
-
-