diff --git a/docs/index.Rmd b/docs/index.Rmd index 53dd0b7..da45240 100644 --- a/docs/index.Rmd +++ b/docs/index.Rmd @@ -9,15 +9,15 @@ knitr::opts_chunk$set(echo = TRUE) # Geospatial Analysis with R -## Spring 2023 +## Spring 2024 Updated on: `r Sys.Date()`
------------------ ------------------------------------------------------ -Instructor Michael Cecil (mcecil@clarku.edu) -Teaching assistant Arman Bajracharya +Instructor Lyndon Estes +Teaching assistant Vanchy Li ------------------ ------------------------------------------------------
@@ -41,75 +41,48 @@ The class materials were designed by Lyndon Estes and Lei Song. - [Module 2](unit1-module2.html) - R and R fundamentals - [Module 3](unit1-module3.html) - Data preparation and operation - [Module 4](unit1-module4.html) - Data manipulation and visualization - - [Unit1 practice answers](unit1-practice-answers.html) - Answers and code for Unit1 practice - [Unit 2](unit2.html) - Overview - [Module 1](unit2-module1.html) - Vector data - [Module 2a](unit2-module2a.html) - Raster data, part 1 - [Module 2b](unit2-module2b.html) - Raster data, part 2 - - [Unit2 practice answers](unit2-practice-answers.html) - Answers and code for Unit2 practice - + - [Potential Projects](projects.html) ## Course content -- Week 1 (Jan 18) +- Week 1 (Jan 17) - Setup - [Class 1 slides](class1.html) -- Week 2 (Jan 23, 25) +- Week 2 (Jan 22, 24) - Using git and Github; R packages; - - [Class 2 slides](class2.html) - - [Class 3 slides](class3.html) - Assignment 1 due (Unit 1 Module 1) -- Week 3 (Jan 30, Feb 1) +- Week 3 (Jan 29, Jan 31) - Continued setup work, RMarkdown, R ecosystem - - [Class 4 slides](class4.html) - - [Class 5 slides](class5.html) -- Week 4 (Feb 6, 8) +- Week 4 (Feb 5, 7) - R data structures, OOP, Environments, Control flow - - [Class 6 slides](class6.html) - - [Class 7 slides](class7.html) - Assignment 2 due (Unit 1 Modules 2-3) -- Week 5 (Feb 13, 15) +- Week 5 (Feb 12, 14) - tidyr universe, working with data - - [Class 8 slides](class8.html) - - [Class 9 slides](class9.html) -- Week 6 (Feb 20, 22) +- Week 6 (Feb 19, 21) - regression, plotting - - [Class 10 slides](class10.html) - - [Class 11 slides](class11.html) - Assignment 3 due (Unit 1 Module 4) -- Week 7 (Feb 27, Mar 1) +- Week 7 (Feb 19, 21) - more ggplot, intro to vector - - [Class 12 slides](class12.html) - - [Class 13 slides](class13.html) - Spring Break -- Week 8 (Mar 13, 15) +- Week 8 (Feb 26, 28) - Vector operations - - [Class 14 slides](class14.html) - - [Class 15 slides](class15.html) - Assignment 4 due (Unit 2 Module 1) -- Week 9 (Mar 20, 22) +- Week 9 (Mar 11, 13) - Raster basics - - [Class 16 slides](class16.html) - - Mar 22 online (AAG) -- Week 11 (Mar 27, 29) +- Week 11 (Mar 18, 20) - Raster analysis, neighborhoods - Mar 27 project prep (AAG) - - [Class 17 slides](class17.html) -- Week 12 (Apr 3, 5) +- Week 12 (Mar 25, 27) - Raster algebra, terrain, modeling - - [Class 18 slides](class18.html) - - [Class 19 slides](class19.html) - Assignment 5 (Unit 2 Module 2) -- Week 13-15 (Apr 10, 12, 17, 19, 24, 26, May 1) - - Project Overview due (Apr 16) - - [Class 20 slides (leaflet, plotly)](class20.html) - - [Random Forest - raster classification](wur_rf_demo.html) - - [Random Forest - ground sensors](sensor_rf_demo.html) - - [rgee - install](rgee_install.html) - - [rgee - examples](rgee_examples.html) - - [Shiny apps](rshiny.html) +- Week 13-15 (April) + - Project Overview due - Final Projects due - - May 8 + - May 5 diff --git a/docs/index.html b/docs/index.html index 22b016c..e173ac2 100644 --- a/docs/index.html +++ b/docs/index.html @@ -587,12 +587,12 @@ @@ -603,19 +603,19 @@

Geospatial Analysis with R

-

Spring 2023

-

Updated on: 2023-04-22

+

Spring 2024

+

Updated on: 2024-01-17

- + - +
InstructorMichael Cecil (mcecil@clarku.edu)Lyndon Estes
Teaching assistantArman BajracharyaVanchy Li
@@ -647,8 +647,6 @@

Course materials

operation
  • Module 4 - Data manipulation and visualization
  • -
  • Unit1 practice answers - -Answers and code for Unit1 practice
  • Unit 2 - Overview
  • Potential Projects
  • Course content

    diff --git a/docs/index.md b/docs/index.md index 26bab43..e7e2b24 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,108 +1,81 @@ # Geospatial Analysis with R -## Spring 2023 +## Spring 2024 -Updated on: 2023-04-17 +Updated on: 2024-01-17
    -| | | -|:-------------------|:------------------------------------| -| Instructor | Michael Cecil () | -| Teaching assistant | Arman Bajracharya | +| | | +|:-------------------|:-------------| +| Instructor | Lyndon Estes | +| Teaching assistant | Vanchy Li |
    ## Resources -- [Syllabus](syllabus.html) -- [Assessment Criteria](assessment.html) -- [Software installation](software-installation.html) -- [Git/GitHub](git-github.html) -- [Known bugs and fixes](bugs-fixes.html) +- [Syllabus](syllabus.html) +- [Assessment Criteria](assessment.html) +- [Software installation](software-installation.html) +- [Git/GitHub](git-github.html) +- [Known bugs and fixes](bugs-fixes.html) ## Links -- [Helpful functions](helpful_functions.html) -- [Cheatsheets](cheatsheets.html) +- [Helpful functions](helpful_functions.html) +- [Cheatsheets](cheatsheets.html) ## Course materials The class materials were designed by Lyndon Estes and Lei Song. -- [Unit 1](unit1.html) - Overview - - [Module 1](unit1-module1.html) - Reproducibility and related R - skills - - [Module 2](unit1-module2.html) - R and R fundamentals - - [Module 3](unit1-module3.html) - Data preparation and operation - - [Module 4](unit1-module4.html) - Data manipulation and - visualization - - [Unit1 practice answers](unit1-practice-answers.html) - Answers - and code for Unit1 practice -- [Unit 2](unit2.html) - Overview - - [Module 1](unit2-module1.html) - Vector data - - [Module 2a](unit2-module2a.html) - Raster data, part 1 - - [Module 2b](unit2-module2b.html) - Raster data, part 2 - - [Unit2 practice answers](unit2-practice-answers.html) - Answers - and code for Unit2 practice -- [Potential Projects](projects.html) +- [Unit 1](unit1.html) - Overview + - [Module 1](unit1-module1.html) - Reproducibility and related R + skills + - [Module 2](unit1-module2.html) - R and R fundamentals + - [Module 3](unit1-module3.html) - Data preparation and operation + - [Module 4](unit1-module4.html) - Data manipulation and visualization +- [Unit 2](unit2.html) - Overview + - [Module 1](unit2-module1.html) - Vector data + - [Module 2a](unit2-module2a.html) - Raster data, part 1 + - [Module 2b](unit2-module2b.html) - Raster data, part 2 +- [Potential Projects](projects.html) ## Course content -- Week 1 (Jan 18) - - Setup - - [Class 1 slides](class1.html) -- Week 2 (Jan 23, 25) - - Using git and Github; R packages; - - [Class 2 slides](class2.html) - - [Class 3 slides](class3.html) - - Assignment 1 due (Unit 1 Module 1) -- Week 3 (Jan 30, Feb 1) - - Continued setup work, RMarkdown, R ecosystem - - [Class 4 slides](class4.html) - - [Class 5 slides](class5.html) -- Week 4 (Feb 6, 8) - - R data structures, OOP, Environments, Control flow - - [Class 6 slides](class6.html) - - [Class 7 slides](class7.html) - - Assignment 2 due (Unit 1 Modules 2-3) -- Week 5 (Feb 13, 15) - - tidyr universe, working with data - - [Class 8 slides](class8.html) - - [Class 9 slides](class9.html) -- Week 6 (Feb 20, 22) - - regression, plotting - - [Class 10 slides](class10.html) - - [Class 11 slides](class11.html) - - Assignment 3 due (Unit 1 Module 4) -- Week 7 (Feb 27, Mar 1) - - more ggplot, intro to vector - - [Class 12 slides](class12.html) - - [Class 13 slides](class13.html) -- Spring Break -- Week 8 (Mar 13, 15) - - Vector operations - - [Class 14 slides](class14.html) - - [Class 15 slides](class15.html) - - Assignment 4 due (Unit 2 Module 1) -- Week 9 (Mar 20, 22) - - Raster basics - - [Class 16 slides](class16.html) - - Mar 22 online (AAG) -- Week 11 (Mar 27, 29) - - Raster analysis, neighborhoods - - Mar 27 project prep (AAG) - - [Class 17 slides](class17.html) -- Week 12 (Apr 3, 5) - - Raster algebra, terrain, modeling - - [Class 18 slides](class18.html) - - [Class 19 slides](class19.html) - - Assignment 5 (Unit 2 Module 2) -- Week 13-15 (Apr 10, 12, 17, 19, 24, 26, May 1) - - Project Overview due (Apr 16) - - [Class 20 slides (leaflet, plotly)](class20.html) - - [Random Forest - raster classification](wur_rf_demo.html) - - [Random Forest - ground sensors](sensor_rf_demo.html) -- Final Projects due - - May 8 +- Week 1 (Jan 17) + - Setup + - [Class 1 slides](class1.html) +- Week 2 (Jan 22, 24) + - Using git and Github; R packages; + - Assignment 1 due (Unit 1 Module 1) +- Week 3 (Jan 29, Jan 31) + - Continued setup work, RMarkdown, R ecosystem +- Week 4 (Feb 5, 7) + - R data structures, OOP, Environments, Control flow + - Assignment 2 due (Unit 1 Modules 2-3) +- Week 5 (Feb 12, 14) + - tidyr universe, working with data +- Week 6 (Feb 19, 21) + - regression, plotting + - Assignment 3 due (Unit 1 Module 4) +- Week 7 (Feb 19, 21) + - more ggplot, intro to vector +- Spring Break +- Week 8 (Feb 26, 28) + - Vector operations + - Assignment 4 due (Unit 2 Module 1) +- Week 9 (Mar 11, 13) + - Raster basics +- Week 11 (Mar 18, 20) + - Raster analysis, neighborhoods + - Mar 27 project prep (AAG) +- Week 12 (Mar 25, 27) + - Raster algebra, terrain, modeling + - Assignment 5 (Unit 2 Module 2) +- Week 13-15 (April) + - Project Overview due +- Final Projects due + - May 5 diff --git a/docs/unit1-practice-answers.html b/docs/unit1-practice-answers.html deleted file mode 100644 index 0c315d4..0000000 --- a/docs/unit1-practice-answers.html +++ /dev/null @@ -1,770 +0,0 @@ - - - - - - - - - - Unit 1 Practice answers - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - -

    Unit 1 Practice answers

    -

    GEOG246-346

    - - - - - - - - - -
    -
    -

    1 Module 1 practice answers

    -

    N/A

    -
    -
    -

    2 Module 2 practice answers

    -

    N/A

    -
    -
    -

    3 Module 3 practice answers

    -
    -

    3.1 Practice 1

    -
    -

    3.1.1 Questions

    -
      -
    1. Class of a is integer.

    2. -
    3. Because the vector is contained in a list, and using a single [] to pull out an element returns a 1-element list, so you have to use list notation to get the vector.

    4. -
    5. a is vector of integers, l is a list of containing 2 integer vectors and 1 character vector.

    6. -
    7. We applied a function that is the fourth element of list l2 to the integer vector f that is the second element of l2.

    8. -
    -
    -
    -

    3.1.2 Code

    -
      -
    1. -
    -
    a <- 20:30
    -b <- letters
    -
      -
    1. -
    -
    names(a) <- b[1:length(a)]
    -a
    -
      -
    1. -
    -
    l <- list(a = a, b = b)
    -
      -
    1. -
    -
    a[a >= 26]
    -a[c(1, 7)]
    -a[c(length(a) - 1, length(a))]
    -
      -
    1. -
    -
    b[b %in% c("a", "c", "g")]
    -
      -
    1. -
    -
    l[[1]]
    -l[1]
    -l$a[l$a < 25]
    -l[["a"]][l[["a"]] == 25]
    -l[[2]][l[[2]] %in% c("d", "e", "f")]
    -
    -
    -
    -

    3.2 Practice 2

    -
    -

    3.2.1 Questions

    -
      -
    1. The vector is coerced to character.

    2. -
    3. The whole matrix is coerced to character. If data.frame, only m$b is coerced to character.

    4. -
      1. -
      2. Have to use both column and row indices to attract row, col subset;
      3. -
      4. Can use list notation for data.frame (e.g. m$b), but not for matrix.
      5. -
    5. -
    -
    -
    -

    3.2.2 Code

    -
      -
    1. -
    -
    m <- cbind(1:10, 11:20, 21:30)
    -
      -
    1. -
    -
    m[4:5, 2:3]
    -
      -
    1. -
    -
    colnames(m) <- letters[1:3]
    -
      -
    1. -
    -
    m[m[, "b"] > 14 & m[, "b"] <= 18, ]
    -
      -
    1. -
    -
    d <- as.data.frame(m)
    -
      -
    1. -
    -
    d[d$a > 4, "a"] <- -1 
    -
      -
    1. -
    -
    d$c <- letters[1:10]
    -
      -
    1. -
    -
    l <- list(m, d)
    -l[[2]][2:3, ]
    -
      -
    1. -
    -
    d %>% filter(b >= 14 & b <= 18)
    -
    -
    -
    -

    3.3 Practice 3

    -
    -

    3.3.1 Questions

    -
      -
    1. 1 and 5 pulled from b and multiplied in sequence on m by row then column.

    2. -
    3. Would multiply 1st columns of each, 2nd columns of each, and 3rd columns of each.

    4. -
    5. d row 1 columns 1 to 3 multiplied by the value from the last row, first column of m.

    6. -
    -
    -
    -

    3.3.2 Code

    -
      -
    1. Not shown here due to length

    2. -
    3. -
    -
    sin(m)
    -cos(m)
    -
      -
    1. -
    -
    rowSums(d[, 1:3])
    -colSums(d[, 1:3])
    -rowMeans(d[, 1:3])
    -colMeans(d[, 1:3])
    -
    -
    -
    -

    3.4 Practice 4

    -
    -

    3.4.1 Code

    -
      -
    1. -
    -

    It adds an NA to print statement for the 5th letter of the alphabet. Expand sscript by adding another “th” to the vector, so it has 5 elements.

    -
    sscript <- c("st", "nd", "rd", "th", "th")  # vector of superscripts  
    -for(i in 1:5) {  # for loop with iterator i over vector 1:4
    -  stmnt <- paste0(letters[i], " is the ", i, sscript[i],
    -                  " letter in the alphabet")
    -  print(stmnt)  # print statement
    -}
    -
      -
    1. -
    -

    Turns 4 of the letters red and adds axes to plots

    -
    sscript <- c("st", "nd", "rd", "th")  # vector of superscripts  
    -par(mfrow = c(1, 4), mar = c(0, 0, 1, 0.5))
    -for(i in 1:4) {  
    -  stmnt <- paste0(letters[i], " is the ", i, sscript[i],
    -                  " letter in the alphabet")
    -  plot(1:4, rep(3, 4), ylim = c(1, 5), pch = letters[1:4], #axes = FALSE,
    -       xlab = "", ylab = "", main = stmnt, cex = 2) 
    -  # points(i, 3, pch = letters[i], col = "red", cex = 2)
    -}
    -
      -
    1. -
    -
    # Chunk 26
    -for(i in 1:10) {
    -  if(i < 5) {  # condition 1
    -    print(paste(i, "is less than", i + 1))
    -  } else if(i >= 3 & i <= 8) {  # condition 2
    -    print(paste(i, "is between", i - 1, "and", i + 1))
    -  } else {  # remaining conditions
    -    print(paste(i, "is greater than", i - 1))
    -  }
    -}
    -
      -
    1. -
    -

    Create a for loop that iterates over a vector 1:20. Insert a condition into it such that it only prints out a result when the iterator’s value is 11.

    -
    for(i in 1:20) {
    -  if(i == 11) print(i)
    -}
    -
    -
    -
    -

    3.5 Practice 5

    -
    -

    3.5.1 Code

    -
      -
    1. -
    -

    Turns the summary of “a” variables to NA.

    -
    dat_list <- list(data.frame(a = 1:10, b = 21:30), 
    -                 data.frame(a = 31:40, b = 41:50),
    -                 data.frame(a = 51:60, b = 61:70))
    -dat_modify <- function(x) {
    -  x[1:3, 1] <- 999
    -  return(x)
    -}
    -dat_list <- lapply(dat_list, dat_modify)
    -dat_list
    -
    -dat_list2 <- c(dat_list, mean)  # add another element to dat_list
    -lapply(1:length(dat_list2), function(x) {  # x <- 1
    -  d <- dat_list2[[x]]  # extract element of list
    -  if(is.data.frame(d)) {  # check if it is a data.frame
    -    d[d == 999] <- NA  # convert any 999 values to NA 
    -    o <- c(colSums(d, na.rm = FALSE), # column sums, dropping NAs
    -           "total" = sum(d, na.rm = TRUE)) #  sum dropping NAs
    -  } else { # if it is not a data.frame, make an error statement
    -    o <- paste("Operation not valid for a", class(d))  
    -  }
    -  return(o)  # return result
    -})
    -
      -
    1. -
    -

    Can’t coerce list to double.

    -
    flist <- list(mean, sd, range)
    -lapply(1:3, function(x) flist[[x]](dat_list[[1]]))
    -
      -
    1. -
    -
    lapply(dat_list, function(x) mean(unlist(x)))
    -lapply(dat_list, function(x) sd(unlist(x)))
    -
      -
    1. -
    -
    sapply(dat_list, function(x) mean(unlist(x)))
    -sapply(dat_list, function(x) sd(unlist(x)))
    -
      -
    1. -
    -
    lapply(dat_list, function(x) sum(x[1:2, 2]))
    -
    -
    -
    -
    -

    4 Module 4 practice answers

    -
    -

    4.1 Practice 1

    -
    -

    4.1.1 Questions

    -
      -
    1. A tibble is an enhanced data.frame. Among other features, it provides more information on data types it contains when printing. It can be treated exactly like a data.frame though, in terms of indexing and other operations.

    2. -
    3. Base methods: tb_a$a; tb_a[["a"]]; tb_a[, "a"]. tidyverse, with tb_a %>% dplyr::select(a). Note that that gives back a tibble (or data.frame, if you have a data.frame). To get a vector back, use tb_a %>% dplyr::pull(a)

    4. -
    5. The data are messy, because the “column headers are values, not variable names” (see here). We would gather the data in the month rows, setting the month as the key and the mean flu cases as the value.

    6. -
    7. inner_join preserves just the values in x and drops non-matching rows from y. left_join fills non-matching values in y with NAs. right_join preserves values in y dropping non-matching values in x. full_join preserves all non-matching rows in both x and y, filling NAs into non-matched rows.

    8. -
    -
    -
    -

    4.1.2 Code

    -
      -
    1. -
    -
    set.seed(1)
    -dat <- tibble(a = sample(1:10), b = rnorm(10))
    -td <- "/path/where/you/want/to/write" # REPLACE THIS WITH YOUR OWN!!!!
    -readr::write_csv(dat, path = file.path(td, "dummy.csv"))
    -
      -
    1. -
    -
    td <- "/path/where/you/want/to/write" # REPLACE THIS WITH YOUR OWN!!!!
    -readr::read_csv(file.path(td, "dummy.csv"))
    -
      -
    1. -
    -

    Recreate tibbles first

    -
    # Chunk 13
    -set.seed(1)
    -t1 <- tibble(v1 = paste0("N", 1:5), v2 = rnorm(5))
    -t2 <- tibble(v1 = paste0("N", 1:5), v3 = runif(5))
    -t3 <- tibble(v1 = paste0("N", 1:7), v4 = sample(1:100, 7))
    -             # v5 = letters[sample(1:26, 7)])
    -t4 <- tibble(v1 = paste0("N", c(1:2, 4:7, 11)), 
    -             v5 = letters[sample(1:26, 7)])
    -

    Then do joins:

    -
    left_join(t1, t2) %>% left_join(., t3) %>% left_join(., t4)
    -right_join(t1, t2) %>% right_join(., t3) %>% right_join(., t4)
    -
      -
    1. -
    -
    left_join(t1, t2) %>% left_join(., t3) %>% left_join(., t4) %>% arrange(v5)
    -right_join(t1, t2) %>% right_join(., t3) %>% right_join(., t4) %>% 
    -  arrange(desc(v5))
    -
      -
    1. -
    -
    fs <- dir(system.file("extdata/", package = "geospaar"), pattern = "FAOSTAT", 
    -          full.names = TRUE)
    -crops <- lapply(fs, readr::read_csv)
    -crops_df <- do.call(rbind, lapply(crops, function(x) {
    -  x %>% dplyr::select(Item, Area, Element, Year, Value) %>% 
    -    pivot_wider(names_from = Element, values_from = Value) %>% 
    -    rename(crop = Item, country = Area, year = Year, 
    -           harv_area = `Area harvested`, prod = Production)
    -}))
    -crop_ylds <- crops_df %>% mutate(yield = prod / harv_area)
    -crop_ylds <- crop_ylds %>%     
    -  mutate(country = ifelse(country == "South Africa", "ZAF", country)) %>%  
    -  mutate(country = ifelse(country == "Zambia", "ZMB", country)) %>% 
    -  mutate(harv_km2 = harv_area / 100)
    -
      -
    1. -
    -
    crop_ylds %>% rename(harv_area_km2 = harv_km2)
    -
      -
    1. -
    -
    my_tb <- tibble(v1 = 1:10, v2 = 11:20) %>% 
    -  rbind(., tibble(v1 = 11:20, v2 = 21:30)) %>% mutate(v3 = v2^2) %>% 
    -  arrange(-v3)
    -
      -
    1. -
    -
    my_tb %>% slice(1, 10, 17) %>% dplyr::select(v2, v3)
    -
    -
    -
    -

    4.2 Practice 2: Analysis

    -
    -

    4.2.1 Questions

    -
      -
    1. dplyr::filter

    2. -
    3. group_by(crop, country, y2k) is doing the splitting, on crop type, then country, and then year. summarize(...) is doing the apply using a mean. There is no combine line, as it is implicit.

    4. -
    5. Chunk 30 adds a filter for crop type (selecting out maize), and then simply groups on the y2k variable.

    6. -
    7. It doesn’t work when the output of the analysis is not tabular/a list, as with cor.test and lm. We can overcome this by 1) creating individual functions that reproduce the component outputs of the analysis (e.g. Chunk 33) and these as a list of functions using funs to summarise_all, 2) doing the splits outside of the pipeline (e.g. Chunk 36), or 3) using functions such as do and broom::tidy within the pipeline (e.g. Chunk 39).

    8. -
    -
    -
    -

    4.2.2 Code

    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "sorghum" & country == "ZAF" & year >= 2000)
    -crop_ylds[crop_ylds$crop == "sorghum" & crop_ylds$country == "ZAF" & 
    -            crop_ylds$year >= 2000, ]
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "sorghum" & country == "ZAF" & year >= 2000) %>% 
    -  select(prod, harv_area, yield) %>% summarise_all(funs(mean, sd))
    -
      -
    1. -
    -
    crop_ylds %>% group_by(crop, country) %>% select(prod, harv_area) %>%
    -  summarise_all(funs(mean, sd))
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "maize" & country == "ZMB") %>% 
    -  select(yield, harv_area) %>% cor()
    -
    -dat <- crop_ylds %>% filter(crop == "maize" & country == "ZMB")
    -cor.test(dat$harv_area, dat$yield)
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "maize") %>% summarize(mu_yld = mean(yield))
    -# 2.07 t/ha
    -
      -
    1. -
    -

    South Africa shows larger yields gains (0.061 t/ha/yr versus 0.03 t/ha/yr)

    -
    summary(lm(yield ~ year, 
    -           data = crop_ylds %>% filter(crop == "maize" & country == "ZMB")))
    -summary(lm(yield ~ year, 
    -           data = crop_ylds %>% filter(crop == "maize" & country == "ZAF")))
    -
    -crop_ylds %>% filter(crop == "maize" & country == "ZMB") %>% 
    -  lm(yield ~ year, data = .) %>% summary() 
    -crop_ylds %>% filter(crop == "maize" & country == "ZAF") %>% 
    -  lm(yield ~ year, data = .) %>% summary()
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop != "sorghum") %>% group_by(crop, country) %>% 
    -  do(prod_ha_lm = lm(yield ~ year, data = .)) %>% 
    -  broom::tidy(., prod_ha_lm)
    -
    -
    -
    -

    4.3 Practice 3: Visualization

    -
    -

    4.3.1 Questions

    -
      -
    1. ggplot2 is built from grid graphics, and is based on an underlying visualization philosophy. It builds up graphics objects using the + operator, and easily does splits within the data using the “color” argument within aes and/or the facet_grid function. graphics plots can be faster to implement for exploratory analysis, ggplot2 has more attractive, presentation-grade defaults.

    2. -
    3. Because the syntax used in graphics plots is used in many of the plotting functions developed for spatial packages, including newer ones such as sf and stars.

    4. -
    5. Each of the three plots takes the axis labels exactly as they are specified to the axis arguments (either as they were specified in the formula in the case of Chunk 40 or to the “x” and “y” arguments in Chunk 41). You can change the names using the “xlab” and “ylab” arguments.

    6. -
    7. Using “col”, “pch”, and “cex” arguments.

    8. -
    9. You have to add the . to the “data” argument of plot, e.g. dat %>% plot(y ~ x, data = .)

    10. -
    11. You get just an empty grey background–ggplot won’t plot anything without a geom_* function added to the ggplot object.

    12. -
    -
    -
    -

    4.3.2 Code

    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "sorghum") %>%
    -  ggplot() + geom_histogram(aes(x = yield), bins = 15) +
    -  ggtitle("Distribution of sorghum yields")
    -crop_ylds %>% filter(crop == "sorghum") %>%
    -  ggplot() + geom_histogram(aes(x = yield), bins = 15, fill = "red") +
    -  ggtitle("Distribution of sorghum yields")
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "wheat" & country == "ZAF") %>% 
    -  plot(harv_area ~ year, data = ., pch = 16, col = "blue", 
    -       xlab = "", ylab = "Harvested area (ha)", 
    -       main = "South Africa wheat (1961-2017)")
    -crop_ylds %>% filter(crop == "wheat" & country == "ZAF") %>% 
    -  plot(harv_area ~ year, data = ., pch = 16, type = "l", col = "blue", 
    -       xlab = "", ylab = "Harvested area (ha)", 
    -       main = "South Africa wheat (1961-2017)")
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "wheat" & country == "ZAF") %>% 
    -  ggplot() + geom_point(aes(year, harv_area), col = "blue") + 
    -  xlab("") + ylab("Harvested area (ha)") +  
    -  ggtitle("South Africa wheat (1961-2017)")
    -crop_ylds %>% filter(crop == "wheat" & country == "ZAF") %>% 
    -  ggplot() + geom_line(aes(year, harv_area), col = "blue") + 
    -  xlab("") + ylab("Harvested area (ha)") +  
    -  ggtitle("South Africa wheat (1961-2017)")
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "wheat") %>% 
    -  ggplot() + geom_line(aes(year, harv_area, color = country)) +
    -  scale_color_manual(values = c("red", "blue")) + 
    -  xlab("") + ylab("Harvested area (ha)") +  
    -  ggtitle("Wheat (1961-2017)")
    -
    -# extra
    -crop_ylds %>% filter(crop == "wheat") %>% 
    -  ggplot() + geom_line(aes(year, log10(harv_area), color = country)) +
    -  scale_color_manual(values = c("red", "blue")) + 
    -  xlab("") + ylab("Harvested area (ha)") +  
    -  ggtitle("Wheat (1961-2017)")
    -
      -
    1. -
    -
    crop_ylds %>% filter(crop == "wheat" & country == "ZAF") %>% 
    -  ggplot() + geom_point(aes(year, harv_area)) +
    -  geom_smooth(aes(year, harv_area)) +
    -  xlab("") + ylab("Harvested area (ha)") +  
    -  ggtitle("South African wheat (1961-2017)")
    -
      -
    1. -
    -
    # ggplot2
    -crop_ylds %>% filter(crop == "wheat" & country == "ZMB") %>% 
    -  ggplot() + 
    -  geom_histogram(aes(x = harv_area), bins = 10, col = "black", fill = "blue") +
    -  xlab("Harvested area (ha)") + ggtitle("Zambian Wheat (1961-2017)")
    -
    -# hist
    -# with dplyr
    -crop_ylds %>% filter(crop == "wheat" & country == "ZMB") %>% pull(harv_area) %>% 
    -  hist(., main = "Zambian Wheat (1961-2017)", xlab = "Harvested area (ha)", 
    -       col = "blue")
    -# with base subsetting
    -hist(crop_ylds$harv_area[crop_ylds$crop == "wheat" & 
    -                           crop_ylds$country == "ZMB"], 
    -     main = "Zambian Wheat (1961-2017)", xlab = "Harvested area (ha)", 
    -     col = "blue")
    -
      -
    1. -
    -
    crop_ylds %>% filter(country == "ZAF") %>% 
    -  ggplot() + geom_point(aes(x = year, y = harv_area)) +
    -  geom_smooth(aes(x = year, y = harv_area)) + 
    -  facet_grid(cols = vars(crop)) +
    -  scale_color_manual(values = c("red", "blue")) + 
    -  ylab("Yield (tons/ha)") + xlab("")
    -
    -
    -Back to home -
    -
    -
    -
    -
    -
    - - - - - - - - - - - - - - - diff --git a/docs/unit2-practice-answers.html b/docs/unit2-practice-answers.html deleted file mode 100644 index 35d26a8..0000000 --- a/docs/unit2-practice-answers.html +++ /dev/null @@ -1,773 +0,0 @@ - - - - - - - - - - Unit 2 Practice answers - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - -

    Unit 2 Practice answers

    -

    GEOG246-346

    - - - - - - - - - -
    -
    -

    1 Module 1 practice answers

    -
    -

    1.1 Practice 1

    -
    -

    1.1.1 Questions

    -
      -
    1. Answers here.

    2. -
    3. Assuming the tibble has x and y or lat/long coordinates, you apply the function st_as_sf with the “coords” argument set to specify which columns contain the x and y coordinates.

    4. -
    5. sf::plot by default plots one panel per variable. You can create a single panel by specifying the variable you want, or by using the st_geometry argument to strip out the geometry from object. It also prevent distortions that sometimes occur when overlaying subsequent features on top of a base map.

    6. -
    7. For a single point you provide an x and y coordinate, otherwise you give an input matrix containing x and y coordinates. A polygon requires that the last point pair in the matrix is the same as the first point pair, to close the polygon.

    8. -
    -
    -
    -

    1.1.2 Code

    -
      -
    1. -
    -
    farmers %>% st_geometry()
    -st_geometry(farmers)
    -
      -
    1. -
    -
    st_crs(farmers) <- st_crs(districts)
    -# p <- "path/to/your/project/notebooks/data"
    -p <- "~/Desktop"
    -st_write(farmers, dsn = file.path(p, "farmers.sqlite"))
    -rm(farmers)
    -st_read(file.path(p, "farmers.sqlite"))
    -
      -
    1. -
    -
    class(roads)
    -str(roads)
    -class(districts)
    -str(districts)
    -
      -
    1. -
    -
    plot(roads %>% st_geometry(), col = "blue")
    -
      -
    1. -
    -
    plot(districts %>% select(distName), main = "Zambia Districts")
    -
      -
    1. -
    -
    pts <- st_multipoint(x = cbind(x = c(27, 28, 29), y = c(-13, -14, -15)))
    -plot(districts %>% st_geometry(), col = "grey")
    -plot(pts, add = TRUE, col = "orange", pch = 16)
    -
    -
    -
    -

    1.2 Practice 2

    -
    -

    1.2.1 Questions

    -
      -
    1. At least with this example, pretty negligible–well less than 1% mean absolute error. It might matter more in other places and with other scales though. The reason st_area knows how to estimate areas is because it invokes lwgeom::st_geod_area, which calculates a geodetic surface area.

    2. -
    3. Because for the time being sf::plot is a fair bit faster. However, this recent twitter thread suggests that may change soon.

    4. -
    5. By using mutate with cut that has break values based on those properties. In our example, we found the breaks using quantile and different probabilities/percentile levels that creating tertiles of area.

    6. -
    -
    -
    -

    1.2.2 Code

    -
      -
    1. -
    -
    set.seed(1)
    -districts %>% sample_n(20) %>% st_area() %>% units::set_units("ha") %>% mean()
    -
      -
    1. -
    -
    set.seed(1)
    -roads %>% sample_n(100) %>% st_length() %>% units::set_units("km") %>% mean()
    -
      -
    1. -
    -
    plot(st_geometry(districts), col = "lightgrey")
    -set.seed(1)
    -farmers %>% filter(season == 2) %>% sample_n(200) %>% st_geometry() %>% 
    -  plot(col = "red", pch = 20, add = TRUE)
    -
      -
    1. -
    -
    districts %>% st_transform(st_crs(roads)) %>% st_geometry() %>% 
    -  plot(col = "lightgrey")
    -roads %>%  
    -  mutate(length = as.numeric(st_length(.) / 1000)) %>% 
    -  filter(length > 50 & length < 100) %>% st_geometry() %>% 
    -  plot(col = "red", pch = 20, add = TRUE)
    -
      -
    1. -
    -
    deciles <- function(x) quantile(x, probs = seq(0, 1, 0.1))
    -dist_deciles <- districts %>% mutate(area = as.numeric(st_area(.)) / 10^6) %>%
    -  mutate(acls = cut(area, breaks = deciles(area), include.lowest = TRUE)) %>% 
    -  group_by(acls) %>% summarize(sum_area = sum(area))  
    -dist_deciles
    -#
    -# #2
    -cols <- heat.colors(10)
    -par(mar = rep(0, 4))
    -plot(st_geometry(dist_deciles), col = cols)
    -legend(x = "bottomright", pch = 15, col = cols, bty = "n", 
    -       legend = paste0(1:10, c("st", "nd", "rd", rep("th", 7))))
    -
    -
    -

    1.2.3 Practice 3

    -
    -

    1.2.3.1 Questions

    -
      -
    1. It changes features from one type to another (e.g. POLYGON to MULTIPOLYGON), either one specified by the user or the simplest possible common feature, if left unspecified. Casting is sometimes necessary to avoid mixed feature types that cause failures for subsequent operations.

    2. -
    3. st_union runs under the hood of summarise.sf, so a summarize operation on an sf will result in a merged/dissolved set of spatial features.

    4. -
    5. It affects the order of the fields in the resulting unioned sf object–the fields from the object passed to the “x” argument appear first.

    6. -
    -
    -
    -

    1.2.3.2 Code

    -
      -
    1. -
    -
    coords <- cbind("x" = c(27, 27.5, 27.5, 27, 27), 
    -                "y" = c(-13, -13, -13.5, -13.5, -13))
    -pol2 <- st_polygon(x = list(coords)) %>% st_sfc %>% st_sf(ID = 1, crs = 4326)
    -
    -par(mar = rep(0, 4))
    -plot(st_geometry(districts), col = "grey")
    -plot(pol2, col = "blue", add = TRUE)
    -
      -
    1. -
    -
    pol2_int_dists <- st_intersection(pol2, districts)
    -districts[1] %>% plot(col = "grey", main = NULL)
    -pol2_int_dists[2] %>% plot(col = rainbow(nrow(pol2_int_dists)), add = TRUE)
    -
      -
    1. -
    -

    pol2 is nearly 26 hectares larger

    -
    pol2_int_dists %>% st_area() %>% as.numeric() %>% sum() / 10000 -
    -  pol2 %>% st_area() %>% as.numeric() %>% sum() / 10000
    -
      -
    1. -
    -
    st_difference(districts, pol2)[2] %>% plot(col = "grey", main = NULL)
    -
      -
    1. -
    -
    # Try compare the
    -set.seed(1)
    -farmers_alb %>% filter(season == 2) %>%
    -  sample_n(size = 5) %>% st_buffer(dist = 30000) %>% st_geometry %>% plot()
    -
    -# With 
    -set.seed(1)
    -farmers_alb %>% filter(season == 2) %>%
    -  st_sample(size = 5) %>% st_buffer(dist = 30000) %>% st_geometry %>% plot()
    -
      -
    1. -
    -
    roads_gt400 <- roads %>% filter(as.numeric(st_length(.)) / 1000 > 400)
    -par(mar = rep(0, 4))
    -st_transform(districts, st_crs(roads)) %>% st_geometry %>% 
    -  plot(col = "grey", main = NULL)
    -roads_gt400 %>% st_buffer(25000) %>% plot(col = "green", add = TRUE)
    -
      -
    1. -
    -
    par(mar = rep(0, 4))
    -st_transform(districts, st_crs(roads)) %>% st_geometry %>% 
    -  plot(col = "grey", main = NULL)
    -roads_gt400 %>% st_buffer(25000) %>% plot(col = "tan", add = TRUE)
    -set.seed(1)
    -roads_gt400 %>% st_buffer(25000) %>% 
    -  st_sample(size = rep(20, nrow(.)), exact = TRUE) %>% 
    -  plot(col = "red", pch = 20, add = TRUE)
    -
    -
    -
    -
    -
    -

    2 Module 2 practice answers

    -
    -

    2.1 Practice 1

    -
    -

    2.1.1 Questions

    -
      -
    1. raster uses the S4 object-oriented system, where sf uses S4. Slots in S4 objects are accessed using the @ operator.

    2. -
    3. brick, because raster only allows you to read and write a single layer.

    4. -
    5. stack organizes multiple rasters that might be stored in separation locations into a single multi-layer object, whereas brick requires a single file on disk. bricks take longer to create, but are faster to work with once they exist.

    6. -
    7. The output of raster’s vectorization functions are sp objects, so you have to convert them to sf objects using st_as_sf.

    8. -
    -
    -
    -

    2.1.2 Code

    -
      -
    1. -
    -
    # recreate r, r2, r3
    -e <- extent(c("xmin" = 27, "xmax" = 29, "ymin" = -16, "ymax" = -14))  
    -r <- raster(x = e, res = 0.25, crs = crs(districts))
    -set.seed(1)  
    -values(r) <- sample(1:100, size = ncell(r), replace = TRUE)  # 3
    -r2 <- r > 50
    -r3 <- r
    -set.seed(1)
    -values(r3) <- rnorm(n = ncell(r3), mean = 10, sd = 2)
    -
    -# 
    -r4 <- r3
    -set.seed(1)
    -values(r4) <- runif(n = ncell(r4), 0, 1)
    -r5 <- r4 > 0.5
    -
    -s2 <- list(r, r2, r3, r4, r5) %>% stack
    -names(s2) <- c("r", "r2", "r3", "r4", "r5")
    -plot(s2)
    -
      -
    1. We use tempdir() here, but you should use your notebooks/data folder.
    2. -
    -
    b <- brick(s2, file = file.path(tempdir(), "b2.tif"))
    -
      -
    1. -
    -
    zamr3 <- raster(x = extent(districts), crs = crs(districts), res = 0.2)
    -values(zamr3) <- 1:ncell(zamr3)
    -
    -farmersr <- farmers %>% distinct(uuid, .keep_all = TRUE) %>% select(x, y) %>% 
    -  mutate(count = 1) %>% st_as_sf(coords = c("x", "y")) %>% 
    -  rasterize(x = ., y = zamr3, field = "count", fun = sum)
    -
    -par(mar = c(0, 0, 1, 4))
    -districts %>% st_union %>% plot(col = "grey", border = "grey")
    -plot(farmersr, add = TRUE)
    -
      -
    1. -
    -
    zamr_alb <- projectRaster(from = zamr, res = 20000, crs = crs(roads), 
    -                          method = "ngb")
    -farmersr_alb <- projectRaster(from = farmersr, to = zamr_alb, 
    -                              method = "bilinear")
    -
    -par(mar = c(0, 0, 1, 4), mfrow = c(1, 2))
    -districts %>% st_union %>% plot(col = "grey", border = "grey")
    -plot(farmersr, add = TRUE)
    -districts %>% st_transform(crs(roads)) %>% st_union %>% 
    -  plot(col = "grey", border = "grey")
    -plot(farmersr_alb, add = TRUE)
    -
      -
    1. -
    -
    par(mar = c(0, 0, 0, 0))
    -farmersr %>% rasterToPolygons(dissolve = TRUE) %>% st_as_sf %>% 
    -  plot(main = NULL)
    -
    -
    -
    -

    2.2 Practice 2

    -
    -

    2.2.1 Questions

    -
      -
    1. cellStats provides summary statistics over an entire layer; zonal calculates statistics within pre-defined zones; focal calculates statistics within a moving window.

    2. -
    3. Use method = bilinear.

    4. -
    5. You don’t. You have to resample them to a common resolution and extent first. And then you have to stack them.

    6. -
    -
    -
    -

    2.2.2 Code

    -
      -
    1. -
    -
    as.Date("10-11-2017", "%m-%d-%Y")
    -as.Date("10-11-17", "%m-%d-%y")
    -as.Date("101117", "%m%d%y")
    -as.Date("10112017", "%m%d%Y")
    -lubridate::mdy("10-11-2017")
    -lubridate::as_date("20171011")
    -
      -
    1. -
    -
    farmersr2 <- farmers %>% distinct(uuid, .keep_all = TRUE) %>% 
    -  mutate(count = 1) %>% select(x, y, count) %>% 
    -  st_as_sf(coords = c("x", "y")) %>% 
    -  rasterize(., distsr, field = "count", fun = sum)
    -
      -
    1. -
    -
    zonal(farmersr2, distsr, fun = sum) %>% data.frame %>% 
    -  subs(distsr, .) %>% plot_noaxes
    -
      -
    1. -
    -
    wmat3 <- matrix(1, nrow = 3, ncol = 3) 
    -wmat5 <- matrix(1, nrow = 5, ncol = 5) 
    -fstack <- list(sd3 = focal(x = chirpsz[[20]], w = wmat3, fun = sd), 
    -               sd5 = focal(x = chirpsz[[20]], w = wmat5, fun = sd),
    -               max3 = focal(x = chirpsz[[20]], w = wmat3, fun = max), 
    -               max5 = focal(x = chirpsz[[20]], w = wmat5, fun = max)) %>% stack
    -fstack %>% plot_noaxes
    -
      -
    1. -
    -
    chirps_d57 <- chirpsz[[1]] %>% crop(., extent(districts[57, ]))
    -s <- list(d1 = disaggregate(chirps_d57, fact = 5), 
    -          d2 = disaggregate(chirps_d57, fact = 5, method = "bilinear")) %>% 
    -  stack
    -plot_noaxes(s)
    -
      -
    1. -
    -
    s <- lapply(list(mean, cv, median), function(x) {
    -  calc(chirpsz, fun = x)  
    -}) %>% stack
    -names(s) <- c("Mean", "CV", "Median")
    -plot_noaxes(s, nr = 1)
    -
    -
    -
    -

    2.3 Practice 3

    -
    -

    2.3.1 Questions

    -
      -
    1. Using cut with vector of breakpoints (e.g. quantiles), or use reclassify with a reclassification matrix.

    2. -
    3. sampleRandom, sampleStratified. There is also sampleRegular.

    4. -
    -
    -
    -

    2.3.2 Code

    -
      -
    1. -
    -
    chirps_sd <- calc(chirpsz, fun = sd)
    -
      -
    1. -
    -
    (chirps_sd < cellStats(chirps_sd, mean)) %>% plot
    -
      -
    1. -
    -
    quantile(raintot, probs = seq(0, 1, 0.2)) %>% cut(raintot, .) %>% 
    -  plot_noaxes
    -
      -
    1. -
    -
    set.seed(11)
    -randdistsr <- districts %>% sample_n(size = 15) %>% rasterize(., raintot)
    -plot_noaxes(randdistsr)
    -
    -newrandrain <- mask(raintot, randdistsr)
    -newrandrain %>% plot_noaxes
    -
      -
    1. -
    -
    set.seed(1)
    -randsamp <- sampleRandom(x = newrandrain, size = 300)
    -
    -set.seed(1)
    -ind <- sampleStratified(x = randdistsr, size = 300 / 15, cells = TRUE)
    -stratsamp <- newrandrain[ind[, 1]]
    -
    -rand_rain_stats <- bind_rows(
    -  tibble(rain = randsamp, dat = "Simple"),
    -  tibble(rain = stratsamp, dat = "Stratified"),
    -) %>% drop_na
    -
    -bp_theme <- theme(legend.title = element_blank(), axis.text.x = element_blank(),
    -                  axis.ticks.x = element_blank(), 
    -                  panel.grid.major.x = element_blank(), 
    -                  panel.grid.minor.x = element_blank(), 
    -                  panel.background = element_rect(fill = "grey95"))
    -
    -rand_rain_stats %>% ggplot() +
    -  geom_boxplot(mapping = aes(y = rain, fill = dat), position = "dodge2") +
    -  scale_fill_manual(values = c("lightblue", "steelblue")) + 
    -  ggtitle("Rainfall distributions") + xlab(NULL) + ylab("mm") + bp_theme
    -
    -
    -
    -

    2.4 Practice 4

    -
    -

    2.4.1 Questions

    -
      -
    1. The area function is your friend for this.

    2. -
    3. Use expression together combined with paste as needed for more complex labels.

    4. -
    5. Make names(predstack) matches the predictor names used by the model.

    6. -
    -
    -
    -

    2.4.2 Code

    -
      -
    1. -
    -
    demalb42 <- districts %>% filter(ID == 42) %>% st_transform(st_crs(roads)) %>% 
    -  crop(demalb, .)
    -vars <- c("slope", "aspect", "flowdir", "tri")
    -terrvars <- stack(lapply(1:length(vars), function(x) {
    -  tv <- terrain(x = demalb42, opt = vars[x], unit = "degrees")
    -}))
    -names(terrvars) <- vars
    -
    -plot_noaxes(terrvars)
    -
      -
    1. -
    -
    library(gstat)
    -
    -# #1
    -raintotalb <- projectRaster(from = raintot, res = 5000, crs = crs(roads))
    -names(raintotalb) <- "rain"
    -r <- raster(extent(raintotalb), res = res(raintotalb), crs = crs(raintotalb),             vals = 1)
    -
    -# lapply approach to interpolation
    -idw_list <- lapply(c(250, 500, 1000), function(x) {
    -  set.seed(1)
    -  rainsamp <- sampleRandom(raintotalb, size = 1000, xy = TRUE)
    -  rainsamp <- as.data.frame(rainsamp)
    -  invdist <- gstat(id = "rain", formula = rain ~ 1, locations = ~x + y, 
    -                   data = rainsamp)
    -  invdistr <- interpolate(object = r, model = invdist)
    -  invdistrmsk <- mask(x = invdistr, mask = raintotalb)
    -})
    -
    -idws <- stack(c(raintotalb, idw_list))
    -
    -titles <- c("CHIRPS rainfall", "1000 pt IDW", "500 pt IDW", "250 pt IDW")
    -plot_noaxes(idws, main = titles, zlim = c(0, 150))
    -
      -
    1. -
    -
    districts %>% filter(ID %in% seq(15, 50, 5)) %>% 
    -  st_transform(st_crs(roads)) %>% st_geometry %>% st_centroid %>% 
    -  as_Spatial(.) %>% 
    -  distanceFromPoints(object = raintotalb, xy = .) %>% 
    -  mask(., raintotalb) %>% plot_noaxes
    -
      -
    1. -
    -

    Pretty close to results of highest density sample. Slightly higher mean error.

    -
    # #1
    -data(zamprec)
    -zamprecalb <- projectRaster(from = zamprec, to = raintotalb)
    -names(zamprecalb) <- "rain"
    -elev <- resample(aggregate(x = demalb, fact = 5), y = raintotalb)
    -
    -# #2
    -set.seed(1)
    -pts <- sampleRandom(x = zamprecalb, size = 25, sp = TRUE) %>% st_as_sf
    -pts <- pts %>% mutate(elev = raster::extract(x = elev, y = .)) 
    -pts_dat <- bind_cols(pts %>% data.frame %>% select(-geometry) %>% as_tibble, 
    -                     st_coordinates(pts) %>% as_tibble) %>% drop_na
    -  
    -# #3
    -p1 <- ggplot(pts_dat) + geom_point(aes(X, rain), col = "steelblue") +
    -  ylab("Rainfall (mm)")
    -p2 <- ggplot(pts_dat) + geom_point(aes(Y, rain), col = "blue2") + ylab("")
    -p3 <- ggplot(pts_dat) + geom_point(aes(elev, rain), col = "darkblue") + ylab("")
    -cowplot::plot_grid(p1, p2, p3, nrow = 1)
    -
    -# #4
    -rain_lm <- lm(rain ~ X + Y + elev, data = pts_dat)
    -summary(rain_lm)
    -
    -# #5
    -xs <- xFromCell(object = raintotalb, cell = 1:ncell(raintotalb))
    -ys <- yFromCell(object = raintotalb, cell = 1:ncell(raintotalb))
    -X <- Y <- raintotalb
    -values(X) <- xs
    -values(Y) <- ys
    -
    -# #6
    -predst <- stack(X, Y, elev)
    -names(predst) <- c("X", "Y", "elev")
    -predrainr <- predict(object = predst, model = rain_lm)
    -
    -# #7
    -s <- stack(zamprecalb, predrainr, (predrainr - zamprecalb) / zamprecalb * 100)
    -mae <- round(cellStats(abs(zamprecalb - predrainr), mean), 1)  
    -
    -pnames <- c("'Observed' Rainfall", "Predicted Rainfall", "% Difference")
    -par(mfrow = c(1, 3), mar = c(0, 0, 1, 4))
    -for(i in 1:3) {
    -  plot_noaxes(s[[i]], main = pnames[i])
    -  if(i %in% 1:2) {
    -    pts %>% st_geometry %>% 
    -      plot(pch = 20, cex = 0.2, col = "grey70", add = TRUE)
    -  } else {
    -    mtext(side = 1, line = -3, cex = 0.8, 
    -          text = paste("Mean abs err =", mae, "mm"))
    -  }
    -}
    -
    -
    -Back to home -
    -
    -
    -
    -
    -
    - - - - - - - - - - - - - - -