From 10138ce3a4fde38632b0d7ed613c2b7ce033daa8 Mon Sep 17 00:00:00 2001 From: ldemaz Date: Wed, 10 Apr 2024 15:01:50 -0400 Subject: [PATCH] Class 22 slides updated, Class 23 slides added with working terra parallelization example. Note some code is hidden in html, needs to be viewed in Rmd --- docs/class22.Rmd | 27 +++-- docs/class22.html | 26 ++-- docs/class23.Rmd | 219 ++++++++++++++++++++++++++++++++++ docs/class23.html | 297 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 548 insertions(+), 21 deletions(-) create mode 100644 docs/class23.Rmd create mode 100644 docs/class23.html diff --git a/docs/class22.Rmd b/docs/class22.Rmd index 6e4f845..980cd14 100755 --- a/docs/class22.Rmd +++ b/docs/class22.Rmd @@ -1,6 +1,6 @@ --- title: "Geospatial Analysis with R" -subtitle: Class 21 +subtitle: Class 22 output: xaringan::moon_reader: lib_dir: libs @@ -54,8 +54,8 @@ test <- subset(dset, samp == FALSE) set.seed(123) mod <- randomForest(crop ~ ., data = train, ntree = 1000, importance = TRUE) -mod -summary(mod) +sum(diag(mod$confusion[1:4, 1:4])) / sum(mod$confusion[1:4, 1:4]) +summary(mod) # Evaluation pred <- predict(mod, newdata = test %>% dplyr::select(-c(crop))) @@ -90,14 +90,14 @@ p --- ```{r, eval=FALSE, echo=FALSE} -pred_stack <- "../external/data/rststack_1016-1063.tif" +pred_stack <- here::here("external/data/rststack_1016-1063.tif") r <- terra::rast(pred_stack) r <- terra::crop(r, terra::ext(r) * 0.2) -writeRaster(r, "external/data/rststack_1016-1063_crop.tif") +writeRaster(r, here::here("external/data/rststack_1016-1063_crop.tif")) ``` -# Map +## Map Look at Rmd file to see code use to make prediction map for crop types. ```{r, eval=FALSE, echo=FALSE} @@ -173,7 +173,7 @@ dev.off() ```{r, echo=FALSE, out.width="60%", fig.align='center'} knitr::include_graphics("figures/crop_probs.png") ``` - +--- ## Advanced - parallelization ```{r, eval=FALSE} library(parallel) @@ -182,8 +182,8 @@ library(parallel) cl <- makeCluster(4) clusterEvalQ(cl, library(raster)) system.time(b <- parLapply(cl, 1:2000, function(x) { # x <- 1 - r <- raster(extent(30, 31, -14, -13), res = 0.01, - crs = "+proj=longlat +datum=WGS84") + r <- raster::raster(extent(30, 31, -14, -13), res = 0.01, + crs = "+proj=longlat +datum=WGS84") set.seed(x) values(r) <- sample(1:10, size = ncell(r), replace = TRUE) stack(r, r * runif(n = ncell(r), 0.9, 1.2), r * runif(n = ncell(r), 0.8, 1.3)) @@ -191,7 +191,9 @@ system.time(b <- parLapply(cl, 1:2000, function(x) { # x <- 1 stopCluster(cl) # *nix only -system.time(bmc <- mclapply(1:1000, function(x) { +library(terra) +system.time(bmc <- mclapply(1:2000, function(x) { + r <- raster(extent(30, 31, -14, -13), res = 0.01, crs = "+proj=longlat +datum=WGS84") set.seed(x) @@ -220,7 +222,9 @@ system.time(b2 <- lapply(1:2000, function(x) { ```{r, eval = FALSE} library(mapview) mapview(districts) -viewRGB(x = chirpsz[[1:3]]) +chirps <- terra::rast(system.file("extdata/chirps.tif", package = "geospaar")) + +viewRGB(x = terra::as.raster(chirps[[1:3]])) # mapView(districts, alpha.regions = 1, legend = FALSE) mapView(districts, alpha.regions = 0, legend = FALSE) + mapview(raintot) @@ -244,6 +248,7 @@ tm_shape(raintot) + tm_raster(palette = "magma", breaks = seq(0, 200, 25)) + ### leaflet ```{r, eval = FALSE} + library(leaflet) m <- leaflet() %>% addTiles() # # m # a map with the default OSM tile layer diff --git a/docs/class22.html b/docs/class22.html index 6b33a20..03aaea1 100644 --- a/docs/class22.html +++ b/docs/class22.html @@ -3,7 +3,7 @@ Geospatial Analysis with R - + @@ -17,7 +17,7 @@ # Geospatial Analysis with R ] .subtitle[ -## Class 21 +## Class 22 ] --- @@ -68,8 +68,8 @@ set.seed(123) mod <- randomForest(crop ~ ., data = train, ntree = 1000, importance = TRUE) -mod -summary(mod) +sum(diag(mod$confusion[1:4, 1:4])) / sum(mod$confusion[1:4, 1:4]) +summary(mod) # Evaluation pred <- predict(mod, newdata = test %>% dplyr::select(-c(crop))) @@ -107,13 +107,13 @@ -# Map +## Map Look at Rmd file to see code use to make prediction map for crop types. <img src="figures/crop_probs.png" width="60%" style="display: block; margin: auto;" /> - +--- ## Advanced - parallelization ```r @@ -123,8 +123,8 @@ cl <- makeCluster(4) clusterEvalQ(cl, library(raster)) system.time(b <- parLapply(cl, 1:2000, function(x) { # x <- 1 - r <- raster(extent(30, 31, -14, -13), res = 0.01, - crs = "+proj=longlat +datum=WGS84") + r <- raster::raster(extent(30, 31, -14, -13), res = 0.01, + crs = "+proj=longlat +datum=WGS84") set.seed(x) values(r) <- sample(1:10, size = ncell(r), replace = TRUE) stack(r, r * runif(n = ncell(r), 0.9, 1.2), r * runif(n = ncell(r), 0.8, 1.3)) @@ -132,13 +132,16 @@ stopCluster(cl) # *nix only -system.time(bmc <- mclapply(1:1000, function(x) { +library(terra) +system.time(bmc <- mclapply(1:2000, function(x) { + r <- raster(extent(30, 31, -14, -13), res = 0.01, crs = "+proj=longlat +datum=WGS84") set.seed(x) values(r) <- sample(1:10, size = ncell(r), replace = TRUE) stack(r, r * runif(n = ncell(r), 0.9, 1.2), r * runif(n = ncell(r), 0.8, 1.3)) }, mc.cores = 4)) + ``` --- @@ -162,7 +165,9 @@ ```r library(mapview) mapview(districts) -viewRGB(x = chirpsz[[1:3]]) +chirps <- terra::rast(system.file("extdata/chirps.tif", package = "geospaar")) + +viewRGB(x = terra::as.raster(chirps[[1:3]])) # mapView(districts, alpha.regions = 1, legend = FALSE) mapView(districts, alpha.regions = 0, legend = FALSE) + mapview(raintot) @@ -188,6 +193,7 @@ ### leaflet ```r + library(leaflet) m <- leaflet() %>% addTiles() # # m # a map with the default OSM tile layer diff --git a/docs/class23.Rmd b/docs/class23.Rmd new file mode 100644 index 0000000..eb3f50b --- /dev/null +++ b/docs/class23.Rmd @@ -0,0 +1,219 @@ +--- +title: "Geospatial Analysis with R" +subtitle: Class 23 +output: + xaringan::moon_reader: + lib_dir: libs + css: ["default", "lucy", "middlebury-fonts", "themes/class18.css"] + nature: + highlightStyle: github + highlightLines: true + countIncrementalSlides: false +--- + +## Today + +- Modeling review (random forest) +- Project update + + +--- +## Modelling demo +### Random Forests +```{r, eval=FALSE} +# Train model +library(caTools) +library(randomForest) +library(geospaar) + +# read in train set +train_ref <- system.file("extdata/train_reference.csv", + package = "geospaar") %>% + read_csv() %>% mutate(crop = factor(case_when( + class == "Maize" ~ 1, + class == "Rice" ~ 2, + class == "Other" ~ 3, + class == "noncrop" ~ 4 +))) %>% dplyr::select(id, class, crop, !!names(.)) +``` + +--- +### Model + +```{r, eval=FALSE} +# prepare dataset +dset <- train_ref %>% filter(type == "mean") %>% + select(-id, -class, -type, -seed) +set.seed(10) +samp <- sample.split(dset$crop, SplitRatio = 0.8) +train <- subset(dset, samp == TRUE) +test <- subset(dset, samp == FALSE) +# train %>% tidyr::drop_na() + +# fit model +set.seed(123) +mod <- randomForest(crop ~ ., data = train, ntree = 1000, + importance = TRUE) +sum(diag(mod$confusion[1:4, 1:4])) / sum(mod$confusion[1:4, 1:4]) +summary(mod) + +# Evaluation +pred <- predict(mod, newdata = test %>% dplyr::select(-c(crop))) +cm <- table(test$crop, pred) +# sum(diag(cm)) / sum(cm) + +rf_results <- list("train" = train, "test" = test, "mod" = mod, "pred" = pred, + "error_mat" = cm) +``` + +--- +```{r, eval=FALSE} +imptab <- mod$importance +nms <- rownames(imptab) +imp_tbl <- as_tibble(imptab) %>% + mutate(Variable = nms) %>% + dplyr::select(Variable, MeanDecreaseAccuracy) %>% + arrange(MeanDecreaseAccuracy) %>% + mutate(order = 1:n()) %>% + mutate(Variable = gsub("\\.", "_", Variable)) + +p <- ggplot(imp_tbl) + + geom_point(aes(MeanDecreaseAccuracy, factor(order))) + + scale_y_discrete(labels = toupper(imp_tbl$Variable)) + + ylab("Variable") + xlab("Mean Decrease in Accuracy") + + theme_linedraw() + + theme(axis.text = element_text(size = 7), + axis.title = element_text(size = 7)) +p +``` + +--- + +```{r, eval=FALSE, echo=FALSE} +pred_stack <- here::here("external/data/rststack_1016-1063.tif") +r <- terra::rast(pred_stack) +r <- terra::crop(r, terra::ext(r) * 0.2) +writeRaster(r, here::here("external/data/rststack_1016-1063_crop.tif")) +``` + + +## Map + +Look at Rmd file to see code use to make prediction map for crop types. +```{r, eval=FALSE, echo=FALSE} +r <- terra::rast("external/data/rststack_1016-1063_crop.tif") + +# pred_stack +predvars <- rownames(mod$importance) +# pred_stack <- "external/data/rststack_1016-1063.tif" +rtemp <- r[[1]] +v <- values(r) +v_tb <- as_tibble(v) %>% + mutate( + ndvi_1 = (B8_1 - B4_1) / (B8_1 + B4_1), + ndvi_2 = (B8_2 - B4_2) / (B8_2 + B4_2), + rg1_ndvi_1 = (B8_1 - B5_1) / (B8_1 + B5_1), + rg1_ndvi_2 = (B8_2 - B5_2) / (B8_2 + B5_2), + rg2_ndvi_1 = (B8_1 - B6_1) / (B8_1 + B6_1), + rg2_ndvi_2 = (B8_2 - B6_2) / (B8_2 + B6_2), + + gcvi_1 = (B8_1 / (B3_1 + 0.00001)) - 1, + gcvi_2 = (B8_2 / (B3_2 + 0.00001)) - 1, + rg1_gcvi_1 = (B8_1 / (B5_1 + 0.00001)) - 1, + rg1_gcvi_2 = (B8_2 / (B5_2 + 0.00001)) - 1, + rg2_gcvi_1 = (B8_1 / (B6_1 + 0.00001)) - 1, + rg2_gcvi_2 = (B8_2 / (B6_2 + 0.00001)) - 1, + + mtci_1 = (B8_1 - B5_1) / (B5_1 - B4_1 + 0.00001), + mtci_2 = (B8_2 - B5_2) / (B5_2 - B4_2 + 0.00001), + mtci2_1 = (B6_1 - B5_1) / (B5_1 - B4_1 + 0.00001), + mtci2_2 = (B6_2 - B5_2) / (B5_2 - B4_2 + 0.00001), + + reip_1 = 700 + 40 * ((B4_1 + B7_1) / 2 - B5_1) / (B7_1 - B5_1), + reip_2 = 700 + 40 * ((B4_2 + B7_2) / 2 - B5_2) / (B7_2 - B5_2), + + nbr1_1 = (B8_1 - B11_1) / (B8_1 + B11_1), + nbr1_2 = (B8_2 - B11_2) / (B8_2 + B11_2), + nbr2_1 = (B8_1 - B12_1) / (B8_1 + B12_1), + nbr2_2 = (B8_2 - B12_2) / (B8_2 + B12_2), + + ndti_1 = (B11_1 - B12_1) / (B11_1 + B12_1), + ndti_2 = (B11_2 - B12_2) / (B11_2 + B12_2), + + crc_1 = (B11_1 - B3_1) / (B11_1 + B3_1), + crc_2 = (B11_2 - B3_2) / (B11_2 + B3_2), + + sti_1 = B11_1 / (B12_1 + 0.00001), + sti_2 = B11_2 / (B12_2 + 0.00001) + ) %>% dplyr::select(all_of(predvars)) + +# clean up NA and Inf +v_tb <- v_tb %>% + mutate_all(function(x) ifelse(is.infinite(x), NA, x)) %>% + mutate_at(vars(predvars), zoo::na.aggregate) # replace NA with mean + +# predict probabilities of each class +pred <- predict(mod, v_tb, type = "prob") + +# create probability stack +pred_stack <- do.call(c, lapply(1:ncol(pred), function(y) { + predr <- rtemp + values(predr) <- pred[, y] + predr +})) +names(pred_stack) <- c("Maize", "Rice", "Other Crops", "Non crop") + +png(filename = here::here("docs/figures/crop_probs.png"), width = 4.5, + height = 4, units = "in", res = 300) +terra::plot(pred_stack, axes = FALSE, range = c(0, 1), plg = list(inset = -1)) +dev.off() + +``` + +```{r, echo=FALSE, out.width="60%", fig.align='center'} +knitr::include_graphics("figures/crop_probs.png") +``` +--- + +## Advanced - parallelization + +From last class, we looked at `raster` + + +```{r, eval=FALSE} +# raster equivalent +library(terra) +system.time(bmc <- mclapply(1:2000, function(x) { + + r <- raster(extent(30, 31, -14, -13), res = 0.01, + crs = "+proj=longlat +datum=WGS84") + set.seed(x) + values(r) <- sample(1:10, size = ncell(r), replace = TRUE) + stack(r, r * runif(n = ncell(r), 0.9, 1.2), r * runif(n = ncell(r), 0.8, 1.3)) +}, mc.cores = 4)) + +``` + +--- + +Here is a `terra` based approach. Note the use of `wrap` and `unwrap` to serialize the `spatRaster` object so it can be passed between cores. See [here](https://github.com/rspatial/terra/issues/36) for discussion. +```{r, eval=FALSE} +library(parallel) + +# terra +r <- rast(ext(30, 31, -14, -13), res = 0.01, + crs = "+proj=longlat +datum=WGS84") +values(r) <- 1:ncell(r) + +rp <- terra::wrap(r) +system.time(b <- mclapply(1:2000, function(x, r = rp) { + r <- unwrap(r) + values(r) <- sample(1:10, size = ncell(r), replace = TRUE) + set.seed(x) + r3 <- r2 <- r + values(r2) <- values(r) * runif(n = ncell(r), 0.9, 1.2) + values(r3) <- values(r) * runif(n = ncell(r), 0.8, 1.3) + wrap(c(r3, r2, r)) +}) %>% lapply(., unwrap)) +``` + diff --git a/docs/class23.html b/docs/class23.html new file mode 100644 index 0000000..5f69979 --- /dev/null +++ b/docs/class23.html @@ -0,0 +1,297 @@ + + + + Geospatial Analysis with R + + + + + + + + + + + + + + + + + +