vignette v1

baarthur · Oct 23, 2023 · d9d9026 · d9d9026
1 parent 187f924
commit d9d9026
Show file tree

Hide file tree

Showing 11 changed files with 230 additions and 17 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -3,3 +3,5 @@
 ^data-raw$
 ^LICENSE\.md$
 ^\.github$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
@@ -53,3 +53,6 @@ rsconnect/
 
 # data sources
 data-raw/*
+inst/doc
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -28,3 +28,7 @@ Imports:
     units
 Depends: 
   R (>= 2.10)
+Suggests: 
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,9 +5,9 @@ export(coef_rqs)
 export(count_features)
 export(dist_nearest)
 export(geom_sf_rais)
+export(get_osm_postcodes)
 export(get_osm_rail)
 export(get_osm_roads)
-export(get_osm_zips)
 export(get_rais)
 export(gg_rqs)
 export(maphub_tidy_lines)

diff --git a/R/get_osm.R b/R/get_osm.R
@@ -4,10 +4,14 @@
 #' A convenient wrapper for osm features to get some features I need easily convertible to `sf` format.
 #'
 #' @param bbox A bounding box for the search area.
+#' @param use Should all geometries be kept? Defaults to all, but can also be one of
+#'  "points", "lines", "multilines", "multipolygons". Use this when sure about what geometry you
+#'  want to retrieve (e.g. when getting postcodes, you most surely want `use = "points"`).
 #'
 #' @import osmdata
 #'
-#' @returns A `list` for each kind of geometry retrieved by osmdata.
+#' @returns If `use = ("all")`, a `list` for each kind of geometry retrieved by `{osmdata}`. Else, a
+#'  `simple feature` dataframe for the selected geometry type instead of a list with all geometries.
 #'
 #' @seealso [osmdata::add_osm_feature()]
 
@@ -30,20 +34,24 @@
 #' get_osm_roads()
 #' }
 
-get_osm_roads <- function(bbox) {
+get_osm_roads <- function(bbox, use = c("all", "points", "lines", "multilines", "multipolygons")) {
   query <- opq(bbox, timeout = 50) %>%
     add_osm_feature(
       key = "highway",
       value = c("motorway", "primary", "secondary", "tertiary", "residential", "living_street", "pedestrian")
     ) %>%
     osmdata_sf()
+
+  if(use != "all") {
+    query <- query[[paste0("osm_",use)]]
+  }
+
   return(query)
 }
 
 
-
 #' @rdname get_osm_
-#' @name get_osm_zips
+#' @name get_osm_postcodes
 #' @export
 #'
 #' @details
@@ -55,17 +63,21 @@ get_osm_roads <- function(bbox) {
 #' # Get postcodes in Brazil's smallest city
 #' geobr::read_municipality(code_muni = 3157336) %>%
 #' sf::st_bbox() %>%
-#' get_osm_zips()
+#' get_osm_postcodes()
 #' }
 
 
-get_osm_zips <- function(bbox) {
+get_osm_postcodes <- function(bbox, use = c("all", "points", "lines", "multilines", "multipolygons")) {
   query <- opq(bbox, timeout = 50) %>%
     add_osm_feature(
       key = "addr:postcode"
     ) %>%
     osmdata_sf()
 
+  if(use != "all") {
+    query <- query[[paste0("osm_",use)]]
+  }
+
   return(query)
 }
 
@@ -89,7 +101,7 @@ get_osm_zips <- function(bbox) {
 #' }
 
 
-get_osm_rail <- function(bbox) {
+get_osm_rail <- function(bbox, use = c("all", "points", "lines", "multilines", "multipolygons")) {
   query <- opq(bbox, timeout = 50) %>%
     add_osm_feature(
       key = "railway",
@@ -98,5 +110,9 @@ get_osm_rail <- function(bbox) {
     ) %>%
     osmdata_sf()
 
+  if(use != "all") {
+    query <- query[[paste0("osm_",use)]]
+  }
+
   return(query)
 }
diff --git a/R/shp_extract_read.R b/R/shp_extract_read.R
@@ -12,11 +12,11 @@
 #'
 #' @returns A `sf` object
 
-shp_extract_read <- function(path, dsn, ...) {
+shp_extract_read <- function(path, dsn = "shp", ...) {
 
   temp <- tempfile()
   unzip(path, exdir = temp, junkpaths = TRUE)
-  listfiles <- list.files(temp, pattern = dsn, full.names = TRUE)
+  listfiles <- list.files(temp, pattern = paste0("\\.", dsn, "$"), full.names = TRUE)
 
   shp <- sf::st_read(dsn = listfiles, ...)
 

diff --git a/man/get_osm_.Rd b/man/get_osm_.Rd
diff --git a/man/shp_extract_read.Rd b/man/shp_extract_read.Rd
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
@@ -0,0 +1,4 @@
+*.html
+*.R
+
+data/
diff --git a/vignettes/spatialops.Rmd b/vignettes/spatialops.Rmd
@@ -0,0 +1,170 @@
+---
+title: "An introduction to spatialops"
+output:   
+  rmarkdown::html_vignette:
+    df_print: kable
+vignette: >
+  %\VignetteIndexEntry{An introduction to spatialops}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+In this vignette, I demonstrate how to use the main features of the `{spatialops}` package. Basing on the sunny city of Fortaleza, we will first read a manually downloaded shapefiles from .zip folders. Then, we will use our `{osmdata}` wrappers to get info on rail stations and perform two kind of spatial operations: count metro stations per neighborhood and the distance from each park to the closest station. 
+
+**Setup**
+```{r, include=FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  #out.width = "400px",
+  #out.height = "300px",
+  #fig.retina = 2
+  out.width = "100%"
+)
+```
+
+```{r setup, message=FALSE}
+library(sf)
+library(dplyr)
+library(ggplot2)
+library(spatialops)
+```
+
+
+
+## Read shapefiles from a .zip folder
+
+> Tip: I highly recommend that you save your data in an organized way, such as [this         structure](https://martinctc.github.io/blog/rstudio-projects-and-working-directories-a-beginner's-guide/).
+
+From the official [Fortaleza Maps Portal](https://mapas.fortaleza.ce.gov.br/), I downloaded the neighborhoods and parks files. Since they are in the `Esri Shapefile` format, they come in `.zip` folders containing multiple files (`.dbf`, `.prj`, `.shp`, `.shx`), all of them necessary. To avoid creating a mess, we can use `shp_extract_read()`: Just specify the path and you're done! You can also give a `dsn` (data source name) extension. By default, `dsn = shp` but you can also use `dsn = ` `geojson`, `kml` and so on. Other `st_read()` arguments can also be passed. Here, we define the `options` argument to ensure the `latin1` encoding, otherwise this file won't work. 
+
+```{r shp-read}
+# load
+shp_bairr <- shp_extract_read(path = "data/Bairros_de_Fortaleza.zip", options = "ENCODING=latin1")
+shp_parks <- shp_extract_read(path = "data/Pracas_de_Fortaleza.zip", options = "ENCODING=latin1")
+
+# and a bit of tidying
+shp_bairr <- shp_bairr %>% 
+  select(id, name = Nome, area_ha = Área..ha., geometry)
+
+shp_parks <- shp_parks %>% 
+  select(id, name = nome, area_m2 = area.m2, geometry)
+```
+
+
+## Get OSM data
+
+### Rail stations
+
+Now, we'll use our `get_osm_rail()` wrapper to get all rail things from OSM already into a `simple feature`, by keeping only points since we don't want polygons or lines.
+
+```{r metrofor-scaffoldings1, eval=FALSE, include=FALSE}
+shp_bairr %>% 
+  st_bbox() %>% 
+  get_osm_rail(use = "points") %>% 
+  saveRDS("data/shp_metro.RDS")
+```
+
+```{r metrofor-scaffoldings2, include=FALSE}
+shp_metro <- readRDS("data/shp_metro.RDS")
+```
+
+```{r get-metrofor, eval=FALSE}
+shp_metro <- shp_bairr %>% 
+  st_bbox() %>% 
+  get_osm_rail(use = "points")
+
+head(shp_metro)
+```
+
+Most of this data is useless for us, so let's begin by `filter`ing only stations---this will reduce the number of rows from 2609 to 36---and then `select`ing relevant columns.
+```{r}
+shp_metro <- shp_metro %>% 
+  filter(public_transport == "station") %>%
+  select(osm_id, name, railway, start_date, station)
+```
+
+All set! Let's visualize:
+```{r shp-plot}
+ggplot() +
+  geom_sf(
+    data = shp_bairr, fill = "#f9f9f9"
+  ) + 
+  geom_sf(
+    data = shp_parks, aes(fill = "Park"), color = "#86d7bc"
+  ) +
+  geom_sf(
+    data = shp_metro, aes(color = "Metrofor station"), size = 0.5
+  ) +
+  scale_fill_manual(values = c("Park" = "#86d7bc")) +
+  scale_color_manual(values = c("Metrofor station" = "red")) +
+  labs(title = "Fortaleza: Parks and Metro") +
+  theme_void() +
+  theme(
+    legend.title = element_blank(),
+    legend.key.size = unit(0.5, "lines")
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 1)))
+```
+
+
+
+
+## Count parks per neighborhood
+
+Parks and plazas are an important indicator of urban development, as they both attract households and companies, but are sometimes concentrated in historically affluent areas: In both cases, understanding how amenities are distributed along the space is key in most urban studies. Here, we demonstrate how `{spatialops}` makes it easy to count how many parks each neighborhood has.
+
+All we need to do is call `count_features(x,y)` and it will bind to `x` a column with the number of `y` features for each row. If `x` is a list of `POINT`s, we need to provide a `radius` to create a buffer and then count features inside it. There are two optional arguments: `name` is pretty obvious (the count variable name), whereas `predicate` lets you decide which spatial operation to perform. By default, `predicate = st_intersects` means that `count_features()` will count how many `y`s intersect with each `x`, but you can use `st_covers`, `st_contains_properly`, and so on.
+```{r, warning=FALSE}
+shp_bairr <- shp_bairr %>% 
+  count_features(shp_parks, name = "n_parks")
+
+shp_bairr %>% 
+  arrange(desc(n_parks)) %>% 
+  slice(1:10) %>% 
+  ggplot() +
+  geom_col(aes(y = reorder(name, n_parks), x = n_parks)) +
+  scale_y_discrete(labels = scales::label_wrap(10)) +
+  labs(title = "Parks and Rec: Fortaleza Edition", y = "Neighborhood", x = "Number of parks") +
+  theme_minimal()
+
+shp_bairr %>% 
+  ggplot() +
+  geom_sf(aes(fill = n_parks, color = n_parks)) +
+  geom_sf_label(
+    data = shp_bairr %>% arrange(desc(n_parks)) %>% slice(1:10),
+    aes(label = name),
+    size = 1.5, color = "black", label.padding = unit(0.1, "lines"), nudge_x = 0.01, nudge_y = 0.001
+  ) +
+  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
+  scale_color_distiller(palette = "YlGnBu", direction = 1) +
+  labs(title = "Parks and Rec: Fortaleza Edition", fill = "Number of parks", color = "Number of parks") +
+  theme_void()
+```
+
+
+
+## Calculate distance from each park to the nearest station
+
+```{r}
+shp_parks <- shp_parks %>% 
+  dist_nearest(shp_metro, name = "dist_metro")
+
+shp_parks %>% 
+  ggplot() +
+  geom_histogram(aes(x = dist_metro/1000)) +
+  labs(title = "How close are parks to Metrofor stations?", x = "Distance (km)", y = "Number of parks") +
+  theme_minimal()
+```
+
+
+
+## Trinary sets
+
+[under construction]
+
+
+
+## Conclusion
+
+[under construction]
-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    *.html
+    *.R
+    data/