From d9d90263b496b6169091351a066c9af7f8f41067 Mon Sep 17 00:00:00 2001
From: arthur-mac
Date: Mon, 23 Oct 2023 16:07:33 -0300
Subject: [PATCH] vignette v1

---
 .Rbuildignore            |   2 +
 .gitignore               |   3 +
 DESCRIPTION              |   4 +
 NAMESPACE                |   2 +-
 R/get_osm.R              |  30 +++++--
 R/shp_extract_read.R     |   4 +-
 man/get_osm_.Rd          |  26 ++++--
 man/shp_extract_read.Rd  |   2 +-
 vignettes/.gitignore     |   4 +
 vignettes/spatialops.Rmd | 170 +++++++++++++++++++++++++++++++++++++++
 10 files changed, 230 insertions(+), 17 deletions(-)
 create mode 100644 vignettes/.gitignore
 create mode 100644 vignettes/spatialops.Rmd

diff --git a/.Rbuildignore b/.Rbuildignore
index 5e5cc50..5048e2d 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -3,3 +3,5 @@
 ^data-raw$
 ^LICENSE\.md$
 ^\.github$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
index 0bd6e01..76027c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,3 +53,6 @@ rsconnect/
 
 # data sources
 data-raw/*
+inst/doc
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
index 4879823..cbd17c8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -28,3 +28,7 @@ Imports:
     units
 Depends: 
    R (>= 2.10)
+Suggests: 
+    knitr,
+    rmarkdown
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 756ab37..2fd1473 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,9 +5,9 @@ export(coef_rqs)
 export(count_features)
 export(dist_nearest)
 export(geom_sf_rais)
+export(get_osm_postcodes)
 export(get_osm_rail)
 export(get_osm_roads)
-export(get_osm_zips)
 export(get_rais)
 export(gg_rqs)
 export(maphub_tidy_lines)
diff --git a/R/get_osm.R b/R/get_osm.R
index 7ef6d59..71dd6a7 100644
--- a/R/get_osm.R
+++ b/R/get_osm.R
@@ -4,10 +4,14 @@
 #' A convenient wrapper for osm features to get some features I need easily convertible to `sf` format.
 #'
 #' @param bbox A bounding box for the search area.
+#' @param use Which geometry type to keep. Defaults to "all", but can also be one of
+#' "points", "lines", "multilines", or "multipolygons". Use this when you know which geometry you
+#' want to retrieve (e.g. when getting postcodes, you most likely want `use = "points"`).
 #'
 #' @import osmdata
 #'
-#' @returns A `list` for each kind of geometry retrieved by osmdata.
+#' @returns If `use = "all"`, a `list` with every kind of geometry retrieved by `{osmdata}`. Otherwise, an
+#' `sf` data frame containing only the selected geometry type.
 #'
 #' @seealso [osmdata::add_osm_feature()]
 #'
@@ -30,20 +34,24 @@
 #' get_osm_roads()
 #' }
 
-get_osm_roads <- function(bbox) {
+get_osm_roads <- function(bbox, use = c("all", "points", "lines", "multilines", "multipolygons")) {
   query <- opq(bbox, timeout = 50) %>%
     add_osm_feature(
       key = "highway",
       value = c("motorway", "primary", "secondary", "tertiary", "residential",
                 "living_street", "pedestrian")
     ) %>%
     osmdata_sf()
+
+  use <- match.arg(use)
+  if (use != "all") {
+    query <- query[[paste0("osm_", use)]]
+  }
 
   return(query)
 }
-
 #' @rdname get_osm_
-#' @name get_osm_zips
+#' @name get_osm_postcodes
 #' @export
 #'
 #' @details
@@ -55,17 +63,21 @@
 #' # Get postcodes in Brazil's smallest city
 #' geobr::read_municipality(code_muni = 3157336) %>%
 #' sf::st_bbox() %>%
-#' get_osm_zips()
+#' get_osm_postcodes()
 #' }
 
-get_osm_zips <- function(bbox) {
+get_osm_postcodes <- function(bbox, use = c("all", "points", "lines", "multilines", "multipolygons")) {
   query <- opq(bbox, timeout = 50) %>%
     add_osm_feature(
       key = "addr:postcode"
     ) %>%
     osmdata_sf()
 
+  use <- match.arg(use)
+  if (use != "all") {
+    query <- query[[paste0("osm_", use)]]
+  }
   return(query)
 }
 
 
@@ -89,7 +101,7 @@
 #' }
 
 
-get_osm_rail <- function(bbox) {
+get_osm_rail <- function(bbox, use = c("all", "points", "lines", "multilines", "multipolygons")) {
   query <- opq(bbox, timeout = 50) %>%
     add_osm_feature(
       key = "railway",
@@ -98,5 +110,9 @@
     ) %>%
     osmdata_sf()
 
+  use <- match.arg(use)
+  if (use != "all") {
+    query <- query[[paste0("osm_", use)]]
+  }
   return(query)
 }
diff --git a/R/shp_extract_read.R b/R/shp_extract_read.R
index ff0b600..bd84207 100644
--- a/R/shp_extract_read.R
+++ b/R/shp_extract_read.R
@@ -12,11 +12,11 @@
 #'
 #' @returns A `sf` object
 
-shp_extract_read <- function(path, dsn, ...) {
+shp_extract_read <- function(path, dsn = "shp", ...) {
   temp <- tempfile()
   unzip(path, exdir = temp, junkpaths = TRUE)
 
-  listfiles <- list.files(temp, pattern = dsn, full.names = TRUE)
+  listfiles <- list.files(temp, pattern = paste0("\\.", dsn, "$"), full.names = TRUE)
 
   shp <- sf::st_read(dsn = listfiles, ...)
 
diff --git a/man/get_osm_.Rd b/man/get_osm_.Rd
index c9b6832..17a00bc 100644
--- a/man/get_osm_.Rd
+++ b/man/get_osm_.Rd
@@ -2,21 +2,35 @@
 % Please edit documentation in R/get_osm.R
 \name{get_osm_roads}
 \alias{get_osm_roads}
-\alias{get_osm_zips}
+\alias{get_osm_postcodes}
 \alias{get_osm_rail}
 \title{Download osm data}
 \usage{
-get_osm_roads(bbox)
+get_osm_roads(
+  bbox,
+  use = c("all", "points", "lines", "multilines", "multipolygons")
+)
 
-get_osm_zips(bbox)
+get_osm_postcodes(
+  bbox,
+  use = c("all", "points", "lines", "multilines", "multipolygons")
+)
 
-get_osm_rail(bbox)
+get_osm_rail(
+  bbox,
+  use = c("all", "points", "lines", "multilines", "multipolygons")
+)
 }
 \arguments{
 \item{bbox}{A bounding box for the search area.}
+
+\item{use}{Which geometry type to keep. Defaults to "all", but can also be one of
+"points", "lines", "multilines", or "multipolygons". Use this when you know which geometry you
+want to retrieve (e.g. when getting postcodes, you most likely want \code{use = "points"}).}
 }
 \value{
-A \code{list} for each kind of geometry retrieved by osmdata.
+If \code{use = "all"}, a \code{list} with every kind of geometry retrieved by \code{{osmdata}}. Otherwise, an
+\code{sf} data frame containing only the selected geometry type.
 }
 \description{
 A convenient wrapper for osm features to get some features I need easily convertible to \code{sf} format.
@@ -44,7 +58,7 @@ get_osm_roads()
 # Get postcodes in Brazil's smallest city
 geobr::read_municipality(code_muni = 3157336) \%>\%
 sf::st_bbox() \%>\%
-get_osm_zips()
+get_osm_postcodes()
 }
 \dontrun{
 # Get railways in Brazil's smallest city
diff --git a/man/shp_extract_read.Rd b/man/shp_extract_read.Rd
index c3b5fa3..4f24b7f 100644
--- a/man/shp_extract_read.Rd
+++ b/man/shp_extract_read.Rd
@@ -4,7 +4,7 @@
 \alias{shp_extract_read}
 \title{Extract and read compacted shapefiles}
 \usage{
-shp_extract_read(path, dsn, ...)
+shp_extract_read(path, dsn = "shp", ...)
 }
 \arguments{
 \item{path}{Path of the compacted folder, including file name and extension.}
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 0000000..157f75c
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,4 @@
+*.html
+*.R
+
+data/
diff --git a/vignettes/spatialops.Rmd b/vignettes/spatialops.Rmd
new file mode 100644
index 0000000..520b5b4
--- /dev/null
+++ b/vignettes/spatialops.Rmd
@@ -0,0 +1,170 @@
+---
+title: "An introduction to spatialops"
+output: 
+  rmarkdown::html_vignette: 
+    df_print: kable
+vignette: >
+  %\VignetteIndexEntry{An introduction to spatialops}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+In this vignette, I demonstrate how to use the main features of the `{spatialops}` package. Working with data for the sunny city of Fortaleza, we will first read manually downloaded shapefiles from `.zip` archives. Then, we will use our `{osmdata}` wrappers to get information on rail stations and perform two kinds of spatial operations: counting the parks in each neighborhood and computing the distance from each park to the nearest station.
+
+**Setup**
+
+```{r, include=FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  #out.width = "400px",
+  #out.height = "300px",
+  #fig.retina = 2
+  out.width = "100%"
+)
+```
+
+```{r setup, message=FALSE}
+library(sf)
+library(dplyr)
+library(ggplot2)
+library(spatialops)
+```
+
+
+## Read shapefiles from a .zip folder
+
+> Tip: I highly recommend that you save your data in an organized way, such as [this structure](https://martinctc.github.io/blog/rstudio-projects-and-working-directories-a-beginner's-guide/).
+
+From the official [Fortaleza Maps Portal](https://mapas.fortaleza.ce.gov.br/), I downloaded the neighborhoods and parks files. Since they are in the `Esri Shapefile` format, they come as `.zip` archives containing several files (`.dbf`, `.prj`, `.shp`, `.shx`), all of them necessary. To avoid creating a mess, we can use `shp_extract_read()`: just specify the path and you're done! You can also give a `dsn` (data source name) extension. By default, `dsn = "shp"`, but you can also use `dsn = "geojson"`, `dsn = "kml"`, and so on. Other `st_read()` arguments can also be passed. Here, we set the `options` argument to enforce the `latin1` encoding; otherwise this file won't be read correctly.
+
+```{r shp-read}
+# load
+shp_bairr <- shp_extract_read(path = "data/Bairros_de_Fortaleza.zip", options = "ENCODING=latin1")
+shp_parks <- shp_extract_read(path = "data/Pracas_de_Fortaleza.zip", options = "ENCODING=latin1")
+
+# and a bit of tidying
+shp_bairr <- shp_bairr %>%
+  select(id, name = Nome, area_ha = Área..ha., geometry)
+
+shp_parks <- shp_parks %>%
+  select(id, name = nome, area_m2 = area.m2, geometry)
+```
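+
+As a side note, the same helper works for other formats and simply forwards any extra arguments to `st_read()`. Here is a quick sketch (not run; the GeoJSON archive name is just a hypothetical example):
+
+```{r other-formats, eval=FALSE}
+# A .zip containing a GeoJSON layer instead of a shapefile (hypothetical file name)
+shp_extract_read(path = "data/some_layer.zip", dsn = "geojson")
+
+# Any other st_read() argument can be forwarded, e.g. quiet = TRUE
+shp_extract_read(path = "data/Bairros_de_Fortaleza.zip", options = "ENCODING=latin1", quiet = TRUE)
+```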
+
+
+## Get OSM data
+
+### Rail stations
+
+Now, we'll use our `get_osm_rail()` wrapper to get all rail-related features from OSM, already converted to a `simple feature` object; we keep only the points since we don't want polygons or lines.
+
+```{r metrofor-scaffoldings1, eval=FALSE, include=FALSE}
+shp_bairr %>%
+  st_bbox() %>%
+  get_osm_rail(use = "points") %>%
+  saveRDS("data/shp_metro.RDS")
+```
+
+```{r metrofor-scaffoldings2, include=FALSE}
+shp_metro <- readRDS("data/shp_metro.RDS")
+```
+
+```{r get-metrofor, eval=FALSE}
+shp_metro <- shp_bairr %>%
+  st_bbox() %>%
+  get_osm_rail(use = "points")
+
+head(shp_metro)
+```
+
+Most of this data is useless for us, so let's begin by `filter`ing only stations---this will reduce the number of rows from 2609 to 36---and then `select`ing the relevant columns.
+
+```{r}
+shp_metro <- shp_metro %>%
+  filter(public_transport == "station") %>%
+  select(osm_id, name, railway, start_date, station)
+```
+
+All set! Let's visualize:
+
+```{r shp-plot}
+ggplot() +
+  geom_sf(
+    data = shp_bairr, fill = "#f9f9f9"
+  ) +
+  geom_sf(
+    data = shp_parks, aes(fill = "Park"), color = "#86d7bc"
+  ) +
+  geom_sf(
+    data = shp_metro, aes(color = "Metrofor station"), size = 0.5
+  ) +
+  scale_fill_manual(values = c("Park" = "#86d7bc")) +
+  scale_color_manual(values = c("Metrofor station" = "red")) +
+  labs(title = "Fortaleza: Parks and Metro") +
+  theme_void() +
+  theme(
+    legend.title = element_blank(),
+    legend.key.size = unit(0.5, "lines")
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 1)))
+```
+
+
+
+## Count parks per neighborhood
+
+Parks and plazas are an important indicator of urban development: they attract both households and companies, yet they are sometimes concentrated in historically affluent areas. Either way, understanding how these amenities are distributed across space is key in most urban studies. Here, we demonstrate how `{spatialops}` makes it easy to count how many parks each neighborhood has.
+
+All we need to do is call `count_features(x, y)` and it will bind to `x` a column with the number of `y` features for each row. If `x` is made of `POINT`s, we also need to provide a `radius`, used to build a buffer around each point and count the features inside it. There are two other optional arguments: `name` is pretty obvious (the name of the count column), whereas `predicate` lets you decide which spatial operation to perform. By default, `predicate = st_intersects` means that `count_features()` will count how many `y`s intersect with each `x`, but you can use `st_covers`, `st_contains_properly`, and so on---see the short sketch at the end of this section.
+
+```{r, warning=FALSE}
+shp_bairr <- shp_bairr %>%
+  count_features(shp_parks, name = "n_parks")
+
+shp_bairr %>%
+  arrange(desc(n_parks)) %>%
+  slice(1:10) %>%
+  ggplot() +
+  geom_col(aes(y = reorder(name, n_parks), x = n_parks)) +
+  scale_y_discrete(labels = scales::label_wrap(10)) +
+  labs(title = "Parks and Rec: Fortaleza Edition", y = "Neighborhood", x = "Number of parks") +
+  theme_minimal()
+
+shp_bairr %>%
+  ggplot() +
+  geom_sf(aes(fill = n_parks, color = n_parks)) +
+  geom_sf_label(
+    data = shp_bairr %>% arrange(desc(n_parks)) %>% slice(1:10),
+    aes(label = name),
+    size = 1.5, color = "black", label.padding = unit(0.1, "lines"), nudge_x = 0.01, nudge_y = 0.001
+  ) +
+  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
+  scale_color_distiller(palette = "YlGnBu", direction = 1) +
+  labs(title = "Parks and Rec: Fortaleza Edition", fill = "Number of parks", color = "Number of parks") +
+  theme_void()
+```
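+
+As mentioned above, `count_features()` also accepts `radius` (for `POINT` layers) and `predicate`. Here is a quick sketch of both; it is not evaluated, the `500` is assumed to be in the units of the layers' CRS, and the column names are only illustrative:
+
+```{r count-features-options, eval=FALSE}
+# For POINT layers, `radius` buffers each point and the y features inside the
+# buffer are counted: how many parks lie within 500 CRS units of each station?
+shp_metro %>%
+  count_features(shp_parks, radius = 500, name = "n_parks_nearby")
+
+# A different predicate: count only the parks falling entirely inside each neighborhood
+shp_bairr %>%
+  count_features(shp_parks, name = "n_parks_inside", predicate = sf::st_contains_properly)
+```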
+
+
+
+## Calculate distance from each park to the nearest station
+
+`dist_nearest(x, y)` works along the same lines: it binds to `x` a column (named via `name`) holding the distance from each feature of `x` to the nearest feature of `y`.
+
+```{r}
+shp_parks <- shp_parks %>%
+  dist_nearest(shp_metro, name = "dist_metro")
+
+shp_parks %>%
+  ggplot() +
+  geom_histogram(aes(x = dist_metro/1000)) +
+  labs(title = "How close are parks to Metrofor stations?", x = "Distance (km)", y = "Number of parks") +
+  theme_minimal()
+```
+
+
+
+## Trinary sets
+
+[under construction]
+
+
+
+## Conclusion
+
+[under construction]