Skip to content

Commit

Permalink
Merge pull request #90 from carlesmila/master
Browse files Browse the repository at this point in the history
Improvements in visualization of NN distances in geodist, nndm and knndm
  • Loading branch information
HannaMeyer authored Mar 12, 2024
2 parents 89dd634 + 6414015 commit 81615bd
Show file tree
Hide file tree
Showing 10 changed files with 153 additions and 41 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Suggests:
rnaturalearth,
MASS,
twosamples,
RColorBrewer,
testthat (>= 3.0.0)
RoxygenNote: 7.2.3
VignetteBuilder: knitr
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* normalize_DI for a more intuitive interpretation
* modifications:
* function DItoErrormetric renamed to errorProfiles and allows for other dissimilarity measures
* Improvement and homogenization of plotting methods for nndm, knndm and geodist objects

# `CAST` 0.9.1
* new features:
Expand Down
6 changes: 6 additions & 0 deletions R/geodist.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@
#'
#' ########### Distance between training data and new data:
#' dist <- geodist(splotdata, studyArea)
#' # With density functions
#' plot(dist)
#' # Or ECDFs (relevant for nndm and knnmd methods)
#' plot(dist, stat="ecdf")
#'
#' ########### Distance between training data, new data and test data (here Chile):
#' plot(splotdata[,"Country"])
Expand All @@ -48,7 +51,10 @@
#' ########### Distance between training data, new data and CV folds:
#' folds <- createFolds(1:nrow(splotdata), k=3, returnTrain=FALSE)
#' dist <- geodist(x=splotdata, modeldomain=studyArea, cvfolds=folds)
#' # Using density functions
#' plot(dist)
#' # Using ECDFs (relevant for nndm and knnmd methods)
#' plot(dist, stat="ecdf")
#'
#' ########### Distances in the feature space:
#' predictors <- terra::rast(system.file("extdata","predictors_chile.tif", package="CAST"))
Expand Down
4 changes: 4 additions & 0 deletions R/knndm.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@
#' knndm_folds <- knndm(train_points, predpoints = pred_points, k = 5)
#' knndm_folds
#' plot(knndm_folds)
#' plot(knndm_folds, type = "simple") # For more accessible legend labels
#' plot(knndm_folds, type = "simple", stat = "density") # To visualize densities rather than ECDFs
#' folds <- as.character(knndm_folds$clusters)
#' ggplot() +
#' geom_sf(data = simarea, alpha = 0) +
Expand All @@ -122,6 +124,8 @@
#' knndm_folds <- knndm(train_points, predpoints = pred_points, k = 5)
#' knndm_folds
#' plot(knndm_folds)
#' plot(knndm_folds, type = "simple") # For more accessible legend labels
#' plot(knndm_folds, type = "simple", stat = "density") # To visualize densities rather than ECDFs
#' folds <- as.character(knndm_folds$clusters)
#' ggplot() +
#' geom_sf(data = simarea, alpha = 0) +
Expand Down
2 changes: 2 additions & 0 deletions R/nndm.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
#' nndm_pred <- nndm(train_points, predpoints=pred_points)
#' nndm_pred
#' plot(nndm_pred)
#' plot(nndm_pred, type = "simple") # For more accessible legend labels
#'
#' # ...or run NNDM with a known autocorrelation range of 10
#' # to restrict the matching to distances lower than that.
Expand Down Expand Up @@ -96,6 +97,7 @@
#' nndm_pred <- nndm(train_points, predpoints=pred_points)
#' nndm_pred
#' plot(nndm_pred)
#' plot(nndm_pred, type = "simple") # For more accessible legend labels
#'
#' ########################################################################
#' # Example 3: Real- world example; using a SpatRast modeldomain instead
Expand Down
154 changes: 118 additions & 36 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,13 @@ plot.aoa = function(x, samplesize = 1000, ...){

#' @name plot
#' @param x An object of type \emph{nndm}.
#' @param type String, defaults to "strict" to show the original nearest neighbour distance definitions in the legend.
#' Alternatively, set to "simple" to have more intuitive labels.
#' @param ... other arguments.
#' @author Carles Milà
#'
#' @export
plot.nndm <- function(x, ...){
plot.nndm <- function(x, type="strict", ...){

# Prepare data for plotting: Gij function
Gij_df <- data.frame(r=x$Gij[order(x$Gij)])
Expand Down Expand Up @@ -127,35 +129,62 @@ plot.nndm <- function(x, ...){
Gplot <- rbind(Gij_df, Gjstar_df, Gj_df)
}

# Define colours matching those of geodist
myColors <- RColorBrewer::brewer.pal(3, "Dark2")

# Plot
ggplot2::ggplot(Gplot) +
ggplot2::geom_step(ggplot2::aes_string(x="r", y="val", colour="Function", size="Function"),
alpha = 0.8) +
ggplot2::scale_size_manual(values=c(1.1, 1.1, 0.5),
labels=c(expression(hat(G)[ij](r)),
expression(hat(G)[j]^"*"*"(r,"*bold(L)*")"),
expression(hat(G)[j](r)))) +
ggplot2::scale_colour_manual(values=c("#000000", "#E69F00", "#56B4E9"),
labels=c(expression(hat(G)[ij](r)),
expression(hat(G)[j]^"*"*"(r,"*bold(L)*")"),
expression(hat(G)[j](r)))) +
ggplot2::ylab(expression(paste(hat(G)[ij](r), ", ",
hat(G)[j]^"*"*"(r,"*bold(L)*")", ", ",
hat(G)[j](r)))) +
ggplot2::labs(colour="", size="") +
ggplot2::theme_bw() +
ggplot2::theme(legend.text.align=0,
legend.text=ggplot2::element_text(size=12))
if(type=="strict"){
ggplot2::ggplot(Gplot) +
ggplot2::geom_step(ggplot2::aes_string(x="r", y="val", colour="Function"),
alpha = 0.8, lwd = 0.8) +
ggplot2::scale_colour_manual(values=c(myColors[2], myColors[3], myColors[1]),
labels=c(expression(hat(G)[ij](r)),
expression(hat(G)[j]^"*"*"(r,"*bold(L)*")"),
expression(hat(G)[j](r)))) +
ggplot2::geom_vline(xintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=1, lwd = 0.1) +
ggplot2::ylab("ECDF") +
ggplot2::labs(colour="Distance function", size="Distance function") +
ggplot2::theme_bw() +
ggplot2::theme(legend.text.align=0,
legend.text=ggplot2::element_text(size=10),
legend.position = "bottom")

}else if(type=="simple"){

ggplot2::ggplot(Gplot) +
ggplot2::geom_step(ggplot2::aes_string(x="r", y="val", colour="Function"),
alpha = 0.8, lwd = 0.8) +
ggplot2::scale_colour_manual(values=c(myColors[2], myColors[3], myColors[1]),
labels=c("prediction-to-sample",
"CV-distances",
"sample-to-sample")) +
ggplot2::geom_vline(xintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=1, lwd = 0.1) +
ggplot2::ylab("ECDF") +
ggplot2::labs(colour="Distance function", size="Distance function") +
ggplot2::theme_bw() +
ggplot2::theme(legend.text.align=0,
legend.text=ggplot2::element_text(size=10),
legend.position = "bottom")

}

}


#' @name plot
#' @param x An object of type \emph{knndm}.
#' @param type String, defaults to "strict" to show the original nearest neighbour distance definitions in the legend.
#' Alternatively, set to "simple" to have more intuitive labels.
#' @param stat String, defaults to "ecdf" but can be set to "density" to estimate density functions.
#' @param ... other arguments.
#' @author Carles Milà
#'
#' @export
plot.knndm <- function(x, ...){
plot.knndm <- function(x, type="strict", stat = "ecdf", ...){

# Prepare data for plotting: Gij function
Gij_df <- data.frame(r=x$Gij[order(x$Gij)])
Expand All @@ -172,19 +201,61 @@ plot.knndm <- function(x, ...){
# Merge data for plotting
Gplot <- rbind(Gij_df, Gjstar_df, Gj_df)

# Define colours matching those of geodist
myColors <- RColorBrewer::brewer.pal(3, "Dark2")

# Plot
ggplot2::ggplot(data=Gplot, ggplot2::aes_string(x="r", group="Function", col="Function")) +
ggplot2::geom_vline(xintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=1, lwd = 0.1) +
ggplot2::stat_ecdf(geom = "step", lwd = 1) +
ggplot2::scale_colour_manual(values=c("#000000", "#E69F00", "#56B4E9"),
labels=c(expression(hat(G)[ij](r)),
expression(hat(G)[j]^"*"*"(r,L)"),
expression(hat(G)[j](r)))) +
ggplot2::ylab(expression(paste(hat(G)[ij](r), ", ",
hat(G)[j]^"*"*"(r,L)", ", ",
hat(G)[j](r))))
if(stat=="ecdf"){
p <- ggplot2::ggplot(data=Gplot, ggplot2::aes_string(x="r", group="Function", col="Function")) +
ggplot2::geom_vline(xintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=1, lwd = 0.1) +
ggplot2::stat_ecdf(geom = "step", lwd = 0.8) +
ggplot2::theme_bw() +
ggplot2::ylab("ECDF") +
ggplot2::labs(group="Distance function", col="Distance function") +
ggplot2::theme(legend.position = "bottom",
legend.text=ggplot2::element_text(size=10))

if(type=="strict"){
p <- p +
ggplot2::scale_colour_manual(values=c(myColors[2], myColors[3], myColors[1]),
labels=c(expression(hat(G)[ij](r)),
expression(hat(G)[j]^"*"*"(r,L)"),
expression(hat(G)[j](r))))
}else if(type == "simple"){
p <- p +
ggplot2::scale_colour_manual(values=c(myColors[2], myColors[3], myColors[1]),
labels=c("prediction-to-sample",
"CV-distances",
"sample-to-sample"))
}

}else if(stat=="density"){
p <- ggplot2::ggplot(data=Gplot, ggplot2::aes_string(x="r", group="Function", fill="Function")) +
ggplot2::geom_density(adjust=1.5, alpha=.5, stat=stat, lwd = 0.3) +
ggplot2::theme_bw() +
ggplot2::ylab("Density") +
ggplot2::labs(group="Distance function", col="Distance function") +
ggplot2::theme(legend.position = "bottom",
legend.text=ggplot2::element_text(size=10))

if(type=="strict"){
p <- p +
ggplot2::scale_fill_manual(values=c(myColors[2], myColors[3], myColors[1]),
labels=c(expression(hat(G)[ij](r)),
expression(hat(G)[j]^"*"*"(r,L)"),
expression(hat(G)[j](r))))
}else if(type == "simple"){
p <- p +
ggplot2::scale_fill_manual(values=c(myColors[2], myColors[3], myColors[1]),
labels=c("prediction-to-sample",
"CV-distances",
"sample-to-sample"))
}
}

p
}

#' Plot results of a Forward feature selection or best subset selection
Expand Down Expand Up @@ -319,7 +390,8 @@ plot.ffs <- function(x,plotType="all",palette=rainbow,reverse=FALSE,


#' @name plot
#' @description Density plot of nearest neighbor distances in geographic space or feature space between training data as well as between training data and prediction locations.
#' @description Density plot of nearest neighbor distances in geographic space or feature space between training data as well as between training data and
#' prediction locations.
#' Optional, the nearest neighbor distances between training data and test data or between training data and CV iterations is shown.
#' The plot can be used to check the suitability of a chosen CV method to be representative to estimate map accuracy.
#' @param x geodist, see \code{\link{geodist}}
Expand All @@ -333,6 +405,14 @@ plot.ffs <- function(x,plotType="all",palette=rainbow,reverse=FALSE,

plot.geodist <- function(x, unit = "m", stat = "density", ...){

# Define colours - they must match those of knndm and nndm
labs <- c("sample-to-sample",
"prediction-to-sample",
"CV-distances",
"test-to-sample")
myColors <- RColorBrewer::brewer.pal(4, "Dark2")
names(myColors) <- labs


type <- attr(x, "type")

Expand All @@ -348,8 +428,9 @@ plot.geodist <- function(x, unit = "m", stat = "density", ...){
if (type=="feature"){unit ="unitless"}
if(stat=="density"){
p <- ggplot2::ggplot(data=x, aes(x=dist, group=what, fill=what)) +
ggplot2::geom_density(adjust=1.5, alpha=.4, stat=stat) +
ggplot2::scale_fill_discrete(name = "distance function") +
ggplot2::geom_density(adjust=1.5, alpha=.5, stat=stat, lwd = 0.3) +
ggplot2::scale_fill_manual(name = "distance function", values = myColors) +
ggplot2::theme_bw() +
ggplot2::xlab(xlabs) +
ggplot2::theme(legend.position="bottom",
plot.margin = unit(c(0,0.5,0,0),"cm"))
Expand All @@ -359,7 +440,8 @@ plot.geodist <- function(x, unit = "m", stat = "density", ...){
ggplot2::geom_hline(yintercept=0, lwd = 0.1) +
ggplot2::geom_hline(yintercept=1, lwd = 0.1) +
ggplot2::stat_ecdf(geom = "step", lwd = 1) +
ggplot2::scale_color_discrete(name = "distance function") +
ggplot2::scale_color_manual(name = "distance function", values = myColors) +
ggplot2::theme_bw() +
ggplot2::xlab(xlabs) +
ggplot2::ylab("ECDF") +
ggplot2::theme(legend.position="bottom",
Expand Down
6 changes: 6 additions & 0 deletions man/geodist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/knndm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/nndm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions man/plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 81615bd

Please sign in to comment.