Skip to content

Commit

Permalink
option of useCV implemented in aoa
Browse files Browse the repository at this point in the history
  • Loading branch information
HannaMeyer committed Nov 27, 2024
1 parent 2a154f3 commit 66afdbe
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 5 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,6 @@ Suggests:
gower,
clustMixType,
testthat (>= 3.0.0)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Config/testthat/edition: 3
4 changes: 3 additions & 1 deletion R/aoa.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#' Relevant if some data points are excluded, e.g. when using \code{\link{nndm}}.
#' @param method Character. Method used for distance calculation. Currently euclidean distance (L2) and Mahalanobis distance (MD) are implemented but only L2 is tested. Note that MD takes considerably longer.
#' @param useWeight Logical. Only if a model is given. Weight variables according to importance in the model?
#' @param useCV Logical. Only if a model is given. Use the CV folds to calculate the DI threshold?
#' @param LPD Logical. Indicates whether the local point density should be calculated or not.
#' @param maxLPD numeric or integer. Only if \code{LPD = TRUE}. Number of nearest neighbors to be considered for the calculation of the LPD. Either define a number between 0 and 1 to use a percentage of the number of training samples for the LPD calculation or a whole number larger than 1 and smaller than the number of training samples. CAUTION! If not all training samples are considered, a fitted relationship between LPD and error metric will not make sense (see also \code{\link{DItoErrormetric}}).
#' @param indices logical. Calculate indices of the training data points that are responsible for the LPD of a new prediction location? Output is a matrix with the dimensions num(raster_cells) x maxLPD. Each row holds the indices of the training data points that are relevant for the specific LPD value at that location. Can be used in combination with exploreAOA(aoa) function from the \href{https://github.com/fab-scm/CASTvis}{CASTvis package} for a better visual interpretation of the results. Note that the matrix can be quite big for examples with a high resolution and a larger number of training samples, which can cause memory issues.
Expand Down Expand Up @@ -147,6 +148,7 @@ aoa <- function(newdata,
CVtrain=NULL,
method="L2",
useWeight=TRUE,
useCV=TRUE,
LPD = FALSE,
maxLPD = 1,
indices = FALSE,
Expand Down Expand Up @@ -207,7 +209,7 @@ aoa <- function(newdata,
if (verbose) {
message("No trainDI provided.")
}
trainDI <- trainDI(model, train, variables, weight, CVtest, CVtrain, method, useWeight, LPD, verbose)
trainDI <- trainDI(model, train, variables, weight, CVtest, CVtrain, method, useWeight, useCV, LPD, verbose)
}

if (calc_LPD == TRUE) {
Expand Down
14 changes: 11 additions & 3 deletions R/trainDI.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#' Relevant if some data points are excluded, e.g. when using \code{\link{nndm}}.
#' @param method Character. Method used for distance calculation. Currently euclidean distance (L2) and Mahalanobis distance (MD) are implemented but only L2 is tested. Note that MD takes considerably longer.
#' @param useWeight Logical. Only if a model is given. Weight variables according to importance in the model?
#' @param useCV Logical. Only if a model is given. Use the CV folds to calculate the DI threshold?
#' @param LPD Logical. Indicates whether the local point density should be calculated or not.
#' @param verbose Logical. Print progress or not?
#'
Expand Down Expand Up @@ -101,6 +102,7 @@ trainDI <- function(model = NA,
CVtrain = NULL,
method="L2",
useWeight = TRUE,
useCV =TRUE,
LPD = FALSE,
verbose = TRUE){

Expand All @@ -127,7 +129,7 @@ trainDI <- function(model = NA,
}

# get CV folds from model or from parameters
folds <- aoa_get_folds(model,CVtrain,CVtest)
folds <- aoa_get_folds(model,CVtrain,CVtest,useCV)
CVtest <- folds[[2]]
CVtrain <- folds[[1]]

Expand Down Expand Up @@ -449,9 +451,9 @@ aoa_get_train <- function(model){
# Get folds from train object


aoa_get_folds <- function(model, CVtrain, CVtest){
aoa_get_folds <- function(model, CVtrain, CVtest, useCV){
### if folds are to be extracted from the model:
if (!is.na(model)[1]){
if (useCV&!is.na(model)[1]){
if(tolower(model$control$method)!="cv"){
message("note: Either no model was given or no CV was used for model training. The DI threshold is therefore based on all training data")
}else{
Expand Down Expand Up @@ -481,6 +483,12 @@ aoa_get_folds <- function(model, CVtrain, CVtest){
}
}
}

}
if(!is.na(model)[1]&useCV==FALSE){
message("note: useCV is set to FALSE. The DI threshold is therefore based on all training data")
CVtrain <- NULL
CVtest <- NULL
}
return(list(CVtrain,CVtest))
}
Expand Down
3 changes: 3 additions & 0 deletions man/aoa.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/trainDI.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 66afdbe

Please sign in to comment.