From aa1d2d97fec95851e617b5dc7cbb61c37653b9a3 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Thu, 28 Dec 2023 17:07:17 +0100 Subject: [PATCH 1/7] Add parameter n_bins to potential_interactions() --- R/sv_dependence.R | 19 ++++++++----- tests/testthat/test-potential_interactions.R | 30 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 tests/testthat/test-potential_interactions.R diff --git a/R/sv_dependence.R b/R/sv_dependence.R index 8205b50..5d66265 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -194,25 +194,28 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 #' #' Returns vector of interaction strengths between variable `v` and all other variables. #' -#' If SHAP interaction values are available, interaction strength +#' If SHAP interaction values are available, the interaction strength #' between feature `v` and another feature `v'` is measured by twice their #' mean absolute SHAP interaction values. Otherwise, we use as heuristic the #' squared correlation between feature values of `v'` and #' SHAP values of `v`, averaged over (binned) values of `v`. -#' A numeric `v` with more than `n_bins` unique values is binned into quantile bins. -#' Currently `n_bins` equals the smaller of \eqn{n/20} and \eqn{\sqrt n}, where \eqn{n} -#' is the sample size. #' The average squared correlation is weighted by the number of non-missing feature #' values in the bin. Note that non-numeric color features are turned to numeric #' by calling [data.matrix()], which does not necessarily make sense. #' #' @param obj An object of class "shapviz". #' @param v Variable name. +#' @param n_bins A numeric `v` with more than `n_bins` unique values is binned +#' into that many quantile bins. If `NULL` (default), `n_bins` equals the smaller +#' of \eqn{n/20} and \eqn{\sqrt n} (rounded up), where \eqn{n} is the sample size. #' @returns A named vector of decreasing interaction strengths. #' @export #' @seealso [sv_dependence()] -potential_interactions <- function(obj, v) { - stopifnot(is.shapviz(obj)) +potential_interactions <- function(obj, v, n_bins = NULL) { + stopifnot( + is.shapviz(obj), + is.null(n_bins) || (length(n_bins) == 1L && n_bins >= 1L) + ) S <- get_shap_values(obj) S_inter <- get_shap_interactions(obj) X <- get_feature_values(obj) @@ -233,7 +236,9 @@ potential_interactions <- function(obj, v) { r_sq <- function(s, x) { suppressWarnings(stats::cor(s, data.matrix(x), use = "p")^2) } - n_bins <- ceiling(min(sqrt(nrow(X)), nrow(X) / 20)) + if (is.null(n_bins)) { + n_bins <- ceiling(min(sqrt(nrow(X)), nrow(X) / 20)) + } v_bin <- .fast_bin(X[[v]], n_bins = n_bins) s_bin <- split(S[, v], v_bin) X_bin <- split(X[v_other], v_bin) diff --git a/tests/testthat/test-potential_interactions.R b/tests/testthat/test-potential_interactions.R new file mode 100644 index 0000000..03da32e --- /dev/null +++ b/tests/testthat/test-potential_interactions.R @@ -0,0 +1,30 @@ +dtrain <- xgboost::xgb.DMatrix( + data.matrix(iris[, -1L]), label = iris[, 1L], nthread = 1 +) +fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 10L) +x <- shapviz(fit, X_pred = dtrain, X = iris[, -1L]) + +test_that("n_bins has no effect for factor v", { + expect_equal( + potential_interactions(x, "Species", n_bins = NULL), + potential_interactions(x, "Species", n_bins = 2) + ) +}) + +test_that("n_bins has an effect for numeric v", { + expect_false( + identical( + potential_interactions(x, "Sepal.Width", n_bins = 2), + potential_interactions(x, "Sepal.Width", n_bins = 3) + ) + ) +}) + +# Now with SHAP interactions +test_that("potential_interactions respects true SHAP interactions", { + xi <- shapviz(fit, X_pred = dtrain, X = iris[, -1L], interactions = TRUE) + i1 <- potential_interactions(xi, "Species") + i2 <- sv_interaction(xi, kind = "no")[names(i1), "Species"] + expect_equal(i1, i2, tolerance = 1e-5) +}) + From 14cfac1f9e7d5d2865b208610f5235f2fa560fe8 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Thu, 28 Dec 2023 18:06:16 +0100 Subject: [PATCH 2/7] Add argument color_numeric to potential_interactions() --- R/sv_dependence.R | 57 +++++++++++++++++--- tests/testthat/test-potential_interactions.R | 41 +++++++++++++- 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/R/sv_dependence.R b/R/sv_dependence.R index 5d66265..465eb37 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -208,10 +208,13 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 #' @param n_bins A numeric `v` with more than `n_bins` unique values is binned #' into that many quantile bins. If `NULL` (default), `n_bins` equals the smaller #' of \eqn{n/20} and \eqn{\sqrt n} (rounded up), where \eqn{n} is the sample size. +#' Ignored if `obj` contains SHAP interactions. +#' @param color_numeric Should color feature values be converted to numeric? Default is +#' `TRUE`. Ignored if `obj` contains SHAP interactions. #' @returns A named vector of decreasing interaction strengths. #' @export #' @seealso [sv_dependence()] -potential_interactions <- function(obj, v, n_bins = NULL) { +potential_interactions <- function(obj, v, n_bins = NULL, color_numeric = TRUE) { stopifnot( is.shapviz(obj), is.null(n_bins) || (length(n_bins) == 1L && n_bins >= 1L) @@ -232,19 +235,20 @@ potential_interactions <- function(obj, v, n_bins = NULL) { return(sort(2 * colMeans(abs(S_inter[, v, ]))[v_other], decreasing = TRUE)) } - # Complicated case: we need to rely on correlation based heuristic - r_sq <- function(s, x) { - suppressWarnings(stats::cor(s, data.matrix(x), use = "p")^2) + # Complicated case: we need to rely on R-squared adjusted based heuristic + COL <- X[v_other] + if (isTRUE(color_numeric)) { + COL <- as.data.frame(data.matrix(COL)) } if (is.null(n_bins)) { n_bins <- ceiling(min(sqrt(nrow(X)), nrow(X) / 20)) } v_bin <- .fast_bin(X[[v]], n_bins = n_bins) s_bin <- split(S[, v], v_bin) - X_bin <- split(X[v_other], v_bin) - w <- do.call(rbind, lapply(X_bin, function(z) colSums(!is.na(z)))) - cor_squared <- do.call(rbind, mapply(r_sq, s_bin, X_bin, SIMPLIFY = FALSE)) - sort(colSums(w * cor_squared, na.rm = TRUE) / colSums(w), decreasing = TRUE) + COL_bin <- split(COL, v_bin) + w <- do.call(rbind, lapply(COL_bin, function(z) colSums(!is.na(z)))) + r2 <- do.call(rbind, mapply(r2_adj, s_bin, COL_bin, SIMPLIFY = FALSE)) + sort(colSums(w * r2, na.rm = TRUE) / colSums(w), decreasing = TRUE) } # Helper functions @@ -261,3 +265,40 @@ potential_interactions <- function(obj, v, n_bins = NULL) { q <- stats::quantile(z, seq(0, 1, length.out = n_bins + 1L), na.rm = TRUE) findInterval(z, unique(q), rightmost.closed = TRUE) } + +#' R-squared adjusted +#' +#' Internal function used to calculate the R-squared adjusted for a simple linear +#' regression with the SHAP values of the feature on the x-axis as response and +#' the (potential) color feature as single feature. +#' +#' @noRd +#' @keywords internal +#' +#' @param s Vector of within-bin SHAP values of the feature on the x-axis. +#' @param color Feature values of the color feature. +#' @returns The R-squared adjusted from regressing `s` onto `color`. If the calculation +#' fails (e.g., too many factor levels in `color`), the function returns `NA`. +r2_adj_uni <- function(s, color) { + tryCatch( + summary(stats::lm(s ~ color))[["adj.r.squared"]], + error = function(e) return(NA) + ) +} + +#' R-squared adjusted for multiple v +#' +#' Internal function that calls `r2_adj_uni()` for multiple color features. +#' +#' @noRd +#' @keywords internal +#' +#' @param s Vector of SHAP values of the feature on the x-axis. +#' @param df Data frame of (multiple) color features. +#' @returns Vector of R-squared adjusted (one per column in `df`). For columns in `df` +#' where the calculations fail, the value is `NA`. +r2_adj <- function(s, df) { + suppressWarnings( + vapply(df, FUN = r2_adj_uni, FUN.VALUE = 1.0, s = s, USE.NAMES = FALSE) + ) +} diff --git a/tests/testthat/test-potential_interactions.R b/tests/testthat/test-potential_interactions.R index 03da32e..27b6d4d 100644 --- a/tests/testthat/test-potential_interactions.R +++ b/tests/testthat/test-potential_interactions.R @@ -20,7 +20,14 @@ test_that("n_bins has an effect for numeric v", { ) }) -# Now with SHAP interactions +test_that("color_numeric has an effect", { + p1 <- potential_interactions(x, "Sepal.Width", color_numeric = TRUE) + p2 <- potential_interactions(x, "Sepal.Width", color_numeric = FALSE) + num <- c("Petal.Width", "Petal.Length") + expect_equal(p1[num], p2[num]) + expect_false(p1["Species"] == p2["Species"]) +}) + test_that("potential_interactions respects true SHAP interactions", { xi <- shapviz(fit, X_pred = dtrain, X = iris[, -1L], interactions = TRUE) i1 <- potential_interactions(xi, "Species") @@ -28,3 +35,35 @@ test_that("potential_interactions respects true SHAP interactions", { expect_equal(i1, i2, tolerance = 1e-5) }) +test_that("r2_adj_uni() returns R-squared adjusted", { + fit_lm <- lm(Sepal.Length ~ Species, data = iris) + expect_equal( + r2_adj_uni(iris$Sepal.Length, iris$Species), + summary(fit_lm)[["adj.r.squared"]] + ) +}) + +test_that("r2_adj_uni() fails with NA", { + expect_equal(r2_adj_uni(0, 1:2), NA) +}) + +test_that("r2_adj() returns R-squared adjusted per column in df", { + fit_lm1 <- lm(Sepal.Length ~ Species, data = iris) + fit_lm2 <- lm(Sepal.Length ~ Sepal.Width, data = iris) + + expect_equal( + r2_adj(iris$Sepal.Length, iris[c("Species", "Sepal.Width")]), + c(summary(fit_lm1)[["adj.r.squared"]], summary(fit_lm2)[["adj.r.squared"]]) + ) +}) + +test_that("r2_adj() can fail with NA", { + expect_equal(r2_adj(0, df = data.frame(x = c(1, 1), y = 1:2)), c(NA_real_, NA_real_)) +}) + + + +# r_sq <- function(s, x) { +# suppressWarnings(stats::cor(s, data.matrix(x), use = "p")^2) +# } + From d28b494dcc4c91115e6ab94f3d69cd504d888de2 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Thu, 28 Dec 2023 20:01:07 +0100 Subject: [PATCH 3/7] Update docu of potential_interactions() --- R/sv_dependence.R | 26 +++++++++--------- man/potential_interactions.Rd | 29 ++++++++++++-------- tests/testthat/test-potential_interactions.R | 16 +++++++++-- 3 files changed, 43 insertions(+), 28 deletions(-) diff --git a/R/sv_dependence.R b/R/sv_dependence.R index 465eb37..ac52bc6 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -195,22 +195,22 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 #' Returns vector of interaction strengths between variable `v` and all other variables. #' #' If SHAP interaction values are available, the interaction strength -#' between feature `v` and another feature `v'` is measured by twice their -#' mean absolute SHAP interaction values. Otherwise, we use as heuristic the -#' squared correlation between feature values of `v'` and -#' SHAP values of `v`, averaged over (binned) values of `v`. -#' The average squared correlation is weighted by the number of non-missing feature -#' values in the bin. Note that non-numeric color features are turned to numeric -#' by calling [data.matrix()], which does not necessarily make sense. +#' between feature `v` and another feature `z` is measured by twice their +#' mean absolute SHAP interaction values. +#' +#' Otherwise, we use as heuristic the adjusted R-squared between the SHAP values of `v` +#' and the feature values of `z` , averaged over (binned) values of `v`. +#' The average adjusted R-squared is weighted by the number of non-missing values of `z` +#' in the bin. If `color_numeric = TRUE`, a non-numeric `z` is turned into numeric, +#' which does not necessarily make sense (but works in almost all cases). #' #' @param obj An object of class "shapviz". #' @param v Variable name. -#' @param n_bins A numeric `v` with more than `n_bins` unique values is binned -#' into that many quantile bins. If `NULL` (default), `n_bins` equals the smaller -#' of \eqn{n/20} and \eqn{\sqrt n} (rounded up), where \eqn{n} is the sample size. -#' Ignored if `obj` contains SHAP interactions. -#' @param color_numeric Should color feature values be converted to numeric? Default is -#' `TRUE`. Ignored if `obj` contains SHAP interactions. +#' @param n_bins Into how many quantile bins should a numeric `v` be binned? +#' The default `NULL` equals the smaller of \eqn{n/20} and \eqn{\sqrt n} (rounded up), +#' where \eqn{n} is the sample size. Ignored if `obj` contains SHAP interactions. +#' @param color_numeric Should color features be converted to numeric, even if they are +#' factors/characters? Default is `TRUE`. Ignored if `obj` contains SHAP interactions. #' @returns A named vector of decreasing interaction strengths. #' @export #' @seealso [sv_dependence()] diff --git a/man/potential_interactions.Rd b/man/potential_interactions.Rd index 7c938a0..e60db3b 100644 --- a/man/potential_interactions.Rd +++ b/man/potential_interactions.Rd @@ -4,12 +4,19 @@ \alias{potential_interactions} \title{Interaction Strength} \usage{ -potential_interactions(obj, v) +potential_interactions(obj, v, n_bins = NULL, color_numeric = TRUE) } \arguments{ \item{obj}{An object of class "shapviz".} \item{v}{Variable name.} + +\item{n_bins}{Into how many quantile bins should a numeric \code{v} be binned? +The default \code{NULL} equals the smaller of \eqn{n/20} and \eqn{\sqrt n} (rounded up), +where \eqn{n} is the sample size. Ignored if \code{obj} contains SHAP interactions.} + +\item{color_numeric}{Should color features be converted to numeric, even if they are +factors/characters? Default is \code{TRUE}. Ignored if \code{obj} contains SHAP interactions.} } \value{ A named vector of decreasing interaction strengths. @@ -18,17 +25,15 @@ A named vector of decreasing interaction strengths. Returns vector of interaction strengths between variable \code{v} and all other variables. } \details{ -If SHAP interaction values are available, interaction strength -between feature \code{v} and another feature \verb{v'} is measured by twice their -mean absolute SHAP interaction values. Otherwise, we use as heuristic the -squared correlation between feature values of \verb{v'} and -SHAP values of \code{v}, averaged over (binned) values of \code{v}. -A numeric \code{v} with more than \code{n_bins} unique values is binned into quantile bins. -Currently \code{n_bins} equals the smaller of \eqn{n/20} and \eqn{\sqrt n}, where \eqn{n} -is the sample size. -The average squared correlation is weighted by the number of non-missing feature -values in the bin. Note that non-numeric color features are turned to numeric -by calling \code{\link[=data.matrix]{data.matrix()}}, which does not necessarily make sense. +If SHAP interaction values are available, the interaction strength +between feature \code{v} and another feature \code{z} is measured by twice their +mean absolute SHAP interaction values. + +Otherwise, we use as heuristic the adjusted R-squared between the SHAP values of \code{v} +and the feature values of \code{z} , averaged over (binned) values of \code{v}. +The average adjusted R-squared is weighted by the number of non-missing values of \code{z} +in the bin. If \code{color_numeric = TRUE}, a non-numeric \code{z} is turned into numeric, +which does not necessarily make sense (but works in almost all cases). } \seealso{ \code{\link[=sv_dependence]{sv_dependence()}} diff --git a/tests/testthat/test-potential_interactions.R b/tests/testthat/test-potential_interactions.R index 27b6d4d..f6b8a9c 100644 --- a/tests/testthat/test-potential_interactions.R +++ b/tests/testthat/test-potential_interactions.R @@ -61,9 +61,19 @@ test_that("r2_adj() can fail with NA", { expect_equal(r2_adj(0, df = data.frame(x = c(1, 1), y = 1:2)), c(NA_real_, NA_real_)) }) +test_that("r2_adj() gives quite similar results to r_sq()", { + # This was the original approach to calculate R-squared NON-adjusted + r_sq <- function(s, x) { + suppressWarnings(stats::cor(s, data.matrix(x), use = "p")^2) + } + p1 <- r2_adj(iris$Sepal.Length, iris[2:4]) + names(p1) <- colnames(iris[2:4]) + p2 <- r_sq(iris$Sepal.Length, iris[2:4]) + + expect_equal(p1, 1 - (1 - drop(p2)) * 149 / 148) +}) + + -# r_sq <- function(s, x) { -# suppressWarnings(stats::cor(s, data.matrix(x), use = "p")^2) -# } From f121275d0cb9c89e1b19232e761331c34efa0485 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Sat, 30 Dec 2023 20:59:55 +0100 Subject: [PATCH 4/7] Implement scaling logic and add arguments to sv_dependence() --- R/sv_dependence.R | 167 ++++++++++++------- man/potential_interactions.Rd | 38 +++-- man/sv_dependence.Rd | 10 ++ tests/testthat/test-potential_interactions.R | 71 ++++---- 4 files changed, 180 insertions(+), 106 deletions(-) diff --git a/R/sv_dependence.R b/R/sv_dependence.R index ac52bc6..4d7e1cc 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -31,6 +31,9 @@ #' Requires SHAP interaction values. If `color_var = NULL` (or it is equal to `v`), #' the pure main effect of `v` is visualized. Otherwise, twice the SHAP interaction #' values between `v` and the `color_var` are plotted. +#' @param ih_nbins,ih_color_num,ih_scale Interaction heuristic (ih) parameters used to +#' select the color variable, see [potential_interactions()]. +#' Only used if `color_var = "auto"` and if there are no SHAP interaction values. #' @param ... Arguments passed to [ggplot2::geom_jitter()]. #' @returns An object of class "ggplot" (or "patchwork") representing a dependence plot. #' @examples @@ -71,7 +74,9 @@ sv_dependence.default <- function(object, ...) { #' @export sv_dependence.shapviz <- function(object, v, color_var = "auto", color = "#3b528b", viridis_args = getOption("shapviz.viridis_args"), - jitter_width = NULL, interactions = FALSE, ...) { + jitter_width = NULL, interactions = FALSE, + ih_nbins = NULL, ih_color_num = TRUE, + ih_scale = FALSE, ...) { p <- length(v) if (p > 1L || length(color_var) > 1L) { if (is.null(color_var)) { @@ -90,6 +95,9 @@ sv_dependence.shapviz <- function(object, v, color_var = "auto", color = "#3b528 object = object, viridis_args = viridis_args, interactions = interactions, + ih_nbins = ih_nbins, + ih_color_num = ih_color_num, + ih_scale = ih_scale, ... ), SIMPLIFY = FALSE @@ -118,7 +126,9 @@ sv_dependence.shapviz <- function(object, v, color_var = "auto", color = "#3b528 # Set color value if (!is.null(color_var) && color_var == "auto" && !("auto" %in% nms)) { - scores <- potential_interactions(object, v) + scores <- potential_interactions( + object, v, nbins = ih_nbins, color_num = ih_color_num, scale = ih_scale + ) color_var <- names(scores)[1L] # NULL if p = 1L } if (isTRUE(interactions)) { @@ -169,7 +179,9 @@ sv_dependence.shapviz <- function(object, v, color_var = "auto", color = "#3b528 #' @export sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b528b", viridis_args = getOption("shapviz.viridis_args"), - jitter_width = NULL, interactions = FALSE, ...) { + jitter_width = NULL, interactions = FALSE, + ih_nbins = NULL, ih_color_num = TRUE, + ih_scale = FALSE, ...) { stopifnot( length(v) == 1L, length(color_var) <= 1L @@ -184,6 +196,9 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 viridis_args = viridis_args, jitter_width = jitter_width, interactions = interactions, + ih_nbins = ih_nbins, + ih_color_num = ih_color_num, + ih_scale = ih_scale, ... ) plot_list <- add_titles(plot_list, nms = names(object)) # see sv_waterfall() @@ -192,40 +207,48 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 #' Interaction Strength #' -#' Returns vector of interaction strengths between variable `v` and all other variables. +#' Returns vector of interaction strengths between variable `v` and all other variables, +#' see Details. #' #' If SHAP interaction values are available, the interaction strength -#' between feature `v` and another feature `z` is measured by twice their +#' between feature `v` and another feature `v'` is measured by twice their #' mean absolute SHAP interaction values. #' -#' Otherwise, we use as heuristic the adjusted R-squared between the SHAP values of `v` -#' and the feature values of `z` , averaged over (binned) values of `v`. -#' The average adjusted R-squared is weighted by the number of non-missing values of `z` -#' in the bin. If `color_numeric = TRUE`, a non-numeric `z` is turned into numeric, -#' which does not necessarily make sense (but works in almost all cases). +#' Otherwise, we use a heuristic calculated as follows to calculate interaction strength +#' between `v` and each other "color" feature `v': +#' 1. If `v` is numeric, it is binned into `nbins` bins. +#' 2. Per bin, the SHAP values of `v` are regressed onto `v`. +#' 3. Per bin, the adjusted R-squared is calculated. +#' 4. The adjusted R-squared are averaged over bins, weighted by the bin size. +#' +#' Set `scale = TRUE` to multiply the adjusted R-squared by the within-bin variance +#' of the SHAP values. This will put higher weight to bins with larger scatter. +#' +#' Set `color_num = FALSE` to *not* turn the values of `v` into numerics. #' #' @param obj An object of class "shapviz". -#' @param v Variable name. -#' @param n_bins Into how many quantile bins should a numeric `v` be binned? +#' @param v Variable name for which potential SHAP interactions are to be computed. +#' @param nbins Into how many quantile bins should a numeric `v` be binned? #' The default `NULL` equals the smaller of \eqn{n/20} and \eqn{\sqrt n} (rounded up), #' where \eqn{n} is the sample size. Ignored if `obj` contains SHAP interactions. -#' @param color_numeric Should color features be converted to numeric, even if they are -#' factors/characters? Default is `TRUE`. Ignored if `obj` contains SHAP interactions. +#' @param color_num Should other ("color") features `v'` be converted to numeric, +#' even if they are factors/characters? Default is `TRUE`. +#' Ignored if `obj` contains SHAP interactions. +#' @param scale Should adjusted R-squared be multiplied with the sample variance of +#' within-bin SHAP values? The default is `FALSE`. +#' Ignored if `obj` contains SHAP interactions. #' @returns A named vector of decreasing interaction strengths. #' @export #' @seealso [sv_dependence()] -potential_interactions <- function(obj, v, n_bins = NULL, color_numeric = TRUE) { - stopifnot( - is.shapviz(obj), - is.null(n_bins) || (length(n_bins) == 1L && n_bins >= 1L) - ) +potential_interactions <- function(obj, v, nbins = NULL, + color_num = TRUE, scale = FALSE) { + stopifnot(is.shapviz(obj)) S <- get_shap_values(obj) S_inter <- get_shap_interactions(obj) X <- get_feature_values(obj) nms <- colnames(obj) v_other <- setdiff(nms, v) stopifnot(v %in% nms) - if (ncol(obj) <= 1L) { return(NULL) } @@ -235,70 +258,98 @@ potential_interactions <- function(obj, v, n_bins = NULL, color_numeric = TRUE) return(sort(2 * colMeans(abs(S_inter[, v, ]))[v_other], decreasing = TRUE)) } - # Complicated case: we need to rely on R-squared adjusted based heuristic - COL <- X[v_other] - if (isTRUE(color_numeric)) { - COL <- as.data.frame(data.matrix(COL)) + # Complicated case: calculate heuristic per color variable + if (is.null(nbins)) { + nbins <- ceiling(min(sqrt(nrow(X)), nrow(X) / 20)) } - if (is.null(n_bins)) { - n_bins <- ceiling(min(sqrt(nrow(X)), nrow(X) / 20)) - } - v_bin <- .fast_bin(X[[v]], n_bins = n_bins) - s_bin <- split(S[, v], v_bin) - COL_bin <- split(COL, v_bin) - w <- do.call(rbind, lapply(COL_bin, function(z) colSums(!is.na(z)))) - r2 <- do.call(rbind, mapply(r2_adj, s_bin, COL_bin, SIMPLIFY = FALSE)) - sort(colSums(w * r2, na.rm = TRUE) / colSums(w), decreasing = TRUE) + out <- vapply( + X[v_other], # data.frame is a list + FUN = heuristic, + FUN.VALUE = 1.0, + s = S[, v], + bins = .fast_bin(X[[v]], nbins = nbins), + color_num = color_num, + scale = scale + ) + sort(out, decreasing = TRUE, na.last = TRUE) } # Helper functions +# Checks if z is discrete .is_discrete <- function(z, n_unique) { is.factor(z) || is.character(z) || is.logical(z) || (length(unique(z)) <= n_unique) } -# Bins z into integer valued bins, but only if discrete -.fast_bin <- function(z, n_bins) { - if (.is_discrete(z, n_unique = n_bins)) { +# Like as.numeric(), but can deal with factor variables +.as_numeric <- function(z) { + if (is.numeric(z)) { return(z) } - q <- stats::quantile(z, seq(0, 1, length.out = n_bins + 1L), na.rm = TRUE) + if (is.character(z)) { + z <- factor(z) + } + as.numeric(z) +} + +# Bins discrete z into integer valued bins +.fast_bin <- function(z, nbins) { + if (.is_discrete(z, n_unique = nbins)) { + return(z) + } + q <- stats::quantile(z, seq(0, 1, length.out = nbins + 1L), na.rm = TRUE) findInterval(z, unique(q), rightmost.closed = TRUE) } -#' R-squared adjusted +#' Interaction Heuristic #' -#' Internal function used to calculate the R-squared adjusted for a simple linear -#' regression with the SHAP values of the feature on the x-axis as response and -#' the (potential) color feature as single feature. +#' Internal function used to calculate the interaction heuristics described in +#' [potential_interactions()]. #' #' @noRd #' @keywords internal #' -#' @param s Vector of within-bin SHAP values of the feature on the x-axis. -#' @param color Feature values of the color feature. -#' @returns The R-squared adjusted from regressing `s` onto `color`. If the calculation -#' fails (e.g., too many factor levels in `color`), the function returns `NA`. -r2_adj_uni <- function(s, color) { - tryCatch( - summary(stats::lm(s ~ color))[["adj.r.squared"]], - error = function(e) return(NA) - ) +#' @inheritParams potential_interactions +#' @param color Feature values of the "color" feature. +#' @param s SHAP values of `v`. +#' @returns A single number. +heuristic <- function(color, s, bins, color_num, scale) { + if (isTRUE(color_num)) { + color <- .as_numeric(color) + } + color <- split(color, bins) + s <- split(s, bins) + M <- mapply(heuristic_in_bin, color = color, s = s, MoreArgs = list(scale = scale)) + stats::weighted.mean(M[1L, ], M[2L, ], na.rm = TRUE) } -#' R-squared adjusted for multiple v +#' Interaction Heuristic in Bin #' -#' Internal function that calls `r2_adj_uni()` for multiple color features. +#' Internal function used to calculate the within-bin heuristic used in `heuristic()`. +#' See `heuristic()` for details. #' #' @noRd #' @keywords internal #' -#' @param s Vector of SHAP values of the feature on the x-axis. -#' @param df Data frame of (multiple) color features. -#' @returns Vector of R-squared adjusted (one per column in `df`). For columns in `df` -#' where the calculations fail, the value is `NA`. -r2_adj <- function(s, df) { +#' @inheritParams heuristic +#' @returns +#' A (1x2) matrix. The first value equals the (possibly) scaled adjusted R-squared, +#' the second the number of used observations. +heuristic_in_bin <- function(color, s, scale = FALSE) { suppressWarnings( - vapply(df, FUN = r2_adj_uni, FUN.VALUE = 1.0, s = s, USE.NAMES = FALSE) + tryCatch( + { + z <- stats::lm(s ~ color) + r <- z$residuals + var_y <- stats::var(z$fitted.values + r) + var_r <- sum(r^2) / z$df.residual # var(r) would give non-adjusted R2 + stat <- 1 - var_r / var_y + if (scale) { + stat <- stat * var_y # = var_y - var_r + } + cbind(stat = stat, n = length(r)) + }, + error = function(e) return(cbind(stat = NA, n = 0)) + ) ) } diff --git a/man/potential_interactions.Rd b/man/potential_interactions.Rd index e60db3b..6dc8b9c 100644 --- a/man/potential_interactions.Rd +++ b/man/potential_interactions.Rd @@ -4,36 +4,50 @@ \alias{potential_interactions} \title{Interaction Strength} \usage{ -potential_interactions(obj, v, n_bins = NULL, color_numeric = TRUE) +potential_interactions(obj, v, nbins = NULL, color_num = TRUE, scale = FALSE) } \arguments{ \item{obj}{An object of class "shapviz".} -\item{v}{Variable name.} +\item{v}{Variable name for which potential SHAP interactions are to be computed.} -\item{n_bins}{Into how many quantile bins should a numeric \code{v} be binned? +\item{nbins}{Into how many quantile bins should a numeric \code{v} be binned? The default \code{NULL} equals the smaller of \eqn{n/20} and \eqn{\sqrt n} (rounded up), where \eqn{n} is the sample size. Ignored if \code{obj} contains SHAP interactions.} -\item{color_numeric}{Should color features be converted to numeric, even if they are -factors/characters? Default is \code{TRUE}. Ignored if \code{obj} contains SHAP interactions.} +\item{color_num}{Should other ("color") features \verb{v'} be converted to numeric, +even if they are factors/characters? Default is \code{TRUE}. +Ignored if \code{obj} contains SHAP interactions.} + +\item{scale}{Should adjusted R-squared be multiplied with the sample variance of +within-bin SHAP values? The default is \code{FALSE}. +Ignored if \code{obj} contains SHAP interactions.} } \value{ A named vector of decreasing interaction strengths. } \description{ -Returns vector of interaction strengths between variable \code{v} and all other variables. +Returns vector of interaction strengths between variable \code{v} and all other variables, +see Details. } \details{ If SHAP interaction values are available, the interaction strength -between feature \code{v} and another feature \code{z} is measured by twice their +between feature \code{v} and another feature \verb{v'} is measured by twice their mean absolute SHAP interaction values. -Otherwise, we use as heuristic the adjusted R-squared between the SHAP values of \code{v} -and the feature values of \code{z} , averaged over (binned) values of \code{v}. -The average adjusted R-squared is weighted by the number of non-missing values of \code{z} -in the bin. If \code{color_numeric = TRUE}, a non-numeric \code{z} is turned into numeric, -which does not necessarily make sense (but works in almost all cases). +Otherwise, we use a heuristic calculated as follows to calculate interaction strength +between \code{v} and each other "color" feature `v': +\enumerate{ +\item If \code{v} is numeric, it is binned into \code{nbins} bins. +\item Per bin, the SHAP values of \code{v} are regressed onto \code{v}. +\item Per bin, the adjusted R-squared is calculated. +\item The adjusted R-squared are averaged over bins, weighted by the bin size. +} + +Set \code{scale = TRUE} to multiply the adjusted R-squared by the within-bin variance +of the SHAP values. This will put higher weight to bins with larger scatter. + +Set \code{color_num = FALSE} to \emph{not} turn the values of \code{v} into numerics. } \seealso{ \code{\link[=sv_dependence]{sv_dependence()}} diff --git a/man/sv_dependence.Rd b/man/sv_dependence.Rd index ec08909..ff3b546 100644 --- a/man/sv_dependence.Rd +++ b/man/sv_dependence.Rd @@ -19,6 +19,9 @@ sv_dependence(object, ...) viridis_args = getOption("shapviz.viridis_args"), jitter_width = NULL, interactions = FALSE, + ih_nbins = NULL, + ih_color_num = TRUE, + ih_scale = FALSE, ... ) @@ -30,6 +33,9 @@ sv_dependence(object, ...) viridis_args = getOption("shapviz.viridis_args"), jitter_width = NULL, interactions = FALSE, + ih_nbins = NULL, + ih_color_num = TRUE, + ih_scale = FALSE, ... ) } @@ -67,6 +73,10 @@ Can be a vector/list if \code{v} is a vector.} Requires SHAP interaction values. If \code{color_var = NULL} (or it is equal to \code{v}), the pure main effect of \code{v} is visualized. Otherwise, twice the SHAP interaction values between \code{v} and the \code{color_var} are plotted.} + +\item{ih_nbins, ih_color_num, ih_scale}{Interaction heuristic (ih) parameters used to +select the color variable, see \code{\link[=potential_interactions]{potential_interactions()}}. +Only used if \code{color_var = "auto"} and if there are no SHAP interaction values.} } \value{ An object of class "ggplot" (or "patchwork") representing a dependence plot. diff --git a/tests/testthat/test-potential_interactions.R b/tests/testthat/test-potential_interactions.R index f6b8a9c..a7e6535 100644 --- a/tests/testthat/test-potential_interactions.R +++ b/tests/testthat/test-potential_interactions.R @@ -4,25 +4,25 @@ dtrain <- xgboost::xgb.DMatrix( fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 10L) x <- shapviz(fit, X_pred = dtrain, X = iris[, -1L]) -test_that("n_bins has no effect for factor v", { +test_that("nbins has no effect for factor v", { expect_equal( - potential_interactions(x, "Species", n_bins = NULL), - potential_interactions(x, "Species", n_bins = 2) + potential_interactions(x, "Species", nbins = NULL), + potential_interactions(x, "Species", nbins = 2) ) }) -test_that("n_bins has an effect for numeric v", { +test_that("nbins has an effect for numeric v", { expect_false( identical( - potential_interactions(x, "Sepal.Width", n_bins = 2), - potential_interactions(x, "Sepal.Width", n_bins = 3) + potential_interactions(x, "Sepal.Width", nbins = 2), + potential_interactions(x, "Sepal.Width", nbins = 3) ) ) }) -test_that("color_numeric has an effect", { - p1 <- potential_interactions(x, "Sepal.Width", color_numeric = TRUE) - p2 <- potential_interactions(x, "Sepal.Width", color_numeric = FALSE) +test_that("color_num has an effect only for non-numeric features", { + p1 <- potential_interactions(x, "Sepal.Width", color_num = TRUE) + p2 <- potential_interactions(x, "Sepal.Width", color_num = FALSE) num <- c("Petal.Width", "Petal.Length") expect_equal(p1[num], p2[num]) expect_false(p1["Species"] == p2["Species"]) @@ -35,45 +35,44 @@ test_that("potential_interactions respects true SHAP interactions", { expect_equal(i1, i2, tolerance = 1e-5) }) -test_that("r2_adj_uni() returns R-squared adjusted", { +test_that("heuristic_in_bin() returns R-squared adjusted", { fit_lm <- lm(Sepal.Length ~ Species, data = iris) expect_equal( - r2_adj_uni(iris$Sepal.Length, iris$Species), + unname(heuristic_in_bin(iris$Species, iris$Sepal.Length)[1, 1]), summary(fit_lm)[["adj.r.squared"]] ) -}) - -test_that("r2_adj_uni() fails with NA", { - expect_equal(r2_adj_uni(0, 1:2), NA) -}) - -test_that("r2_adj() returns R-squared adjusted per column in df", { - fit_lm1 <- lm(Sepal.Length ~ Species, data = iris) - fit_lm2 <- lm(Sepal.Length ~ Sepal.Width, data = iris) expect_equal( - r2_adj(iris$Sepal.Length, iris[c("Species", "Sepal.Width")]), - c(summary(fit_lm1)[["adj.r.squared"]], summary(fit_lm2)[["adj.r.squared"]]) + unname(heuristic_in_bin(iris$Species, iris$Sepal.Length, scale = TRUE)[1, 1]), + summary(fit_lm)[["adj.r.squared"]] * var(iris$Sepal.Length) ) }) -test_that("r2_adj() can fail with NA", { - expect_equal(r2_adj(0, df = data.frame(x = c(1, 1), y = 1:2)), c(NA_real_, NA_real_)) +test_that("Failing heuristic_in_bin() returns NA", { + expect_equal(heuristic_in_bin(0, 1:2), cbind(stat = NA, n = 0)) }) -test_that("r2_adj() gives quite similar results to r_sq()", { - # This was the original approach to calculate R-squared NON-adjusted - r_sq <- function(s, x) { - suppressWarnings(stats::cor(s, data.matrix(x), use = "p")^2) +test_that("heuristic() returns average R-squared adjusted", { + ix <- c(rep(1, 60), rep(2, 90)) + y <- split(iris$Sepal.Length, ix) + x1 <- split(iris$Sepal.Width, ix) + x2 <- split(iris$Species, ix) + f <- function(y, x) { + summary(lm(y ~ x))[["adj.r.squared"]] } - p1 <- r2_adj(iris$Sepal.Length, iris[2:4]) - names(p1) <- colnames(iris[2:4]) - p2 <- r_sq(iris$Sepal.Length, iris[2:4]) - - expect_equal(p1, 1 - (1 - drop(p2)) * 149 / 148) -}) - - + expect_equal( + heuristic( + iris$Sepal.Width, iris$Sepal.Length, bins = ix, color_num = TRUE, scale = FALSE + ), + weighted.mean(mapply(f, y, x1), c(60, 90)) + ) + expect_equal( + heuristic( + iris$Species, iris$Sepal.Length, bins = ix, color_num = FALSE, scale = FALSE + ), + weighted.mean(mapply(f, y, x2), c(60, 90)) + ) +}) From 915d074715ca3ec65cb55150bb3396e8700681b6 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Sun, 31 Dec 2023 10:15:49 +0100 Subject: [PATCH 5/7] Update NEWS --- NEWS.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 8996565..f1e37ab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,27 @@ # shapviz 0.9.3 -## User-visible changes +## Selection of color scale in `sv_dependence()` + +If no SHAP interaction values are available, by default, the color feature `v'` is selected by the heuristic `potential_interaction()`, which essentially works as follows: + +1. If the feature `v` (the on the x-axis) is numeric, it is binned into `nbins` bins. +2. Per bin, the SHAP values of `v` are regressed onto `v` and the R-squared is calculated. +3. The R-squared are averaged over bins, weighted by the bin size. + +In this release, we have replaced R-squared by *adjusted* R-squared. In rare cases, this may have an impact on the color feature selected by `sv_dependence()`. + +Furthermore, we have introduced three parameters to control the heuristic: + +- `nbin = NULL`: Into how many quantile bins should a numeric `v` be binned? The default `NULL` equals the smaller of $n/20$ and $\sqrt n$ (rounded up), where $n$ is the sample size. +- `color_num` Should color features be converted to numeric, even if they are factors/characters? Default is `TRUE`. +- `scale = FALSE`: Should R-squared be multiplied with the sample variance of +within-bin SHAP values? If `TRUE`, bins with stronger vertical scatter will get higher weight. The default is `FALSE`. + +If SHAP interaction values are available, these parameters have no effect. In `sv_dependence()` they are called `ih_nbin`, `ih_color_num`, and `ih_scale`. + +This implementation partly implements the ideas in [#119](https://github.com/ModelOriented/shapviz/issues/119) of Roel Verbelen, thanks! + +## Other user-visible changes - `mshapviz()` objects can now be rowbinded via `rbind()` or `+`. Implemented by [@jmaspons](https://github.com/jmaspons) in [#110](https://github.com/ModelOriented/shapviz/pull/110). - `mshapviz()` is more strict when combining multiple "shapviz" objects. These now need to have identical column names, see [#114](https://github.com/ModelOriented/shapviz/pull/114). From b2accbdf6931516751fd7f617853f3ff4273b844 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Sun, 31 Dec 2023 10:16:47 +0100 Subject: [PATCH 6/7] Move functions related to potential_interactions() into separate script --- R/potential_interactions.R | 142 +++++++++++++++++++++++++++++++++ R/sv_dependence.R | 144 +--------------------------------- man/potential_interactions.Rd | 15 ++-- man/sv_dependence.Rd | 2 + 4 files changed, 154 insertions(+), 149 deletions(-) create mode 100644 R/potential_interactions.R diff --git a/R/potential_interactions.R b/R/potential_interactions.R new file mode 100644 index 0000000..9e8c558 --- /dev/null +++ b/R/potential_interactions.R @@ -0,0 +1,142 @@ +#' Interaction Strength +#' +#' Returns vector of interaction strengths between variable `v` and all other variables, +#' see Details. +#' +#' If SHAP interaction values are available, the interaction strength +#' between feature `v` and another feature `v'` is measured by twice their +#' mean absolute SHAP interaction values. +#' +#' Otherwise, we use a heuristic calculated as follows to calculate interaction strength +#' between `v` and each other "color" feature `v': +#' 1. If `v` is numeric, it is binned into `nbins` bins. +#' 2. Per bin, the SHAP values of `v` are regressed onto `v` and the adjusted R-squared +#' is calculated. +#' 3. The adjusted R-squared are averaged over bins, weighted by the bin size. +#' +#' Set `scale = TRUE` to multiply the adjusted R-squared by the within-bin variance +#' of the SHAP values. This will put higher weight to bins with larger scatter. +#' +#' Set `color_num = FALSE` to *not* turn the values of the "color" feature `v'` +#' to numeric. +#' +#' @param obj An object of class "shapviz". +#' @param v Variable name to calculate potential SHAP interactions for. +#' @param nbins Into how many quantile bins should a numeric `v` be binned? +#' The default `NULL` equals the smaller of \eqn{n/20} and \eqn{\sqrt n} (rounded up), +#' where \eqn{n} is the sample size. Ignored if `obj` contains SHAP interactions. +#' @param color_num Should other ("color") features `v'` be converted to numeric, +#' even if they are factors/characters? Default is `TRUE`. +#' Ignored if `obj` contains SHAP interactions. +#' @param scale Should adjusted R-squared be multiplied with the sample variance of +#' within-bin SHAP values? If `TRUE`, bins with stronger vertical scatter will get +#' higher weight. The default is `FALSE`. Ignored if `obj` contains SHAP interactions. +#' @returns A named vector of decreasing interaction strengths. +#' @export +#' @seealso [sv_dependence()] +potential_interactions <- function(obj, v, nbins = NULL, + color_num = TRUE, scale = FALSE) { + stopifnot(is.shapviz(obj)) + S <- get_shap_values(obj) + S_inter <- get_shap_interactions(obj) + X <- get_feature_values(obj) + nms <- colnames(obj) + v_other <- setdiff(nms, v) + stopifnot(v %in% nms) + if (ncol(obj) <= 1L) { + return(NULL) + } + + # Simple case: we have SHAP interaction values + if (!is.null(S_inter)) { + return(sort(2 * colMeans(abs(S_inter[, v, ]))[v_other], decreasing = TRUE)) + } + + # Complicated case: calculate heuristic per color variable + if (is.null(nbins)) { + nbins <- ceiling(min(sqrt(nrow(X)), nrow(X) / 20)) + } + out <- vapply( + X[v_other], # data.frame is a list + FUN = heuristic, + FUN.VALUE = 1.0, + s = S[, v], + bins = .fast_bin(X[[v]], nbins = nbins), + color_num = color_num, + scale = scale + ) + sort(out, decreasing = TRUE, na.last = TRUE) +} + +#' Interaction Heuristic +#' +#' Internal function used to calculate the interaction heuristics described in +#' [potential_interactions()]. +#' +#' @noRd +#' @keywords internal +#' +#' @inheritParams potential_interactions +#' @param color Feature values of the "color" feature. +#' @param s SHAP values of `v`. +#' @returns A single number. +heuristic <- function(color, s, bins, color_num, scale) { + if (isTRUE(color_num)) { + color <- .as_numeric(color) + } + color <- split(color, bins) + s <- split(s, bins) + M <- mapply(heuristic_in_bin, color = color, s = s, MoreArgs = list(scale = scale)) + stats::weighted.mean(M[1L, ], M[2L, ], na.rm = TRUE) +} + +#' Interaction Heuristic in Bin +#' +#' Internal function used to calculate the within-bin heuristic used in `heuristic()`. +#' See `heuristic()` for details. +#' +#' @noRd +#' @keywords internal +#' +#' @inheritParams heuristic +#' @returns +#' A (1x2) matrix. The first value equals the (possibly) scaled adjusted R-squared, +#' the second the number of used observations. +heuristic_in_bin <- function(color, s, scale = FALSE) { + suppressWarnings( + tryCatch( + { + z <- stats::lm(s ~ color) + r <- z$residuals + var_y <- stats::var(z$fitted.values + r) + var_r <- sum(r^2) / z$df.residual # var(r) would give non-adjusted R2 + stat <- 1 - var_r / var_y + if (scale) { + stat <- stat * var_y # = var_y - var_r + } + cbind(stat = stat, n = length(r)) + }, + error = function(e) return(cbind(stat = NA, n = 0)) + ) + ) +} + +# Like as.numeric(), but can deal with factor variables +.as_numeric <- function(z) { + if (is.numeric(z)) { + return(z) + } + if (is.character(z)) { + z <- factor(z) + } + as.numeric(z) +} + +# Bins discrete z into integer valued bins +.fast_bin <- function(z, nbins) { + if (.is_discrete(z, n_unique = nbins)) { + return(z) + } + q <- stats::quantile(z, seq(0, 1, length.out = nbins + 1L), na.rm = TRUE) + findInterval(z, unique(q), rightmost.closed = TRUE) +} diff --git a/R/sv_dependence.R b/R/sv_dependence.R index 4d7e1cc..ebc9924 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -3,6 +3,8 @@ #' Scatterplot of the SHAP values of a feature against its feature values. #' If SHAP interaction values are available, setting `interactions = TRUE` allows #' to focus on pure interaction effects (multiplied by two) or on pure main effects. +#' By default, the feature on the color scale is selected via SHAP interactions +#' (if available) or an interaction heuristic, see [potential_interactions()]. #' #' @importFrom rlang .data #' @@ -205,151 +207,9 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 patchwork::wrap_plots(plot_list) } -#' Interaction Strength -#' -#' Returns vector of interaction strengths between variable `v` and all other variables, -#' see Details. -#' -#' If SHAP interaction values are available, the interaction strength -#' between feature `v` and another feature `v'` is measured by twice their -#' mean absolute SHAP interaction values. -#' -#' Otherwise, we use a heuristic calculated as follows to calculate interaction strength -#' between `v` and each other "color" feature `v': -#' 1. If `v` is numeric, it is binned into `nbins` bins. -#' 2. Per bin, the SHAP values of `v` are regressed onto `v`. -#' 3. Per bin, the adjusted R-squared is calculated. -#' 4. The adjusted R-squared are averaged over bins, weighted by the bin size. -#' -#' Set `scale = TRUE` to multiply the adjusted R-squared by the within-bin variance -#' of the SHAP values. This will put higher weight to bins with larger scatter. -#' -#' Set `color_num = FALSE` to *not* turn the values of `v` into numerics. -#' -#' @param obj An object of class "shapviz". -#' @param v Variable name for which potential SHAP interactions are to be computed. -#' @param nbins Into how many quantile bins should a numeric `v` be binned? -#' The default `NULL` equals the smaller of \eqn{n/20} and \eqn{\sqrt n} (rounded up), -#' where \eqn{n} is the sample size. Ignored if `obj` contains SHAP interactions. -#' @param color_num Should other ("color") features `v'` be converted to numeric, -#' even if they are factors/characters? Default is `TRUE`. -#' Ignored if `obj` contains SHAP interactions. -#' @param scale Should adjusted R-squared be multiplied with the sample variance of -#' within-bin SHAP values? The default is `FALSE`. -#' Ignored if `obj` contains SHAP interactions. -#' @returns A named vector of decreasing interaction strengths. -#' @export -#' @seealso [sv_dependence()] -potential_interactions <- function(obj, v, nbins = NULL, - color_num = TRUE, scale = FALSE) { - stopifnot(is.shapviz(obj)) - S <- get_shap_values(obj) - S_inter <- get_shap_interactions(obj) - X <- get_feature_values(obj) - nms <- colnames(obj) - v_other <- setdiff(nms, v) - stopifnot(v %in% nms) - if (ncol(obj) <= 1L) { - return(NULL) - } - - # Simple case: we have SHAP interaction values - if (!is.null(S_inter)) { - return(sort(2 * colMeans(abs(S_inter[, v, ]))[v_other], decreasing = TRUE)) - } - - # Complicated case: calculate heuristic per color variable - if (is.null(nbins)) { - nbins <- ceiling(min(sqrt(nrow(X)), nrow(X) / 20)) - } - out <- vapply( - X[v_other], # data.frame is a list - FUN = heuristic, - FUN.VALUE = 1.0, - s = S[, v], - bins = .fast_bin(X[[v]], nbins = nbins), - color_num = color_num, - scale = scale - ) - sort(out, decreasing = TRUE, na.last = TRUE) -} - # Helper functions # Checks if z is discrete .is_discrete <- function(z, n_unique) { is.factor(z) || is.character(z) || is.logical(z) || (length(unique(z)) <= n_unique) } - -# Like as.numeric(), but can deal with factor variables -.as_numeric <- function(z) { - if (is.numeric(z)) { - return(z) - } - if (is.character(z)) { - z <- factor(z) - } - as.numeric(z) -} - -# Bins discrete z into integer valued bins -.fast_bin <- function(z, nbins) { - if (.is_discrete(z, n_unique = nbins)) { - return(z) - } - q <- stats::quantile(z, seq(0, 1, length.out = nbins + 1L), na.rm = TRUE) - findInterval(z, unique(q), rightmost.closed = TRUE) -} - -#' Interaction Heuristic -#' -#' Internal function used to calculate the interaction heuristics described in -#' [potential_interactions()]. -#' -#' @noRd -#' @keywords internal -#' -#' @inheritParams potential_interactions -#' @param color Feature values of the "color" feature. -#' @param s SHAP values of `v`. -#' @returns A single number. -heuristic <- function(color, s, bins, color_num, scale) { - if (isTRUE(color_num)) { - color <- .as_numeric(color) - } - color <- split(color, bins) - s <- split(s, bins) - M <- mapply(heuristic_in_bin, color = color, s = s, MoreArgs = list(scale = scale)) - stats::weighted.mean(M[1L, ], M[2L, ], na.rm = TRUE) -} - -#' Interaction Heuristic in Bin -#' -#' Internal function used to calculate the within-bin heuristic used in `heuristic()`. -#' See `heuristic()` for details. -#' -#' @noRd -#' @keywords internal -#' -#' @inheritParams heuristic -#' @returns -#' A (1x2) matrix. The first value equals the (possibly) scaled adjusted R-squared, -#' the second the number of used observations. -heuristic_in_bin <- function(color, s, scale = FALSE) { - suppressWarnings( - tryCatch( - { - z <- stats::lm(s ~ color) - r <- z$residuals - var_y <- stats::var(z$fitted.values + r) - var_r <- sum(r^2) / z$df.residual # var(r) would give non-adjusted R2 - stat <- 1 - var_r / var_y - if (scale) { - stat <- stat * var_y # = var_y - var_r - } - cbind(stat = stat, n = length(r)) - }, - error = function(e) return(cbind(stat = NA, n = 0)) - ) - ) -} diff --git a/man/potential_interactions.Rd b/man/potential_interactions.Rd index 6dc8b9c..24edc51 100644 --- a/man/potential_interactions.Rd +++ b/man/potential_interactions.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sv_dependence.R +% Please edit documentation in R/potential_interactions.R \name{potential_interactions} \alias{potential_interactions} \title{Interaction Strength} @@ -9,7 +9,7 @@ potential_interactions(obj, v, nbins = NULL, color_num = TRUE, scale = FALSE) \arguments{ \item{obj}{An object of class "shapviz".} -\item{v}{Variable name for which potential SHAP interactions are to be computed.} +\item{v}{Variable name to calculate potential SHAP interactions for.} \item{nbins}{Into how many quantile bins should a numeric \code{v} be binned? The default \code{NULL} equals the smaller of \eqn{n/20} and \eqn{\sqrt n} (rounded up), @@ -20,8 +20,8 @@ even if they are factors/characters? Default is \code{TRUE}. Ignored if \code{obj} contains SHAP interactions.} \item{scale}{Should adjusted R-squared be multiplied with the sample variance of -within-bin SHAP values? The default is \code{FALSE}. -Ignored if \code{obj} contains SHAP interactions.} +within-bin SHAP values? If \code{TRUE}, bins with stronger vertical scatter will get +higher weight. The default is \code{FALSE}. Ignored if \code{obj} contains SHAP interactions.} } \value{ A named vector of decreasing interaction strengths. @@ -39,15 +39,16 @@ Otherwise, we use a heuristic calculated as follows to calculate interaction str between \code{v} and each other "color" feature `v': \enumerate{ \item If \code{v} is numeric, it is binned into \code{nbins} bins. -\item Per bin, the SHAP values of \code{v} are regressed onto \code{v}. -\item Per bin, the adjusted R-squared is calculated. +\item Per bin, the SHAP values of \code{v} are regressed onto \code{v} and the adjusted R-squared +is calculated. \item The adjusted R-squared are averaged over bins, weighted by the bin size. } Set \code{scale = TRUE} to multiply the adjusted R-squared by the within-bin variance of the SHAP values. This will put higher weight to bins with larger scatter. -Set \code{color_num = FALSE} to \emph{not} turn the values of \code{v} into numerics. +Set \code{color_num = FALSE} to \emph{not} turn the values of the "color" feature \verb{v'} +to numeric. } \seealso{ \code{\link[=sv_dependence]{sv_dependence()}} diff --git a/man/sv_dependence.Rd b/man/sv_dependence.Rd index ff3b546..ba7f9cb 100644 --- a/man/sv_dependence.Rd +++ b/man/sv_dependence.Rd @@ -85,6 +85,8 @@ An object of class "ggplot" (or "patchwork") representing a dependence plot. Scatterplot of the SHAP values of a feature against its feature values. If SHAP interaction values are available, setting \code{interactions = TRUE} allows to focus on pure interaction effects (multiplied by two) or on pure main effects. +By default, the feature on the color scale is selected via SHAP interactions +(if available) or an interaction heuristic, see \code{\link[=potential_interactions]{potential_interactions()}}. } \section{Methods (by class)}{ \itemize{ From 785d9ee1d83a5d56fb4f791074fa1e9f58e9679f Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Mon, 1 Jan 2024 11:32:20 +0100 Subject: [PATCH 7/7] Add "adjusted" parameter --- NEWS.md | 27 ++++++++---- R/potential_interactions.R | 41 ++++++++++++------- R/sv_dependence.R | 24 +++++++---- man/figures/VIGNETTE-dep-ranger.png | Bin 10865 -> 12190 bytes man/figures/VIGNETTE-dep.png | Bin 7571 -> 6720 bytes man/potential_interactions.Rd | 19 +++++++-- man/sv_dependence.Rd | 6 ++- tests/testthat/test-potential_interactions.R | 40 +++++++++++++++--- vignettes/basic_use.Rmd | 2 +- vignettes/multiple_output.Rmd | 2 +- 10 files changed, 118 insertions(+), 43 deletions(-) diff --git a/NEWS.md b/NEWS.md index f1e37ab..4c3eaea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,28 +1,41 @@ # shapviz 0.9.3 -## Selection of color scale in `sv_dependence()` +## `sv_dependence()`: Control over automatic color feature selection -If no SHAP interaction values are available, by default, the color feature `v'` is selected by the heuristic `potential_interaction()`, which essentially works as follows: +### How is the color feature selected anyway? + +If no SHAP interaction values are available, by default, the color feature `v'` is selected by the heuristic `potential_interaction()`, which works as follows: 1. If the feature `v` (the on the x-axis) is numeric, it is binned into `nbins` bins. 2. Per bin, the SHAP values of `v` are regressed onto `v` and the R-squared is calculated. 3. The R-squared are averaged over bins, weighted by the bin size. -In this release, we have replaced R-squared by *adjusted* R-squared. In rare cases, this may have an impact on the color feature selected by `sv_dependence()`. +This measures how much variability in the SHAP values is explained by `v'`, after accounting for `v`. -Furthermore, we have introduced three parameters to control the heuristic: +We have introduced four parameters to control the heuristic. Their defaults are in line with the old behaviour. - `nbin = NULL`: Into how many quantile bins should a numeric `v` be binned? The default `NULL` equals the smaller of $n/20$ and $\sqrt n$ (rounded up), where $n$ is the sample size. - `color_num` Should color features be converted to numeric, even if they are factors/characters? Default is `TRUE`. - `scale = FALSE`: Should R-squared be multiplied with the sample variance of within-bin SHAP values? If `TRUE`, bins with stronger vertical scatter will get higher weight. The default is `FALSE`. +- `adjusted = FALSE`: Should *adjusted* R-squared be calculated? + +If SHAP interaction values are available, these parameters have no effect. In `sv_dependence()` they are called `ih_nbin` etc. + +This partly implements the ideas in [#119](https://github.com/ModelOriented/shapviz/issues/119) of Roel Verbelen, thanks a lot for your patient explanations! + +### Further plans? -If SHAP interaction values are available, these parameters have no effect. In `sv_dependence()` they are called `ih_nbin`, `ih_color_num`, and `ih_scale`. +We will continue to experiment with the defaults, which might change in the future. A good alternative to the current (naive) defaults could be: -This implementation partly implements the ideas in [#119](https://github.com/ModelOriented/shapviz/issues/119) of Roel Verbelen, thanks! +- `nbins = 7`: Smaller than now to not overfit too strongly with factor/character color features. +- `color_num = FALSE`: To not naively integer encode factors/characters. +- `scale = TRUE`: To account for non-equal spread in bins. +- `adjusted = TRUE`: To not put too much weight on factors with many categories. ## Other user-visible changes +- `sv_dependence()`: If `color_var = "auto"` (default) and no color feature seems to be relevant (SHAP interaction is `NULL`, or heuristic returns no positive value), there won't be any color scale. - `mshapviz()` objects can now be rowbinded via `rbind()` or `+`. Implemented by [@jmaspons](https://github.com/jmaspons) in [#110](https://github.com/ModelOriented/shapviz/pull/110). - `mshapviz()` is more strict when combining multiple "shapviz" objects. These now need to have identical column names, see [#114](https://github.com/ModelOriented/shapviz/pull/114). @@ -30,7 +43,7 @@ This implementation partly implements the ideas in [#119](https://github.com/Mod - `print.shapviz()` now shows top two rows of SHAP matrix. - Re-activate all unit tests. -- Setting `nthread = 1` in all calls to `xgb.DMatrix()` as suggested by [@jmaspons](https://github.com/jmaspons) in [issue #109](https://github.com/ModelOriented/shapviz/issues/109). +- Setting `nthread = 1` in all calls to `xgb.DMatrix()` as suggested by [@jmaspons](https://github.com/jmaspons) in [#109](https://github.com/ModelOriented/shapviz/issues/109). - Added "How to contribute" to README. - `permshap()` connector is now part of {kerneshap} [#122](https://github.com/ModelOriented/shapviz/pull/122). diff --git a/R/potential_interactions.R b/R/potential_interactions.R index 9e8c558..fec72e7 100644 --- a/R/potential_interactions.R +++ b/R/potential_interactions.R @@ -10,16 +10,18 @@ #' Otherwise, we use a heuristic calculated as follows to calculate interaction strength #' between `v` and each other "color" feature `v': #' 1. If `v` is numeric, it is binned into `nbins` bins. -#' 2. Per bin, the SHAP values of `v` are regressed onto `v` and the adjusted R-squared +#' 2. Per bin, the SHAP values of `v` are regressed onto `v`, and the R-squared #' is calculated. -#' 3. The adjusted R-squared are averaged over bins, weighted by the bin size. +#' 3. The R-squared are averaged over bins, weighted by the bin size. #' -#' Set `scale = TRUE` to multiply the adjusted R-squared by the within-bin variance +#' Set `scale = TRUE` to multiply the R-squared by the within-bin variance #' of the SHAP values. This will put higher weight to bins with larger scatter. #' #' Set `color_num = FALSE` to *not* turn the values of the "color" feature `v'` #' to numeric. #' +#' Finally, set `adjusted = TRUE` to use *adjusted* R-squared. +#' #' @param obj An object of class "shapviz". #' @param v Variable name to calculate potential SHAP interactions for. #' @param nbins Into how many quantile bins should a numeric `v` be binned? @@ -31,11 +33,12 @@ #' @param scale Should adjusted R-squared be multiplied with the sample variance of #' within-bin SHAP values? If `TRUE`, bins with stronger vertical scatter will get #' higher weight. The default is `FALSE`. Ignored if `obj` contains SHAP interactions. +#' @param adjusted Should *adjusted* R-squared be used? Default is `FALSE`. #' @returns A named vector of decreasing interaction strengths. #' @export #' @seealso [sv_dependence()] -potential_interactions <- function(obj, v, nbins = NULL, - color_num = TRUE, scale = FALSE) { +potential_interactions <- function(obj, v, nbins = NULL, color_num = TRUE, + scale = FALSE, adjusted = FALSE) { stopifnot(is.shapviz(obj)) S <- get_shap_values(obj) S_inter <- get_shap_interactions(obj) @@ -63,7 +66,8 @@ potential_interactions <- function(obj, v, nbins = NULL, s = S[, v], bins = .fast_bin(X[[v]], nbins = nbins), color_num = color_num, - scale = scale + scale = scale, + adjusted = adjusted ) sort(out, decreasing = TRUE, na.last = TRUE) } @@ -71,7 +75,8 @@ potential_interactions <- function(obj, v, nbins = NULL, #' Interaction Heuristic #' #' Internal function used to calculate the interaction heuristics described in -#' [potential_interactions()]. +#' [potential_interactions()]. It calls `heuristic_in_bin()` per bin and aggregates +#' the result. #' #' @noRd #' @keywords internal @@ -80,13 +85,18 @@ potential_interactions <- function(obj, v, nbins = NULL, #' @param color Feature values of the "color" feature. #' @param s SHAP values of `v`. #' @returns A single number. -heuristic <- function(color, s, bins, color_num, scale) { +heuristic <- function(color, s, bins, color_num, scale, adjusted) { if (isTRUE(color_num)) { color <- .as_numeric(color) } color <- split(color, bins) s <- split(s, bins) - M <- mapply(heuristic_in_bin, color = color, s = s, MoreArgs = list(scale = scale)) + M <- mapply( + heuristic_in_bin, + color = color, + s = s, + MoreArgs = list(scale = scale, adjusted = adjusted) + ) stats::weighted.mean(M[1L, ], M[2L, ], na.rm = TRUE) } @@ -100,21 +110,22 @@ heuristic <- function(color, s, bins, color_num, scale) { #' #' @inheritParams heuristic #' @returns -#' A (1x2) matrix. The first value equals the (possibly) scaled adjusted R-squared, -#' the second the number of used observations. -heuristic_in_bin <- function(color, s, scale = FALSE) { +#' A (1x2) matrix with heuristic and number of observations. +heuristic_in_bin <- function(color, s, scale = FALSE, adjusted = FALSE) { suppressWarnings( tryCatch( { z <- stats::lm(s ~ color) r <- z$residuals + n <- length(r) var_y <- stats::var(z$fitted.values + r) - var_r <- sum(r^2) / z$df.residual # var(r) would give non-adjusted R2 + denom <- if (adjusted) z$df.residual else n - 1 + var_r <- sum(r^2) / denom stat <- 1 - var_r / var_y if (scale) { - stat <- stat * var_y # = var_y - var_r + stat <- stat * var_y } - cbind(stat = stat, n = length(r)) + cbind(stat = stat, n = n) }, error = function(e) return(cbind(stat = NA, n = 0)) ) diff --git a/R/sv_dependence.R b/R/sv_dependence.R index ebc9924..5b6f180 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -33,8 +33,8 @@ #' Requires SHAP interaction values. If `color_var = NULL` (or it is equal to `v`), #' the pure main effect of `v` is visualized. Otherwise, twice the SHAP interaction #' values between `v` and the `color_var` are plotted. -#' @param ih_nbins,ih_color_num,ih_scale Interaction heuristic (ih) parameters used to -#' select the color variable, see [potential_interactions()]. +#' @param ih_nbins,ih_color_num,ih_scale,ih_adjusted Interaction heuristic (ih) +#' parameters used to select the color variable, see [potential_interactions()]. #' Only used if `color_var = "auto"` and if there are no SHAP interaction values. #' @param ... Arguments passed to [ggplot2::geom_jitter()]. #' @returns An object of class "ggplot" (or "patchwork") representing a dependence plot. @@ -78,7 +78,7 @@ sv_dependence.shapviz <- function(object, v, color_var = "auto", color = "#3b528 viridis_args = getOption("shapviz.viridis_args"), jitter_width = NULL, interactions = FALSE, ih_nbins = NULL, ih_color_num = TRUE, - ih_scale = FALSE, ...) { + ih_scale = FALSE, ih_adjusted = FALSE, ...) { p <- length(v) if (p > 1L || length(color_var) > 1L) { if (is.null(color_var)) { @@ -100,6 +100,7 @@ sv_dependence.shapviz <- function(object, v, color_var = "auto", color = "#3b528 ih_nbins = ih_nbins, ih_color_num = ih_color_num, ih_scale = ih_scale, + ih_adjusted = ih_adjusted, ... ), SIMPLIFY = FALSE @@ -126,12 +127,20 @@ sv_dependence.shapviz <- function(object, v, color_var = "auto", color = "#3b528 jitter_width <- 0.2 * .is_discrete(X[[v]], n_unique = 7L) } - # Set color value + # Set color value if "auto" if (!is.null(color_var) && color_var == "auto" && !("auto" %in% nms)) { scores <- potential_interactions( - object, v, nbins = ih_nbins, color_num = ih_color_num, scale = ih_scale + object, + v, + nbins = ih_nbins, + color_num = ih_color_num, + scale = ih_scale, + adjusted = ih_adjusted ) - color_var <- names(scores)[1L] # NULL if p = 1L + # 'scores' can be NULL, or a numeric vector like c(0.1, 0, -0.01, NaN, NA) + # Thus, let's take the first positive one (or none) + scores <- scores[!is.na(scores) & scores > 0] # NULL stays NULL + color_var <- if (length(scores) >= 1L) names(scores)[1L] } if (isTRUE(interactions)) { if (is.null(color_var)) { @@ -183,7 +192,7 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 viridis_args = getOption("shapviz.viridis_args"), jitter_width = NULL, interactions = FALSE, ih_nbins = NULL, ih_color_num = TRUE, - ih_scale = FALSE, ...) { + ih_scale = FALSE, ih_adjusted = FALSE, ...) { stopifnot( length(v) == 1L, length(color_var) <= 1L @@ -201,6 +210,7 @@ sv_dependence.mshapviz <- function(object, v, color_var = "auto", color = "#3b52 ih_nbins = ih_nbins, ih_color_num = ih_color_num, ih_scale = ih_scale, + ih_adjusted = ih_adjusted, ... ) plot_list <- add_titles(plot_list, nms = names(object)) # see sv_waterfall() diff --git a/man/figures/VIGNETTE-dep-ranger.png b/man/figures/VIGNETTE-dep-ranger.png index 75abf164bb609d5491ca8f1c971a1c11b52f4564..d083d8a980e849b5546c47408c9e9d134e9d4da7 100644 GIT binary patch literal 12190 zcmd6NcUV(Rw=ar_fV2Ri_a+jGG!c|8AiW7e1vFsjRf_Z`QiTYDQdNq82_=-!s|meH zQ=}uH5IWM^+2MWPd(Zvucg}O}e>ZIA*?Y>GSu<e6hQw?sc?=Yotfzr4?7@b{!hd|fGMK|8X1$dAy z$ayy=XAfp4PZlRH7AGH8r{}Cr{%lSG>`sB~PQe^bp`4CkoQ@G(j*;As(L4^Zy!Nkn z?c>i!7Oxv04-o1&L-5%lSXWN-!iVhWzr~T z)O_2Z^^X1*1wE{y9!^QOU0J(bS*Igjx8v-rQ&p==^~VG)YS(x)xK(~ z4c=27)>aQyS2%5b}+Px^+{TR@G9BBU)-F^byej@$B8-|0ozym%=1s|j_9%L{c zzGpi82t3S(xy*-oEQg<14hvWhk!*)WY)7T+M`avGSkW_1s5|JZBjA z83uFK5q~z0I2*?h*quk75oQ{gJb-n8t+R(_PXTEpB6HI;e?~+^^@Z?%q20B>hKPuZ z=$@Lg$@7fWO#gRm?dB!6$t&ijT3237(y*2`gs}xs2@1a;hRP~cvs}Fr5v%E$awN^i zCC;JOv3apAglL?Jw+A%vPPObx=xg^&mx(!?F3DVY63)$}6B1JWF8y*s&dBF8`JItG zq}X(isqTo`(D+e9f6ipqnyCG9zRzc*e=Smu`3ucJ>MrZ!A7rsOL^JFLUn%q|83e3) z@qhF=5G8%Qj^qn=jUBr!RkzkydoU0+GLD1!w!&(fVK|szY~=xeW?v8T$x?)wmmKGF z?Jc1M&=9feKOt(wL%d>GTzu!jO@nbKq_`=kunn{|ysG8mtyC`42E;XhZ ze%bHg4SA@AXt_vQsT=y`wwM*8;-Xs7Y|>si>|T&kgkH(zWcW7<|K@zk6mOQ#vrB6a zD!HQrz*DM8!IXOw9-4V}5lM&_(p`_QG#@UlpHgRXbPNO*mZH&>haY=2DgQ`>LLcIX}sR_#pc!?Jb1 z_CYFoiZ?VGt+h}ARxriFS)jY^tpt7~8@aIm3RVHkMv>4=+V#=|ajDGxHvJkG`f?_< zdB4mV^|0UKtGOPg+6#BQL<{ON4mfx|ddYfC$tv(s@J0Ob{1sSjvQ}(n zrCY}nuU0qlSTALP0mb26qby*vJLw)(+a2%2k(ii^(9N3Jde==kE>H_))U={v;D}jk zjh#{X-G-FQc4kQtMKn%4`t5J+GoJASo15QnuA}(?l%yLJYGQGZ11L?ga;Df{yZrJJ zIF>sU2h)6vW&%MW|N8mD4SfhYaa<`I(mN%=5Wm^+xsG513zPkS`H}&S&TIcR@I~OB zW$DjmDxZ>qObnBwA^q37wijimdL&Xy5)R4G1B2u=*ebhf_~Zi-)6a1a{)k8PSz8Nn zo)z7KNk@1xSC-(y20q}XE6I7YL<`*Y867?rT40xN_H@%0%B09ix_;l5xh63YD7*V^ z_Iw&{=j=xFx7WYqZn6EweeGbDUN@M(BH-=8nAi7ubOo39{kl}ImajS>BM=&7GkhIw z(@w9Bh#-%x;0oRArk(XJF;VIE3B93=lR2ohRe9LQ!$~cKUpL|MRWAZ{ds6S<@vl|$-Xm&f2I2eatMuB`1qEpbsSNW&(21wEF?TZzg@NsemonH4 zWlOK#wTG-F-rNqikk^1L$2|~AVR3j4Cy~oT^tm;H)5dU@Y~(%}*l0xE%>aNd4@I^oi0?mS}y@ZOWheX@4QCffn0$Z4ZZgP)nptxU;00IaUw+**DSI$)CZrP zeoeJ#CM$k?HEfwPCxqKvb6g4KSo|9PbWr#yF!bQLOJ?Gixpxwbnh}s(aP$tRYzsr2 zvgSz+y*M95=UtTcO_9be3tV38HxW?|sfV4?9vlu!>$w5REP79nE4rNr_wAk7#P{~S;Z!OCf3@?Pr6v}}PtmCivHRLfmhkBM-BC95=>@?cxTgB1iVCs(vqc>~bYjE{ zc5?rLo-di(EM8A8D?~UfwcHKaM-5n^n%aT>QVc5)rOkar?pQ4_=!Y+7f&bFvMt`TX z`7Sy@;z&M&Nr?OT(_uD%P6cJ-pnLC%t9zI*Q+Vm220g%y2ng@$*0b2Fnzz|D2!aNW zwfoJ4k_u4wARv4d+fz4qHLAsx_1r0#PFdQW)boU#4A3E@9HnqwQPVPnItI$> z&)%CUp1%(}VTdp6*Jxu)>5$AJ!##NTuv}~|DjAadHB!aw+2k`GFvWg12q*I8ZJ_(H z>c{PT|Lnt(OgS&DRDLi|G|8~4#J#&$?c<11p_#4MxJBaXa^Grq4HluXiNh zX3k6@$#93y*o=`0@j@xK>bck1|tf> zTpH2g$^uc_zlzKs!gup!;pKKV+3{xh2!BmATltqZ-1l)3jCP+q*|np+7Lv_AzqN#& zkma-ZqID$ClvDSt_SqLu03vlHrh zEMY{tbQCE?P|HA+VjE#+so$Udj<%jjmcdU|R#y7!lhwU3--~^_r}38vdSrzO1?3sj zbPJX%wno#N7^p)v+Y>lROXAt<9zulBqapqme7{;W#zBB3qCv}0i#TyS5Y?%Y;PA11 zEvT(o^HY#Dg-hW@b$$tF9=|r(YmyG; z4Gy6bE=0s##LBtPsl~lWij-18O=TQ?W4JLEJkuO{LYHz9m{G9aPm}f4{YlTJ{0lQ5 zw&0=kDbGhm^gK_Q|IFkW=g%a3UsXf-Wb$7n3)IpO7)@7?y&o{uz3JLJ-LXEaMt=Po zFJCTQ)r3cP^eMZ~#MfZD$4Fjzll-aE-KuWL@y(wRYFCjmY3KLnHXpEqa5P z5k|{9m(&Ow$L`nU!7N>KOR5NxQt=PjM`)G-YP#h{xQ}eJi42p5-t6N6Hm)lUc>4oYS|Qrp%Q9ZM6@v8 zilA&op7ORLHO_YsZDgy<;Su`Jq()Z5i3+^wRZf*lqrz@rCkOK@>ZPUp_iLO@ZnI$^ z{OM7Vxu4)pFy|Y$EkxUda@xLd=3{io^mW4t9@l$OKkS~aF~6c*vj3Xw_kik#@HbxA z&h&QH-k7qNeDLeE%+vv?X9I*3@qH9}}$4)Xx$i1J?6@hb-Won~Ww^Kt=3 zU=n_l7$wLkWI;>lEE`1CP*&|iBajD43vXX1K$8`~G=>1fulBXtiG4`gRy@B@-mIs( zLW}?A7PBs$2F+r27BRK*=O6uI-M^5@69edpLd*8VZ;ZvY%b!jN>NlposN~;5+{%mR#y}9KBKgUaZJ3 zW)U&alWa4Ra+3>Qs>kzyjxJ;lgd8=Sg+QBL5QHKQ8OA_C4wdx3D+htGd+PxLAMzO#jY^2F$4 z^DWxJn)7ya6fb#yALK3;D|v<2YX~TkE1XtKIeMUjE(mJrgficH`67C!gEt`~iPBGo zhfk5v6@%mUK-uYROx;B+?Bw?O7GU871@uF12}9%@o5F6w?(j&)@B$iOSzkpAInGNl zEPyE!#&kjQpn`6IO?ygDlUOgQF4`%U{+@wU@#$#!59GUZi-h`tHTA}srhXhr;pyab zKY}WT1J+ar6@mbmmA9?Ef21~kpO%8GQ~%rw-%s~!DgJ(467}QAgN>pZzB>fiXw`>1 zovJFhu;){hQPS(CfhZMNQfxAWj}+}Ng#FHtq~r7yNA5d)MKOZzz^c;?t)c?5T;i`@ z)r#TY_N{8M#8U*V1I_@G^CuZ}vc-k!DiN&6C`)`%x>c~h)OS0{uPR-KWi5WwM^jbn zleT>sg;CP(k2_VTj+DJ@=JqxY0yv^}wdPGu~wU?8)&d|Dx%Mr)oU9nw=S=G%$sN&0`shasdt zF5nPamYG-*2R&NVGesTGYM)CAo%ZDNh%Qe`O$gmD)P{z;Q$xUFC~K^bx`J)QZV-eV zkfs-w?OlUWd5wImlGbVOp^9PF)AseL=@7iF99)f8~AcPhW>N>R8^srezmB(|%!d zOG%THpg0U(e?5fmc77`7DK6i26d&vAPKPB!_m5S9g`&}mdjt5w^IR$S1j?s3xJXur z7=K$0e?V-Mw}!S8qklT&VnL}E2=~mSn~WD2(BzU2bwx+1=hUu{;RJJva9xVe2!56Q zuoq23)XlynCa*6~x4RZ9i1V zhrKp&Y_TLFSCkXm=vHUP&WxADftZFW*LV=mvP|NLBJ_;cWG{QDjtK4W?i21QdJajsgj*YfY3KzBz zARydS-fF{rm$AF&Q%DcdkQrlf0k)QzK!*$~p;{wv_DTs*0(U{e{W{V7`P&Fp-EX)b zut%|-xI=HYa$g#EDVOOuiA=*56yoqLoMML#Sbz6oYX*t%ZlgSN#+Z?$x5&rW6?Uu# zA8+Ny-;?rWWl~&LW|j)ULpy}8oqUJk^V))F-CtWKJw`}Az?sxF=JnwvRV-~=1LFMe^_B>T{mH;_s2Ly7Sg z@<%mTIM}Ye1b1OiVv%0NA}c46vC!@PdHXJTSg_0{4(bJUr7#D1gq1-4j42! zKz{x50X&)cc{qnR{R32V75N}M-tCVmlfB$vLnArxvAPRIYLIWFzoF7z|> z6?d6EbF)# zMl<|x?=HifMQj*4n zFZpxtdbiXWC9)&b=%HH(fz;f4mv3!Wc-RDx$4_bD86r(^%BUZ4_Ni&&>XOOST)R(> zZP*&_zj~!FUW?Y1Q9gA{B5#ZQ*YD z5NUiq`?Xtuo&?;cYYon~G8W$uECeu<(7#tZU+AxeR`{21jadRnuuyder|?$=>3@WV z68aZi5Z>YMHVYy-|72ZfJJQRSy=1@StZ)3+nkF=NR#9hDwXbUY?>y@E8|ihDu6CNS z(!GC?2X8##zrJ1}6xnKj$uyV4h+9a3K0EbQ1kNJ_mu3m4QzBp{eWlQ$Zl{X`c%+#b z?C6)OOZ4<4lU()-OjoR&i~yBfu!Ta-dOlqtWb;Mhv@V{%2=-NYTs01Th~hg3T0Eft zFE20)7bfYft`2w8-xN{_ne?F8WW>Lu!FgZ)6?CS#o{|~dqStF+Nv%2__}k*(<*#kC zg^S*Sc_A!`O_t5YB@A{f@;7po&@0Cj-VQIf{N$q=FSQv#dS?tCS_vVP1xWEED~Eoe zFVVcJ-L9L7(2A@fK{N}Cf!15BgR{~K#UR=%F z$+j2OG37ISU*LD4vhl&p*2Bj8m5a$24+!-*uCYoOnb1OFbUS>Et)#VBH@@q*P8St zeUNFvw9~?jC5K{@5pQmj;aIO|;pSjT2}3C!9sJRJ1&g?dEBpGrkhu&0(Yf(h#8+xT zD9$N#i%%osqIpo;Bhkm-zEMcUu`?Tefcl-1$aAX(Hb4V#UyaB%%S=C%HX12CH5LQv zkQ3^FtbA?%9onji3#&oenyR~UuBEWnmE%2r-P(M%J&<9UPU&3)7n^YKb`W;%jWR-U z=y-@TNeb2W5f9SZTTqNSLqy&c!K7CPng@6Wf5ctE{kYBd)9Af4eP+9x!^L?5VLNUY(!uk4v+Sh75P~+S8w)F7l;>}YTYmW zyR$NwW&8EiXvIz5F4bA0unTJWO1K+MjtnU`Q&JX`n*#>0pHOv(9jv7VGr!a>tM=iL ztQcnZ)6FK3NKtRpE{ewQ)3e%euEfOEnk#cB0~@i^5u(@F=wAApa_gZ@!M{@nQpyD| zJ7_z5_4&7~GAZzH+;#CzXfuf&L$;1*+;q6MDF!A>HrOOFX?SwhZSV^EBoZD0PM3kJ z1+#bCAeKJI+5xRhW8SNOtyZNg36@r@_+25+tAg%1h_kEQv5#{=@1dx;l0r|UVzN|y zxX+%Z9n3N7m#f5T_)BIxp=%Kgo@zI#qu+4xP2Py0iQJADcqx{wz5I6NU0-K>;hLV5 zNJUhI4DXik`c^KfUf$F0AgH%VdGT{!wLf6*l7emL-!>hSHrrY6wMt^xf3Yw<(D{*O zIJi!pet!o(m=Xs?z5M!5x{qQ0S9JZq$KDC0+}Y6OsWx|f>794ptw2O*<0FEb`w?Wv zX0L6KtwJL`a?vl-q6)s(>jOW{5872b_Qh+X6sbG+3N&pR& z`kW!C!LBh$anuoORFbm8tH*wa$D|(xf1p_n{mi`FI^<@QO=8ywpL*Be))6CJCE9+~ zxa5V&UUY*Db##iZ-7UtCV4a^)1MSPb&5|zatkMXk+gC{YzI(wFW&;%5^mm`Ro(vvP z#g1t4M3H>H`X^LR+Um|h_5P<->!NY9<&MIxrJwvmDnMav6z|6p=>H0-fPf~jM=rm6 z*)H5csg0;UF^2?RGkNLGiOj>&I1-{GX?IWGiqCMomgoXKt0BhIEC!W<@)_slj^TB zI#>e((Xtc*~-bWMNq z?{;k~ZdvF0eZ zJzLPv_e0yGTGLna$<&W{OutY;Oxb>n!tUwIq-@P<(xB%J!dU$iRv1byW8AIL*@MLj zrpgf6>*V(r_?XX^9w=LEHSYu)ASk-7OX_ydK$C z3%%w!bdy@d_RAgQhYYB&&l)p2MN)y|<&X?o$Ldi+SnUUT<__b#Io-gf=T08hI|ey^ zBqQ$MQUJ#?=s4mAa~ca(XM42JI1s6l?A2u}2$;;7p*v$Ol!y4%ZoM;FZT)i{;!Gm4 zb93##`e39hyk0B`sIoFi}b_Uk-J*dkfXECWOd-jI_g8RO|Z4Q`<^& zuwm<0N2)^Zcm-y0TMI^4qHi|&e(au>pes+pvUGH>ei&ZFA9!56frZ^U-uTi>OA%tK zt;gC^)i$6aJo<(DZBI5M{-R!^g_UvyF;CTl+zK7k8TLlHw(&I&#aIv{T6D5vzsrZ= zt*B*H_45HI=Qq%^(44Bz2fdUKFh3Q{B51p1rA3%r`>n#do%g1+#?B)8Y_;$RdKRA} z^PprVD6&^@`pA2l+l7#7avSDlYqt_E( znYiCOJ0uF7AZ_fhzjziSxms08>`sF}Fk{JF8C;P_4MXgkO@f^>E26$t;{tzghv zJuIpcT>)|ZEp0W}`lLUDp(`)ZZ27F(lWvqAcc{UZz47j=Y*b!}rLu8e3df^MEWW1u z)Uvs>5=`GemXb!c4&GgM2e6T*cC!BH%}cBG)br(8@xiJ&*hBe@fj3O954^{FrUBPK zaWn5J?1WO!ji1?SpwGYkYSF9;P{}ca>D~>?I4ATlyE_$uPAgf=*c-7WBBHBWgu4Jy zsV}}pwkiiL1>OcmEao1@0~2@>2OZxu*uSc0pw`0|2S(NbM~bFqvKT^LAQ*6z%)&qV z{^#$a-5M1`jrM*@NoJ9{i49v>hn&D-8ugf!-VuDOS`IK8q4*zm?}aD!JMJ^Jh*huk zXnuyJ#zLjOp|b+E?ma+mQT$Yl zbErab&fe#@vDxd^C*0?Qc)<&tITORCmC21m)sD3Cxl-~aPhlq);(tJ&7b8ln4JB0| zo9^BxTDFca%wC^<5;+LWFgd%O6t0lRTeM6YkhlFiCt(l+ZHlDhzSc&q1Nj~5DbzUV z`^V4`{0eI8-ueu7`<@?rtR-q1M^?=Dv%3O&osHf)W{2tT6{*7$%YmrTS@OPH^Uv<% zvSW7SWtC}fB!wuL{gzhB&$-Bzm?@s7h0<`ls0uq~cE7gL*2t-yrY1J(`t#CD?7rk8 z)gY7A(!B8x#X~HeJ|7Lf%I$oyJYyI90zD^!>m-(dILC=#NROX7->B;hp;5Rc*A9z@ zyEZ!hf)*-(I2qa$PsarM*&kKnd3yt_ef_Ry&Fr^xGNJo&DGH#^)=w1u*Zrp*S%lLl za+_l49^Gqudx=pq8R{1K#s7j400|3LC&r0kVV$g{6=sf(BHqNxzsoZ!Hv6uZ*qTn?M@82HlAsL*-9*ou zf2`1Mgm6;-jNXZr{p29&Y9IpcKfbG{3t=kz_{)0q``PZXfRE5B zA`P_1V?&?DndeMqsP`?MRprn^HyWhOLbSz&{TTk&uuR30-Wb7AeSqsqC}Au>Ljn6T z;Vt`xt_j|ZTGBtYx@<(QL-@5C!K809dC>d_v-~`$J967t_Q0Da_Ls0u=odT5O8bQs zaw2=xt;RPiWxq7)8yWE5mxl5aXe*$Ovcp0W(gLYh%EdY1i(IB_2_S9DX9#Vx*d4QW z2E3qqc6*x>jtSjpTm?N1L^a+zKt_?YtBi~r!7jMt4$0UOf4_5&18K8?H1q(riGH4r zFHm-ppl2c-`jpU(yVtX!?e5nSt=qz4?+5AS%N0$ed_w~Wt=9di_UlnkeL&t%;UA}) zHrrTLv4`&GIxCJvdMq&RJ)n#8>Z}dW!<#fgN(p%N55YhP?|Q$4(72=3(E|&pOUD^W zLt{t6?2Av>=f-r}nNdEx(GhH8!v*RG)(nIBNU=xNTnX=tP{N_eXOVfHEWR5-YHV)Cf$$n`Vb-vK!b&LdSVJ~gTl(QfKE%|D zlT45SA9=nNl_O)~zO}dNZB-bhJ%WhiYgEiWPqZ7!E%AO-NL2%X-WPOfpr4dMrm`E= z7M%}KTID2G_&*V5C%>}%_11|?OfJaV6`~8>SPJ)qw!&pYxo@1t)yDIuiFA!Mae%e% zKycUNb5VP*SKkKAO7(cW=4>Xr8og=Tle~A9O;3TivtJDhRMgW{=~2rCAWl*8lHq5A zZgp0-(T&uCM$kBUyVwP};vyRf^t{BQp6iMwlC=p70;79#m+oS9T7f8x2v}e=C3r{^ z-AF1p?Xt$T;67Dop9-CRWi6Bbi`=(S#p9DGa0%z<ROZw@CFm`ycNozGMOv<$7qXF9P? z-UL3+_#PEBfHSgIuN?sTxh$w>8b`YGi`@3xQ%1)bzaf9X<~d|sGrXbjAxA(gO1;~6 zzd#8+w~KBhKOa(WwGxXissu~W(c0bEL-LFnEr~|JClPD^Crjm@tp0ONPN)TdWq;LT z1tR{P`-2i_1y&8$N}ng20TIJM}#UgYAuTZ(c%` zN*tMWBH__H_he~_ttZ@?;-A`{`MJ^B&703&$oO7(5a>CY|S1A$Qhk4m-)07T8~=ig-(5q}fdq5Y?fGyb2fC;w#W82=ad zwU+dS+bcjW|Nb%;^ewCUE4Y-$8{WfCWC|Lb&fg`vAI2qvg5R1z7=ulT1p z;LG*_(&IuMfM)1M8J@!#;zNQ45c#9%Os2g0DZMA$p|5qcxATWb&B$%I`da*ex|SiS z3ZbyEG5wVflM6d?0%0su7McfL0@>!d58QHtKMH(P$PAr_}86YsY!!7xg`F=&-gNE6E79PT;i z{SLr8RbHbdq;bx!@Wpd;VFRd@(1%N(8A}0V>)p~9vnl3(y;P@(TlQ+xZ@4`na?{)P z+d`i>_5R|=TMaU-D-U`tt4CI_PGZ+^%Vd&e%C^4qR6m%-HJQ9b_5nZnF(ziDlsL9h zgqz-R8)2eVsd4Q`>TLaYckA`n4Ra&S7&pkM_ses1XDJG9Uw!13Pn<>h3#aVSBFpQ; z*3(EAv;+V4WVOx6c5$P)DtZ!Z^YKP6$B^bV>Ih;4DGj?|Ue9syedrYQK?6EN-AI_@mZ+fL+@782ANT$;|3$bwG=0@K{G3hDmLbZnq z^MDmW1|(r3wgI;%yb)F|-=S$1q)l>L1(?LbMF*~a##f@O{$?*sj2iB^H9uPF{JFp} z-VQ@!SpR%O#|fs6s{SrVOO+crp|KZesKO5!V@EDu!AblInCtkYzcXy;+`mjz&-@*% z{inE=;y~=#HAogi8FZYo3yh>tP__-+Mu?NC4QBe#ILlhmIPM$O&U%F3K4eeHJU|Isa?sxSJSn`KcF7Yv@3KYn&dLSKAm4|Ejwj|$Gn8@2 qlm2}_a%LR_xIlYz4zaoIcx8=_daXw^?l#onHGC`0#RBIRcR<$DG3M&Xb@^jIs^nn zZ~_8ClMBSqKeYWupI_oJ_2D!LCnV?jNf0-e-ESk z9`Q1h$d_bS^MS!jHew#o_stvSv`&8_tb0gw&o^$RKAY^V^37X=k>qLy^a(v+#+NbrqA`MMgr60_I)tR?u3ae*V z?`%BZz*oX=RrOcWeIb5cD~;&$JnUzvuc7&$AVH0%ZYfDM$#4VrGa(o*_YDp05IZCg_yY6gza|P z78zHwD?V9eaXYhK$-+j>Ytj3N zm2_lls$ZCoDN<$a+`nO<5M}wB?Tjtm`&-_HA0c^iOfyfBym}F&c8>GS@S(w%TS%4A z$~@rKz9FA-QPxK@Zm>NqRCr& zO;_H9rUoq2>ZhRUF;eZfkonEW;mgk(o8ay4ybC1ceS`1Xb!2LzJf6;HII+vA&z!M4qBrnD5jWW7MrS*m0kw-uu{As@JcdJx^DG~t_Ai|q>`cg z!geTVtq51CKTtCSD6Ao|yGKJ1!4sESel(hb)8%l)Dme^4V1gp_K^h?HaJ zZ$KTZF&uLRwB0K!ZA99`dGpMr$^BHkwswLj3Q0*stJ(uE)QjEKx3r^f?UID=)xhL|%9C2P*<*&7w;Uxm@DI?>c>U8&5L0qbR>H)N*Uc zjPlhOsR{caF~{qWy33&(@oih#D50Br2M-NqT|j(`R5x2p7Vp9!n?k6E=6lav3M4{1 z)b_~bRQ+_$gnD(j1uHR4QGc`DamCtF*5x>vOjNZl>2V$7$b}JKP1Wag)@r1jBmtSP zi&^?6UlzlIR5seRt770(YymB z%%C^l%r}J?(m!#YcFj#WEHca=fm`)8-Xm+~LSkl^-H-3RV>qZL;GF*dHU}~OI=+pb z)|4NG&CIT7(zdN=w?Ku8iSeBoI8`Jq*9mz}@12r~%ep0akINr6aD}gHSi||XT6SyY z!&bh_mTIukoe5ezX~&s`@0{LX_e%jIlVAJFnI{QEu^)$t6sS??KE<3eMPv%_<~`v| zFtky#deA365?D|P{0UL)XGqeKPUsXJ5|DhKN^;N%x6AZ>Ut?0`WGwTgP``-U`A;ll zL$9+ELs6jiN(n>SD4Oo?C&1yL4j=51?oSRhuIk;T;x{6-cm5N4ZmGTkMj;WelzK}S zb=OP=+tqx}9Yy|o6@?TwMw#H2FUlT@(P!CY+M`hd_wg<2g)ys6VY ziwa0`+|nuMPX%7bd`e60y~`Ae>amIz4vbh-x?IjXI+16Cpu_Y&e6UZE>Dv^Xy;PoR z6+OEkiwe(I1F!vumm9M2xjZ5|ae#UXNl~nc)}czr8CkN$d<~d0DWp@yZ?9X;hX$8k z>}P%Tt@T(|FZMhUh$l8?7t+yOhkU=7NQwtOlF(sJiBy&QOGITZEvrGJLh(?%f~K%4 zD^hpUc(5h0CGBlm8qcJFB7+FjVOGi>xC+HV-o&zG%Gp(r*k@Qlj;&(Qg-OhPZ2`x`Fsroi>M~k{=ZDv0*zF zn9ke9v0Il4I^gT5L_}H7oSsUuOIPVVwEomkHQ-TjiZmOH40#jic|so_ zoALWbC$da8_jRUr9ls4Ky-uYPT)k;62s}0CxasWV?LfFPA~5j_y)?cTn!z^mAxNks5LJY ziLV4NH=NchV_2;uR9WKXPEG!-OMY5Pz(^F&O23r;Cd+5k7*PAecC=6*v3|T*)4=i| zi5f9A6Lih}8Z|=c=|}q;+>h2hej(K9pL-p)4C~7*E7F@wdWVkN5%hq*YTSYYm4ide z{<34d6EERW&MO=#aAVY0$nh1e6d*BSC}TC$#oY?M+(Xl}RMq3{UaVXy2r%^YVm@nx z^KY1Xq4Q#>@ecCs; z{4oKtra?3^s8EP4R=1;I~qzJHi5s6@sHOIKhMLmp~SKw*eWk91scP1vZ-9gSJje6 zdO=t6Drel4`f{&L14Cq2VkqV0NTn&awhX!_MVzXcyC~Pf07@V-b|%0LBd;3GZym8x1pK99c?WfQpA z5!ZcjE~Ny=d+)-CD(#cuJPm~_SyGN5Gwx4o_yBxEqb)D!s3$-mghkgABhQZFWfPdX zXnK)TE^^ZLwd%0TfvD_f*l2F=dr{vhfzL6N%gA~3gMPET9<;lS(;)sQyRg-an?3sZ zuj4*;xrt3!eue^OKppJ~Wic9F%N9k7XQ%>NwANFuk4q;2jiHQPkwpvbCLnz;{1<^O zn&5Ta9JBp#^b9?eoQe#TYZ4hit?3ASe%aVAqy4HY27moFCV*ZL1rtOHXGi+B;;fJq zi(jPuzi*Q^ISF2H;GqZ$y-MixZgROe-KB0_4C#w=9=ckK-zQ~fF@r-(yY?oqE0Z$z&z?6!ztOrzo(WBK@i0XuF^puZ7@_ZtAqv&yyoC zK^83o4*4&5k@FR^$gn_RD>OACU(Xu0b}OQ9ss<=G*C=BD5;bjE!RKj%)l+vC z!L@;`kn>%cu(aplN=ioB)gbKh@Ocr4fcB_aiI7 zgV;pE@Cno%4!TPfRPPjA$tbbQFJ;3*)d<-<5U===tpO5nhObi$_b}d5rBvTF?p0{i z@%LptQ(j-#lH)VFFcn?MXj=Y(SizlgVtLHemByuZnK=ZI57t$Sw4qa$;-B{}XuJ|V z(f$TozSA5`{3{!ogyo+QfKy$hShTSPmo}hYXoUX^x4m!J4AOcNfD9g^bS;~5Yo+NG zubv^gRNbR5dn}S0ER5ob^3+&E55R|eVObWm5#-~cmR zW!a+_u0P=Zr3Rt?7HM)LNb&GB=Vo2p>LEWQtK}+0;Jpsu*o-i?!K*!WnN?hpcGQkFK_zcmFDcJGrGs!OgPa7786%zRsHSf z3Dok1S*P>D1bnAcd$2?s=8VpcUaHWzy}+~*UornyoelZQ<9ng_nFu~>Oy`c99SrnW9-(R7)$rMbb`qN39vA);SR|V&P)-T} z3{r^O7A8(BmNU1!BOTD`{`h^Cv4Ll{=^*8*!?~e~Zz2dj_UD%&LeW;EOiME-95+7v`!Wv|G zKLJt#S;6rb5*y6tlR7yE*f#9)6YMrL#g@V96{o4ZAYLpoYL%t~P{0%$S9Hw4(@+DE zuizZ!2wbFf`OBL&f={CkyxyR9rfLsFR#J*A-`Ia=GBgXO&Q0U(fh54(*Uc%7Hgu8dcDccM znK=UKD@v=4vJt!FFpSUc0E*9{t#6}MFSDQvOj>b=G+HG59KAEM8%}DS+gKeJX6@n3 z8~iFG6THwruTYrOEzMLU41er}yLMzbhFF%Ur8}WZ6~)iu5K5A$SRBJOW-q+<;&{wf$Oi9$Pd$h*qY;lXkzLrA%i(m7=KN?_2bjOe$-cdPjKEiF z{AoS<=>*l9-hydcY~I?uUmk9GR5n~nd1IA5+KBz*xE&@}W1-&D-LHo8SrVrCU-%$VT9K|3 zk|O#b%(kZr69?Y~JsLNw@xjsxk&ERGAME znRDKd5B%uNCqw%3#EUxHN9zqkVaMR~T_``lozwKBy`T+#wb?z7Ml;FLPX#In7IvR~ z$Uo)ZuKn;{$HVx6Wcz`;nS-KkE-O@yDe4ZEL7`u=%b$*fYsKEDF8Ip2Bxn8OKsE&5 ztUen)*fWDT;y;S0UCv)w)$_~^s@&(;+()?!1@TEQIi_=(i9!h!c&+a%{U6qh|J{0I z68`kRKi4>7=3aui16nb;ELV4ZAacRs?juopN5GErq9$7Dk!k*R5bHw$q%S)}Y(Bvr z@BAvvZ-W=0F83(E;n32Jg&2tuI}2`}`1>f+)mIY&`V4oLhp>d#NTD@ow&p#&;w5#b zko(mHX`}|P*C1mZ05uZ*)-g!o_x;J+vt0FTJA&F<;Jfz*a;$=}H&&a(>2w6vjo}M( zZAm73LhMX^O{(UWx#~j|W;R*aj(5jS^m~TxfE$DV_-T4K}_4Fi37M z>;|!#9;$mBe5K)_)Xm%xjz6irQ!n*$LyOOWdyMn@dm3T^y9K%mCDK4y-}_Qu&if2~ca#68dORi+9mWDJ1&2JE#FXuOUkH5vZS~MG)~eiSI*3 z4Nsg-nkF?wBS^ncEm$l#0IhF*k1m+#>#bb&?#eY863w2<-@rcc#X-)*fMI)2UnUn^vW zaCzf1$=D5lG({I)-jkWDlAk0)Cv|xK5qjM1&j%DDY9&ZfsP+~VC^*XBmVG!{tuG*; zq4N|Y)>`nhsxa3E;pmM1HMd4Wi4#Xsxi!wZYK8gGB<`gnqNk} zOoQq*wF3rR{pxfujaeDXpk!b+*!Hp2dFUt7(Bvwt!lJVnyfJLZ^e!jCQUFuF|}5hQ{f=fuG8*HeD(w zT!U90(Ix_9$_%V|dMX^1t=DFW5_j-b_7xLdJ}3~~v!P4GrhdKMbMcBHb@wcm;zXPE zahbS|p^xR3lq_mMsK9Q7p|kGbbt*+mj8V!gc+a>ukX2k(=si}Kbks=b>_y7Z6RG^h zA6^~xiMjRhhg)q699B~Dm$G#o?^@5?*SeVrND6$DPF+%E6H8n8HD111nw@zUNwM;R zUmzaDfhI?@?ht5|_*+W=V8TWA;S%UQ};}`(hrB z;SM_B2g;~@6))dM1b%V#r9eN#b#;f3+{>&3Fy!`_1BQBMj>~0_t)hEWY;@|D0i|RG&G5{d zyU!c_^nxaAo4vo})59m>B|JvdIlbJ+4Z#r?9u7PfQTA4P`{AT){O;4-`EIzCAoawV z)n{{z<;VpXl91TbMxQVGHS^hp{`IOJ+TZE&yh(S5!x=F$?WlXs;kWQ1+I(gcq~L?* zOmqn_L4L~FAJ}@0Q3>PLQ|5iqyTy_=fBbxI2-Ui4p7L^@&T%u{LXI$F7K+Mo#M zR^zQu?IJ+JF_aPGsroRi&cZaOy5Q+#*bOfX;~d)^|C&@KF!J#7z^#Jwab@>jC5)vn8(PEaFsP?qIdCaCcdBY%Yd@UCmlLm40|Y{ke|%31L=k1w;g%)hA98o@y;I*8pk^UQIuI|M zE%g-pq-z}XorN=YsH&tq{}rY_jo&w9XDV5>BAkKiZMdMDAjpnq{t@rDz23;DnY)cV zk&cgLIuQB+zR5TEdA}&rY-Ab^wq&h#ztHx^oYF^Rie4mhgc-O!s`3Cc%nE=%L5gGT zFG8?+wLe}*AsT7vCHZX1b;ic^0M~{1ucT{5pNgLfmsCj>5S%D{A)fn(s$=}t`6oDf>oPdDxjEQ zAPqdtjAQ+FKg;cPlY@<~4%XIB^_}vNCp_`7qnojopURjW-ika*?1!sfHDlYliLI$S zK(K}IUBq(dh*tJogm8elos9o%U#V%b^fa>&bW-vHzEX3pZvLmZIhUcKzlSM_YZ3M>B_%M9ctz-B7QCWZQpRCDn=5lpzfq9eul>Vw$0> zN9EmH%%tDNg8rZjWV1k8{hg#Wh31dywCYg_L=qIOdKCLZzfD{ixvtp1hLe>VGqPLi1k4gGVaNAn8cFDK#I=eei|$wx zOiD}P;J-_Is@>E_od{n*f}(j>gNITb{~~?$MUGo~A&}&_c#q{G(GytyYPx|1k(m(VzZCLC8nxa$JY_Z+h3+#^+a*{5rD$mu8;A1q3&w@zuTFDd(7| zcD1mF0o}4?J%;X)g+2^wX_~6DpTX2hmps#B{ENv8Ej2*?)#4M%c+8F6)N|2p8f8Si zbpw*jG)UOdaEp$ts09QO$NLH#T67ft>lN?=#ZW=B*a5d_`Ri$3gMiJd-wRjeTwm0N zC*sk9y|O`HT{w?Aot7uoID*TOX-{0VpN@2w_rToNYWFRZ9bn+7nZwRxrKoIk;7z(o zWs`6hNP0UDe4$o&uW$?8w8|;T1zWjEAYl#hOpN>AW|rtU1~*8HN<*HA#Xo%ae&FOF z)=VoKxURBQo=|K1wlLxeEvMXC2mEaZ#p3O5zQ7mWur_=Mb!}Tp-g*#s1n^F<4>OG8 zVfE7h$q5CKCzl?kru~()9X?o+oQz-768OE{yl#tj?iI&XxlDI=0zSkw z;^fwlV*SG=G*iu(1@io6;3>?AUF2NBLT(apADfNVP#q>!sccEf%R?OgZSx>SE^z9r z@-Xs~)tgxM-?VMa@oPaNwhs43;MAZCTCrC=;?2~VzU-~wT9dYFi~Oasf4)&Mge$0$ zp{rjE9MCVcYVy-k4~LfdAhYTijmSKz#2;q91uau8#9yvY!3nCViqeRVKTU_9jTIXY z8#L_>j20QjjGNSX3MYKQLlRZSy~*~sc1xjh$TDY)(iShP@FaW=4VEmK1BtY)1vefeS(^8k&I@}`o`Z>maL<-}Cb}kkH zt`iWD^C6V(>&=$z{@@AWs-T#eM?M~JVJU80K(g#q_Bgum9t2h|CKqCfI;?JUH;9Qp zpAx8lljK72>t4MlA&6_{#|*&hpD?)4+tRqwtbXnxcQ@-!tl~6y@8YONS_-FjY8tIGAYs z!+Za zJ1cueGa3#ThB&+Y1f)USx)1HA>^dv+e!u6KpqM<2yxaek;^W2fP4D%64=CD7r1ux$ z@JL+VHJSDz{xT{2P+8eaaoX`s3}7Cv`_Y~X{o_z*1x%^+YF+6fV2;?tPPBL37`oan ztX)Q`8PFQ9?Hl^I=6Gtympc3D7HK&0y3{nZTx3f*0|hCtkMyR2Lt8K(x%lX@3NvW^ z;n4bXLIPymsB+*7&>IFJkA(K5GQaSTcb(fPymr)x6QV3T)=tj;$ngn6FYOk;8a32? zJXMR|R?YMw$5HXA<3 zo~#}3c0VXojLQu+vcRQZr=F62ukZ(AZ!zv546+xE{A)jK5qE85e#l5(TCRMkPH;qx za*IANhEqh8I4;+2VZdV<5n>N(FqxFQuNlGFVE-abuFgc5l~vPk>G7lal*nBDV3i?= zuS0e%_i+MM0h1SO^`puvBg6#%H|ZX^4H4)>0e#S)>@U~>>wqQ4^|kd+B9{ttOT}b8 zCZpIE5c>A~>zTv0((Mn}$Ts>ELdX{rpDo|#!6}!d#51$3usY`CB^l7AA&jGQ%7&E5 zS&Lh;R=Qs(?yz)`Uv{=pGH7RYo-mUBAowM(?(VhgNI#hpS{Wa1T1mr95@@Q85|6;q zEG;zq^JQCbL&x{I0@0dS%QC#mDoj2SDHsD_Pi3L$c6^zut+zNsfF}i2$x6( zj`^=CK&$Ac;tI(C37Dyi5h3L8sK zAW-Vz`kybSsQyI{I1SHi)QJZywI5AxF{IvP%cm(cIW@vUSs|6|`V855<<(c|d&yR4 z=HfGTT%h``U{~-)^|qc{T$!XORXE8E{DmqDGPvFH{>BP&h$6tS0=gvBy0jS&kE;jB zQarY#3a_5%3~!^3Xc~WhzC9Gw(3{KX)ADU0q4nYfqbmhCdo{rRclM2Po1D{x)5&+i z*~#7cIaQIX{`UKQ^e|^;)8I$@h3ZV~rF>6ydmCLm81KK5xy_@2rf0_OZ@wmvf_ANe zF^-ft#ua+=O$n+~;zGgWVfztse_6q^}j5af&7 zkAPn%S$%#5v<<;B=`P45A_678qJ$d>WiYfj_zav_IcgEUu};39C9T{0kXkb>kE$r- zhZOf-E6#RKxD{SF+q%Jwou)tPc2B<@@ZATfD>cw~CUcr4!dGFpgqPAv)~S>EA~5hd z!?g31@HkNuD1GGJR;uy+@3j`q>Blo8uQXX)(I820A6f4Hzb~r&m*s!I$#!;8eERjk W8wdLv1O(81K7{f^rH=}hA^!^=+TUdW diff --git a/man/figures/VIGNETTE-dep.png b/man/figures/VIGNETTE-dep.png index 0dac117264b21dbb132d79337870eda9fbbe1e2a..065e6b4b1ac044729f83a474800c3e3e789184b7 100644 GIT binary patch literal 6720 zcmd5>XH=70vyKfFqzQzMG(kuNq)HGFkzSP+ItdV|QRy885$U0Wiiipv2rVEW9qAph z&_dM^M5?q(`flx3Q zFfcF_3WfrnRWJ-N@Cr9$^eBI5J-v86G%&Dw@xbVUGoURP(N<^CwoGUQle;~$JL;!& z6bOZ4aYs?aVNjSV6b6r~!l0^vC;1kOy9?Oejny3u42ncFS}z`r1_l<5CEQ_i_h5JP z;&Aijbo1kM3*>SO;&uz>b_?ZkyMN9#?3`-^uWKZqYZSjrjDT~TfOGtBh2rCXOTor> zkuic!7#Ili(+ms>gaMSGXo100;W79s3?6urlg{5t5kjR2J7x$wJig$NC1U?f6qy4- zynx!}UbM{@vndd>E)ciIN?2jhdRRcYcs*BWb(uf*p$)bX(PEA6WLi)nOQTL&*su!ETst6k_0T8KmauDB9I9LGI`GS(wyBT zB0^%`UVPp`?5pF&g#JL&5wiU{{bq~B9#zYEdBte zHxs=3lyxtgZ7+vy?-l!AF2`OT$KD^Ddswc$Vy^u%?)?g${mOIu)x7&Py!-Wh`;GkM zDipb@irf`X#$(9%Dl#5VCSb`FECF}`TLB};G>2bEfb*WdqiOC50@1Zn{*QFt{=)$T zVufg3S26Z`x||gi%B5l2X^D)JmYsbU-@tdZ44kIZ;TP^M<_YaYgCBr3L7v$vP~sX0 z5`O{=w?8VBaf11H81I?Ob_}B#t;f$iZ1%|5KZAK&0q5DuRx$A=Sp^iAkO~X2wZvfM zY|SqVAyQj`yFR{pt|*umd&iNe)guyjTacIZ?Qm?uW$;)gIJDz56#V5lIOgurnD)!} z+mTP3|M~5(`|yL*W1QPm?~Q%H^Fj1{nsBHp9LfgnWdPH37#7*M&W;WDMdnVLuG@UD z8w)7MZ<}e2H-Bo)^ibp# zd{wA*@adM~>t+csWcShL*mbv-hGAnawz}?#fO0{v$Gbl^ts&Y)gkagdX<3#hEL8S5 zB0*a9vdl%PlK62M>`R2`2ke@Q0Wr`4fl%rcC6;C_FknZLG#b3Uzb<0s(~O}_;q1*w zz6GJrnq5ef7Ub-xc~HA++l||sh8J2la_*c3(?pI+X%%z`tQsmC@h2uX~sE(~iDI9A?CVs7)3Km%Q zCD2@_deF{4WP8J?$ocXf69VV7sVuIAntIY88u1S}Qz0k+P`{P62caFG^CxUx3-6PL z7oC!br&0OnO<)<2xX|Weto;5bwRPUaOI=g$O};-R2csw579ovQ9%w z=A0$em^iMkFD}@PIO-92jv_{}(8_ig|{Q)N1r z!5JM;xT5W7E9ZA&g8-%+4tZ~pY8Z3dqC;)|OnHUa&64Uf=xw@Z{iYV9O*AhAZ7aDb zZo-C3ky@;9ReNzmRk}S1i-W)8z&WbNR93t(7DVEfOx_Eee4Htmc13icWXnRU9MhA% zKJoxQCi;0rxPAS+d0O8U|F3vQt<@Q(sM^vq6p2naScU6Bs{CEeD`k9+Hx(rA`aNZE zEz@3XerQ!+@rKv5-xRU}S!Sk~wn?x)HI#bu)XP`r$<>L4$0i=Y!%Zx7TyT4&0KVay zwX5GT6#ouLI1e{_*lqtfLY)Jj z`^@bUWNN8n+L3Ru4rkiG5$DTaaalX-`VR8AdZoV2DO2c0rj{^TqN3pkEQn-74gHB+ zm4;y+T(k*wRCIS86oGp#gY4C1)am~+CsvT$7e!9W6gOn@ueqkwYJ0(yT~p9N?h2oe z<%QJN2|r#O>|_Y)Zo%FGqnM>hHT-env3@}Kv|XXcmm_WC=2kaX z%_iqn?Wxm?3(cXEUH4Ids-+SfdAtam#IMh0Bcydzj&f6v%-vOK^b85Lsw!Mj%|ZRz zM3}Nkb9bOzpm^{JV26EzwYbOgOh)8^fUe^m>7ZJ^u)b5c!y_Hf&!4BT14t)Hw0}PA z8BH%|2kB2Ge|{psG3xO7f4;`8i+fY?>R`8oTAKt{C@qm30Z3TF6#@|f*gM_A%)Rrx z@S@I>byY-e4S?gkA-n+>0HokM(rweRIV;`in>>3~k&!J=onZ;NobvA|+j9uniZ*Q( zk}!Bx#CT?-qN(FA?I9UK1u5G0*Pg5H5*j?xzm}VX2(5n7I6{eK@R7r_d0$&2-!SX2 zrWb$yK$}Jr0}4H21YP-f=R#4NBNh(rjZnL%94P{hvG$$2Vw9kTicpfQWkl52dz$mvolmtN=4iY~g7sps3suBMHUGYuA72VxOUH@D7CBG6U~HbOh2Z zMcz7MUBEhWztCjV%@uZ<@x|n2ov;jzbvyonX_p?cqaEOz zGx=V|F%ac5bLA$Lq1qb12~nbm;KdPqgMIV98Nh0Pu}h+O!t4jypVZVzP+b(cPYM2O zMNa^lyt6#TEbb+fz8PRO0ou2_>wfzdcLwrq{`!L$v6*>lQqu+3`+(y$biR+}asq4} zL9FK<$c_2$J6Qx=SkfKY$9er^FO5*$X-FdIe4PHWy!3k8|KtL5R_FV;pWkUZku08? zzES>qM0DC-=5@DFV9_^cEaeD}0X3bL8a@T7gRUJf7*OH+D8yud`KCrE4d^(1k9{!$ zf|EW3WABWK(uclxz60#vLTTBwLvAa&ecwL|6#_yJwqD}zQi<#MJ^_6Sd8L8&p?9Sk8MZFTOXs5>L zg6pf)*xubw>XFq;ay5nHLALSZ(}f2eK3vn(8;Qs?n3Ro{PiF_-fMN4||Dly3MICGU#@W1DEPqmdVI! z;{1H^mC&oP!B4m`{!=Q%DEVO`aYhqvz5h8&LGJcc3d+&ubOl;Rltw%*XGH|OHN6?3 zPapB+oCUiLy6y<;RNe|sNGU(OKWOvo(3j2xZiPa>%^`bv6IK6}g8XLzD;;tD{r#>Z zCGO~F%Atqyh)mOP=nl42W~QZ?r9A&BAc_s#Ml50hdl6~%_VW8uE5Qv?0A z{uKf7TdfkU`cBr?2FKQ}n8K#!CO;X4n`!2M4m3HX)&aok3So8CiSjAt>6^2E)m?>g-K@|7O+|tq# zZ#hC;@htRkmy^TB5!LXAalo#lDW8@uM-&Y9q?aUZ97u2sB{r8**TJ%159{l$rasXQa5$Oi znUiAvriW9m%?7=b(gUe!3fD;82$o2FJCu^uc^1F+I=Ib7pgg8eHK#0fea*l40<>QA z686+ug9Hoi^pYp>AT(bW8rOwz9B4aB=4;#I$%r03zLO-}-$3ZDj9p${o`BHe8aXLx zvsuw}E4^Nu>viEX?Eu1a$=mex<5P`F%vb`>vyldfFgh>|ll&ij&*8ZVSsPiD04RfJ zbg2NMYwt5HD%b&`W(S;Ltv&Tw4Hviua6Jti4odVtgDWBmNI7ks2aPBGTMp%rXk^cW zPgHKXPe8$<9#=EvHT(CcltJ6akne=*u6n}nT;!e7e6uwKFx>{e38<7^oP6D zTSqZPRvBs&X!i)<$?WFeoDw3gHklmHGb>uJn15;5zGso4(rnW^W~NpWqmiZ&*+XJ< zsgAx@Ovxo0&~IFeu6FADEA`|3q-aPJalpCKz=)QE|-?sDDoV7$%&+ofrvSR$&S$v<>0tsf65wUQ-;La#&{{O%`7+ zY|4Q7D}#@w7b9^{*7%1zgi9Y41$3+%!i~@US_2>95B@up)Yn|3K1&=L&jOx%eLr{WR(CV=^qTeK5;)DZl8`HsGi#0 z_1_OaeTsJ@=u9S%9ZT#M>f9^aYDsHl-6R7pvD#%1&dtV77h_Z#&*CvVs+v0erupCp zG|SrpUo2N=(t1t(S+I8jN*a`bQcY^ZAuh1!`>uKbLY^R`kZFJ789QGZ;h@8ibcS`< z|H6c|V(MU#Iz7XE*|BkK3zegKbfvg=bZYj88>Myl<>q-#A0Vs*Jnh@V?v9C)kuoCx z(N`NP@WrT={%*!Anabgte`~n^^_KAeDo}tLhhaUCaY|oK#1}XKO`%J3NV>?$a|&U7 zqra{eP^b>@&jwLON~(%-VQ>uBk~0CB7I+RcDqwI z^Sw^SICAc7BtsSq7U;mevYYe5X797#b3@bIzYZFdyi(3pc(Jp?%Ea@x>#rHzqC|-I zle86*;g{ZrWJ$jO*p>sGsD}t&^xRs`gk)T1Z<)w&gU^1Kns?@k0BfW&6=_;|2rZsa z5=tg~@**t`I=W7hzsm8{WE6`$5nZ_wB-YRBNES2Yz#cyA6f0C{UpEUT%HxO!$+y!C zq=I4<4O~+#q#_y3>0^+Jem{@_bBWRoceD?aF0~g2(%bg0hORyIWWwh-l`WYx9!AZ5 zW^A*l!}gX-2i}`i?N05$hMI4hOo^nQGqqN_@zEjSJ0$Sgu+SEYj=x+;w*9RCy6p=~ z{>-J!9kY-oi|rlw1ms0S{=-Gq1m3{hy(H!4wVaOe+`Aj^)*+dT6-;9Am_=DxK(z=e=y-_fs0{dHw&pL#NXrOMIw!`#;`b#if2M@AwAjsEb+a9`HgG+)e8!dG0waUynsQeh0nr3rNFA}Sje|qOk z?Nh}L`XdJTiM}J(huerei!C!ONu$-PC3H4?b0k~mYTK=j@2n8fkV9=E?(O5&bh$tq zzPRib|K%E$^!9a9)@aa1C+JyKO{VIjB8}^}rW#fc7KLZJ5+7C8jSmR*eo&oB6kv=w zP5b*^6u4kr)Jp1mdeL@D;mu&nLrLz;gXl`7y0pAWhwepQ7nHVSAPctX#W-HRY}S)` zOY&l-VoW7w`Wtr;zm`MkK17RO&UrUbOaaR5yJhj2?fnjIu!ZElBC}7jpsQ!8Ox>_; zrp{dG!rpz_-N0LSwPb7F=;$d16s!CAr-*bf?VIF0?H-uxXL4Wa={NSO&S;LG@^HRY(-n)Tc`$CTedt1=OBDiZ(#oQOFbTaPhW8I zSO>c0%k}IrEPQvsR@R&wOjqoEYam|iMr6d0r;-QVd$+vv71N5Q8nT81#*s|sxZ>z4 zaI%F=V*kobPOsy_^!b9Gpj%UOF@js~EeC`Mi%Uft_Tmk5)|1gv!+HpqWTZr=_LEwZ z$Be$fofYTZTPR#Tqc8Dl8xk4*50{>Q3%0d9oW*7Ef Dw0-LZ literal 7571 zcmcIp2{e@7`zMuDk|?_gAz8{c7(|35RQeJI2{UAjVJ3UDeC=7XFEdG!G$F~@DNELw z>`Y_HI%Cfg2ATgmzWvtoKj*)l|9kHHo-_A-?tP!hz;n;f9}i*bPQd;cFO?x$-)9*VS%wgAS@8z8P5U(8bMiE8R!MN zAS^Hl^8%jn5Eu*s1D*s(JkThn%nNC18f%IK8s0P(Xf3yoBW<{m&=W`p9;73WhZC;{ zY;SfL3k-JB1IC;#9tK?T1Xw%;Mu>+IfF}cf(&OGK4|hHf1fK^IXv}$$NYhv(5@>iN zeiFs+;Vpo`2qNwaB7B7qenN;qVMLG!;(-VvL=^D|~GCtV}DPCk8NSZpK*(Y9F0 zrbOAgROL1S#zKGrCPIL@2^0o_!6;L;ssLM7so$zn|D#&NqFUotE#&4Wt?TvL<_#Ck z8gri^t8Mzm?U#)^46YCjj7WxtomVb*UAf$Ctlw*LssEbp zpxMPiGaa(IHs!h|B_2W{z<}>i7!(SFF?0hmVxc~2q4xa`@R+6Qn3d|dwaSF8@}!;8 zBpx|A3FzB0$(WpEFs2+7r|u|DJ6@nVou@i0&|Kta;Bqsra8U_FMNdCP&)}nD`L?q7x3YmIu=Pq{D_3yq zwcyqpp)I`dR)O&Ldy(xAqT3~++ohoGveVlYr?;!cwrj;11Q>%rV6?9)L)&{%`Kw2-5J>LSD=BLkY>mQ1XW?y(u=FI}Z&jLW5@N zQs;U5DwGF;yphvRIqT2~wlT1yO$>~;w5d#*DyR%9(UQeXCpm1ye#o*;wt!N((%e;G zqLZV7{91>4>_&W?(T)`E#nPvp^auN=(%D>eN)P9ilValHo+EzTc%>Yk&+hUqhHUBd z%tUL|r7JKsmlVTwvG13{Q$eSjP^z^eenQGg$`pH&9w1E>diF_9ru|0MJlt~lcn7Y} zuvKU&>;jNVMXZBp-q<2y83*{#yv-X;sPe?~0n(`Z0IU&ll z`Hw@gsLQ8u+u1Uu?=W%wEylRp(Z_$~L)kl_1*=cIvS3I=eW zwk9W&uD<;jF;d22<*IjVwQ*(ZMTSCwmWh%gZLdw_eH;7Fg?{gU=HN(6&LEoINYgcvb2dhI$UeXv7QOGVDA0;H$RpP(ZWDa#7g`u}+wi zwW&>H|scU~alE#?nMwLi5U+X^thG32?uc(r(7?LFlZ{wQy>r;`l9wBi^Ry2HjGAy&K zDxtlmzOm}@(WS$X|Bj#jOGo}c3N+*f$3J}BmF|e~u~|$O#K1bqZWUJdI@2$~^QP?c zFToGua4CVg2XX0ABX-D$4&hGi+6|GKCeqK9_2h-T%?(Q@%TpavWrhnLMvZN=1NBU> ztY{&s+KJ1zJ{;1Ar}SHQBfONi74uR<%Yuk;rz|}cRQhUmV8#0|qBgSZ6giwShdvzq zYIVAPoMn2=8y-7a<-cuYu+UUD15n5gI zi?1=Rf8AodpBVb3gN;&a`);UWm$i1~$>}Rn3ar}==5H?Zez!hb+zAsE}kEDAQ=6z`>9P(%t<t)92}2X zEJ*15$Q;}G`m~XxbPKlh`$ov!p1AX`z+08=(Ko1J662sJqvx!UGK10Ftxum60vK3* z{|98l(%998Cyaz}#l$wo9!ZBiEs$4hJy}nraG{bV7laz;##j}hN2&sqHECFTS$5mf zo1^Fa0k^?=trugWi#-%Ngs;Srf}+Hv1Y5ai*iy@51Z|uJUYA-u^;Rqfy71xn;J3#v zwtz@~b@%+Q>*qsfw1qk~Y5=*KR>OPA&_A=4m1l+#Dh9=rh&IXON8xF8mh`c2P!EGakYO-iAreC7mKM?{T4w~h>II>JiV`%4uBsVjH zn5%8uI2^nY!EGb@z{L9++7SpdlqcI68Y;YhATT4}>dsj=cbO!arrHz={sN&@A}0&v z6{lVZW*_-nz-k*<&Rg8&@3cgAovYPv^;~B07U5b@&ez?gU`KxC#~n-{X0fI!fguhx z);yzwHTv0Ho|xV9=-%O#Ps{cZ7=`_12N51)-DeoIcd2JH6-uG1TcYTMz*6Ek?a^YU zn@{h_#{ke$P>~5Ci^3wdZTi*6TK<4D+|0Z^cepv|+)0)U4vDB&#{oo_{%OKnr7g6b zU~IRQtf-4~Pz+B|mjqt?JTmY`85DHNHoswc+%Cu4sCy^q6Pn7n5YD@KnxY3dre7AC z0{}lH*s*KePmAdlE6!h48cEe2rN`{9Y zq$*tK64fvF4CCH0u|=7Fvi4Am7TGnDud%l5Xh-&M;WYhLN(vwx_)zn%#e0#E-3zjR zIdn@JO>?D`aGJwGD6fZapfasG8Tt>&+4Q!i)}CaBH2GEd1GSZ=H<n`dyyrBh7NgX3 z9#0=i>q?B7XUP5jxGnmMF4bA+IjP~{l>D*?PN=T2&sn0CchR>NVdm9F2aI36T|VyG zM2a~kE!284<1o)(Urn>%U)hgn=<%0>W`7{^%E{)a9VlrI{_VI@jZiW3Phc%vbN%qn zSvbTt{KL%6c0e1K*h>_X769dnVa=X_H%yx%A|w;KO#iv6J=>o^yJ+C8ZE$ zdY}yeyll&loS}{h^ja_Qd5%87(%I)s1N>n*yiMO`( zm-O_RjoDR1ghsP-Aov~AWq?Kk*~*OO4}KEhk2}R0tu^_ph-=lO{sCmzC&{Sg#GwRV z_bo%Jj#79+nLOo(A)E_EghUT?C%XkDZgUZ{Cq3E$?`eN(ime2Y6>{eoE6j+A((_PS zhsyHm^zl1B%zg8#UKM#5yl_uuZ{MtKPan@FH8I`ZAX({`u&gDNlKMBeR{niS8YCgM z>k+MA+HQ$1?F3H8KP0v8tyFHMN^_cd&b86Zp*r_t95gI*dHLU0OV&P_9XHM~f{Tye z*DVy>5e7#e6ybEif~g(@^k7qE-*4iLLc)H3a|cv<@;|7frFIild|-qPNy&5}V^d zI?Ltn>5K$wgo$qUw&exEuz?Kj>DCcRsI5WbST9gv{m8AyeJUtP?_0j1_Wwm!7%{;>byW&dl zAM?)iq)4Kvc1l@d!8NNSP2g-A|MTb1aM1vo%yabDAE-<&(8X$kl;OhsxYb?UKq*Pd z5)v0qxmTH?*)E^5Mpw*(E$;CArLKLscBbz_)csmng7ufzR&FJ0^dXJ>RYz`j-1)+Rea!DO+u$hux`nbX3DnyOGj}gE%0WK=y`*RM4LAK`)M-f}g3ut}DRFGnx z`qU1>=;Bh<$fn`M5V;?waG{Kz-Y^8?Fxo;`rCzpVnFV7B`P+hQp#wW!%s;C>1-=5#p%}W13@vEU8%jLB(y=O+jA)>O|+SURM&LeH@oMh6#*(P zJ^5PO^GsE3*K_^xe*GPpzLZ0j;DnflA*gJ*Na33WGru%~xqb@HbtRs@% ztPW)Uop}t22d?xe&dRaJ$@J3a{+3^~L<@6{6i4Q`H?Aqvu3VB4Q1kWKPOa+~ej8rI zLOC(PkHn6xA`m)~dA|$Gm%g;v%dVBQ-&2pRup63Q^Zz<74 z<3&@P-IMQw=TGeAQY+!$-iqhD$6smv&a>3C#38(n*kDc8cUOVKdopG6an9K~x+>;O z(hX!j_=e(_4)6lR?=a-*w;K6m1-~`p<)M=2`Hac zq3?ncnD7mG4Uil*_y9^PcOkleK ztjQ-3K5)Dzb}_R!hlG?DD!&ZYhgaN8FjqAi7FqI5k)F?{pS`}9;@Q<|uSu;V;*;=U zBd&iV2!FAGth@i}>#D!;<-dHn8~G2mRN8Iru)Q<8vtx>{hycz<_h@El*GP-{y>;}! z)cT-gASD)Z9`n|w%%sN@;OWSgG~?LgTBTk7?u@t2B1Y$L>~R`Qs>a7gnXx@u0)}|b zYYTY{`<|uCi|Q6mWIvEP*W(B!YjyfVKV!aHyaPr5YrN^8C?LKGcW%6R`2t7`g+do!Suw>q8A~uTbIxA zzgNxehExStL)a0f+5L3pPQbSJ*X)Gv_E$Y{+Su9YzTWj^q@=6VQ=7M_a=F#B(oNOg z?)0?UqB^gnZ&MvUISe#TS%23SxOugRQLi8LGp5G#WXN@$csfU~#GOomyL;71xQ}61 z@q_VjTGDyD8al#zyZ=G>wFZ`E`7}v`s?^?wAAbN`^u*8Q99lDvtbpkFfV#6++-egx1pBZ#!~~hTu>e}!JQ`kO&X*s~-Ek&s^2XW( zr&A)=Gv0{g&ALk~wmbS(Y0q^<_7TY0Ky8{2yZO^SI5>RPdUq+MY?VP-J~DBqZ)7yS zDBSp206#s-2OK5sli^tH9B}A>M)0ErBL$FaFV7o;PhjO^-~)*RbypIu?l$8oOLj`N zqRZ*zkz3CK(zGNZf+9M&odH3UPd-5r_ouOw1K)$>D(p^dzS3{F51-yJ$QQ4vOiNh? z>91BfBF4pdZM5s=#Nt4-W<%{qg8LCA`q~iCoX3MzzK`zgfwrTq3-D>G14F8zzo;=D z0a}_VC{)V`C)9>Niw?iuLca=9y72f6m&zL5>H zvB`We<@9~S*Q)nM*uFi0sA+#JYNWj}qew35Of*+cL{&wBwD_R%)&J_VB{m4sCeGpyg$%U!V*4QfoG$ z>H24DJTYm_R83fhUePb#(?C#y80~{AX`u?o9UkV%Xc(QSsFbmJ#!$cV{2B-4+>BiL zdc3r)g4<+(6AYbQTrkoQv2l%^a*u|sp68fcIj|fiv5?L-6F7O~Zop`M+Nv0MPc;o2 zZuO@1h!N=6#DVa4V7tlbt3ipvxAuMexdiEHB#1-W3r-fZD)>r8k~0U p2)) +}) + test_that("color_num has an effect only for non-numeric features", { p1 <- potential_interactions(x, "Sepal.Width", color_num = TRUE) p2 <- potential_interactions(x, "Sepal.Width", color_num = FALSE) @@ -35,15 +41,27 @@ test_that("potential_interactions respects true SHAP interactions", { expect_equal(i1, i2, tolerance = 1e-5) }) -test_that("heuristic_in_bin() returns R-squared adjusted", { +test_that("heuristic_in_bin() returns R-squared", { fit_lm <- lm(Sepal.Length ~ Species, data = iris) expect_equal( unname(heuristic_in_bin(iris$Species, iris$Sepal.Length)[1, 1]), + summary(fit_lm)[["r.squared"]] + ) + expect_equal( + unname(heuristic_in_bin(iris$Species, iris$Sepal.Length, adjusted = TRUE)[1, 1]), summary(fit_lm)[["adj.r.squared"]] ) + # Scaled expect_equal( unname(heuristic_in_bin(iris$Species, iris$Sepal.Length, scale = TRUE)[1, 1]), + summary(fit_lm)[["r.squared"]] * var(iris$Sepal.Length) + ) + expect_equal( + unname( + heuristic_in_bin( + iris$Species, iris$Sepal.Length, scale = TRUE, adjusted = TRUE)[1, 1] + ), summary(fit_lm)[["adj.r.squared"]] * var(iris$Sepal.Length) ) }) @@ -52,25 +70,35 @@ test_that("Failing heuristic_in_bin() returns NA", { expect_equal(heuristic_in_bin(0, 1:2), cbind(stat = NA, n = 0)) }) -test_that("heuristic() returns average R-squared adjusted", { +test_that("heuristic() returns average R-squared", { ix <- c(rep(1, 60), rep(2, 90)) y <- split(iris$Sepal.Length, ix) x1 <- split(iris$Sepal.Width, ix) x2 <- split(iris$Species, ix) f <- function(y, x) { - summary(lm(y ~ x))[["adj.r.squared"]] + summary(lm(y ~ x))[["r.squared"]] } expect_equal( heuristic( - iris$Sepal.Width, iris$Sepal.Length, bins = ix, color_num = TRUE, scale = FALSE + iris$Sepal.Width, + iris$Sepal.Length, + bins = ix, + color_num = TRUE, + scale = FALSE, + adjusted = FALSE ), weighted.mean(mapply(f, y, x1), c(60, 90)) ) expect_equal( heuristic( - iris$Species, iris$Sepal.Length, bins = ix, color_num = FALSE, scale = FALSE + iris$Species, + iris$Sepal.Length, + bins = ix, + color_num = FALSE, + scale = FALSE, + adjusted = FALSE ), weighted.mean(mapply(f, y, x2), c(60, 90)) ) diff --git a/vignettes/basic_use.Rmd b/vignettes/basic_use.Rmd index 9f99858..b993f7f 100644 --- a/vignettes/basic_use.Rmd +++ b/vignettes/basic_use.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ## Overview -SHAP (SHapley Additive exPlanations, see @lundberg2017) is an ingenious way to study black box models. SHAP values decompose - as fair as possible - predictions into additive feature contributions. Crunching SHAP values requires clever algorithms by clever people. Analyzing them, however, is super easy with the right visualizations. {shapviz} offers the latter: +SHAP (SHapley Additive exPlanations, see @lundberg2017) is an ingenious way to study black box models. SHAP values decompose - as fair as possible - predictions into additive feature contributions. Crunching SHAP values requires clever algorithms. Analyzing them, however, is super easy with the right visualizations. {shapviz} offers the latter: - `sv_importance()`: Importance plots (bar plots and/or beeswarm plots) to study variable importance. - `sv_dependence()` and `sv_dependence2D()`: Dependence plots to study feature effects and interactions. diff --git a/vignettes/multiple_output.Rmd b/vignettes/multiple_output.Rmd index b664a9c..084eb30 100644 --- a/vignettes/multiple_output.Rmd +++ b/vignettes/multiple_output.Rmd @@ -98,7 +98,7 @@ library(patchwork) library(ranger) # Model -fit <- ranger(Species ~ ., data = iris, num.trees = 100, probability = TRUE) +fit <- ranger(Species ~ ., data = iris, num.trees = 100, probability = TRUE, seed = 1) # "mshapviz" object x <- kernelshap(fit, X = iris[-5], bg_X = iris)