From 688a185a1f82679439268813b11f1992eef90e93 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Sat, 21 Oct 2023 12:41:58 +0200 Subject: [PATCH] Set verbose = TRUE in perm_importance() --- NEWS.md | 1 + R/perm_importance.R | 10 +- man/perm_importance.Rd | 10 +- tests/testthat/test_perm_importance.R | 143 ++++++++++++++++---------- 4 files changed, 101 insertions(+), 63 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5b71d8b2..b2113f84 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,7 @@ - Missing grid values: `partial_dep()` and `ice()` have received a `na.rm` argument that controls if missing values are dropped during grid creation. The default `TRUE` is compatible with earlier releases. - Missing values in `hstats()`: Discrete variables with missings would cause `rowsum()` to launch repeated warnings. This case is now catched. - The position of some function arguments have changed. +- `perm_importance()`: The default of `verbose` is `TRUE` again. # hstats 0.3.0 diff --git a/R/perm_importance.R b/R/perm_importance.R index dd262e94..4731e9ae 100644 --- a/R/perm_importance.R +++ b/R/perm_importance.R @@ -39,7 +39,7 @@ #' #' # Groups of features can be passed as named list #' v <- list(petal = c("Petal.Length", "Petal.Width"), species = "Species") -#' s <- perm_importance(fit, X = iris, y = "Sepal.Length", v = v) +#' s <- perm_importance(fit, X = iris, y = "Sepal.Length", v = v, verbose = FALSE) #' s #' plot(s) #' @@ -60,7 +60,7 @@ perm_importance.default <- function(object, X, y, v = NULL, loss = "squared_error", m_rep = 4L, agg_cols = FALSE, normalize = FALSE, n_max = 10000L, - w = NULL, verbose = FALSE, ...) { + w = NULL, verbose = TRUE, ...) { stopifnot( is.matrix(X) || is.data.frame(X), is.function(pred_fun), @@ -203,7 +203,7 @@ perm_importance.ranger <- function(object, X, y, v = NULL, loss = "squared_error", m_rep = 4L, agg_cols = FALSE, normalize = FALSE, n_max = 10000L, - w = NULL, verbose = FALSE, ...) { + w = NULL, verbose = TRUE, ...) { perm_importance.default( object = object, X = X, @@ -228,7 +228,7 @@ perm_importance.Learner <- function(object, X, y, v = NULL, loss = "squared_error", m_rep = 4L, agg_cols = FALSE, normalize = FALSE, n_max = 10000L, - w = NULL, verbose = FALSE, ...) { + w = NULL, verbose = TRUE, ...) { if (is.null(pred_fun)) { pred_fun <- mlr3_pred_fun(object, X = X) } @@ -262,7 +262,7 @@ perm_importance.explainer <- function(object, normalize = FALSE, n_max = 10000L, w = object[["weights"]], - verbose = FALSE, + verbose = TRUE, ...) { perm_importance.default( object = object[["model"]], diff --git a/man/perm_importance.Rd b/man/perm_importance.Rd index 9281bbd3..3b0ddfe9 100644 --- a/man/perm_importance.Rd +++ b/man/perm_importance.Rd @@ -22,7 +22,7 @@ perm_importance(object, ...) normalize = FALSE, n_max = 10000L, w = NULL, - verbose = FALSE, + verbose = TRUE, ... ) @@ -38,7 +38,7 @@ perm_importance(object, ...) normalize = FALSE, n_max = 10000L, w = NULL, - verbose = FALSE, + verbose = TRUE, ... ) @@ -54,7 +54,7 @@ perm_importance(object, ...) normalize = FALSE, n_max = 10000L, w = NULL, - verbose = FALSE, + verbose = TRUE, ... ) @@ -70,7 +70,7 @@ perm_importance(object, ...) normalize = FALSE, n_max = 10000L, w = object[["weights"]], - verbose = FALSE, + verbose = TRUE, ... ) } @@ -200,7 +200,7 @@ plot(s, err_type = "SD") # Standard deviations instead of standard errors # Groups of features can be passed as named list v <- list(petal = c("Petal.Length", "Petal.Width"), species = "Species") -s <- perm_importance(fit, X = iris, y = "Sepal.Length", v = v) +s <- perm_importance(fit, X = iris, y = "Sepal.Length", v = v, verbose = FALSE) s plot(s) diff --git a/tests/testthat/test_perm_importance.R b/tests/testthat/test_perm_importance.R index 96731e52..155d7377 100644 --- a/tests/testthat/test_perm_importance.R +++ b/tests/testthat/test_perm_importance.R @@ -4,15 +4,22 @@ v <- setdiff(names(iris), "Sepal.Length") y <- iris$Sepal.Length yy <- "Sepal.Length" set.seed(1L) -s1 <- perm_importance(fit, X = iris[-1L], y = y) +s1 <- perm_importance(fit, X = iris[-1L], y = y, verbose = FALSE) test_that("print() does not give error (univariate)", { capture_output(expect_no_error(print(s1))) }) +test_that("verbose does not produce an error", { + expect_no_error(capture_output( + perm_importance(fit, X = iris[-1L], y = y, verbose = TRUE) + ) + ) +}) + test_that("normalize works (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[-1L], y = y, normalize = TRUE) + s2 <- perm_importance(fit, X = iris[-1L], y = y, normalize = TRUE, verbose = FALSE) perf <- average_loss(fit, X = iris, y = y)$M expect_equal(s1$M, s2$M * drop(perf)) expect_equal(s1$SE, s2$SE * drop(perf)) @@ -20,13 +27,13 @@ test_that("normalize works (univariate)", { test_that("v can be selected (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris, y = y, v = v) + s2 <- perm_importance(fit, X = iris, y = y, v = v, verbose = FALSE) expect_equal(s1, s2) }) test_that("y can also be passed as name (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris, y = yy) + s2 <- perm_importance(fit, X = iris, y = yy, verbose = FALSE) expect_equal(s1, s2) }) @@ -36,19 +43,21 @@ test_that("results are positive for modeled features and zero otherwise (univari }) test_that("perm_importance() raises some errors (univariate)", { - expect_error(perm_importance(fit, X = iris[-1L], y = 1:10)) - expect_error(perm_importance(fit, X = iris[-1], y = "Hello")) + expect_error(perm_importance(fit, X = iris[-1L], y = 1:10, verbose = FALSE)) + expect_error(perm_importance(fit, X = iris[-1], y = "Hello", verbose = FALSE)) }) test_that("constant weights is same as unweighted (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[-1L], y = y, w = rep(2, nrow(iris))) + s2 <- perm_importance( + fit, X = iris[-1L], y = y, w = rep(2, nrow(iris)), verbose = FALSE + ) expect_equal(s1, s2) }) test_that("non-constant weights is different from unweighted (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris, y = yy, w = "Petal.Width") + s2 <- perm_importance(fit, X = iris, y = yy, w = "Petal.Width", verbose = FALSE) set.seed(1L) s3 <- perm_importance( @@ -56,12 +65,13 @@ test_that("non-constant weights is different from unweighted (univariate)", { X = iris[-1L], v = setdiff(colnames(iris[-1L]), "Petal.Width"), y = y, - w = iris$Petal.Width + w = iris$Petal.Width, + verbose = FALSE ) set.seed(1L) s4 <- perm_importance( - fit, X = iris, v = colnames(iris[-1L]), y = y, w = "Petal.Width" + fit, X = iris, v = colnames(iris[-1L]), y = y, w = "Petal.Width", verbose = FALSE ) expect_false(identical(s1, s2)) @@ -71,7 +81,7 @@ test_that("non-constant weights is different from unweighted (univariate)", { test_that("results reacts to `m_rep` (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[-1L], y = y, m_rep = 100L) + s2 <- perm_importance(fit, X = iris[-1L], y = y, m_rep = 100L, verbose = FALSE) expect_false(identical(s1$M, s2$M)) vv <- c("Species", "Sepal.Width") expect_true(all(s1$SE[vv, ] > s2$SE[vv, ])) @@ -79,15 +89,19 @@ test_that("results reacts to `m_rep` (univariate)", { test_that("perm_importance() reacts to `loss` (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[-1L], y = y, loss = "gamma") + s2 <- perm_importance(fit, X = iris[-1L], y = y, loss = "gamma", verbose = FALSE) expect_false(identical(s1, s2)) }) test_that("perm_importance() accepts functions as losses (univariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[-1L], y = y, m_rep = 2L, loss = loss_gamma) + s2 <- perm_importance( + fit, X = iris[-1L], y = y, m_rep = 2L, loss = loss_gamma, verbose = FALSE + ) set.seed(1L) - s3 <- perm_importance(fit, X = iris[-1L], y = y, m_rep = 2L, loss = "gamma") + s3 <- perm_importance( + fit, X = iris[-1L], y = y, m_rep = 2L, loss = "gamma", verbose = FALSE + ) expect_equal(s2, s3) }) @@ -98,11 +112,11 @@ test_that("plot() gives ggplot object (univariate)", { test_that("Subsetting has an impact (univariate)", { set.seed(1L) s2 <- perm_importance( - fit, X = iris[-1L], y = y, m_rep = 1L, n_max = 50, w = "Petal.Width" + fit, X = iris[-1L], y = y, m_rep = 1L, n_max = 50, w = "Petal.Width", verbose = FALSE ) set.seed(1L) s3 <- perm_importance( - fit, X = iris[-1L], y = y, m_rep = 2L, n_max = 100, w = "Petal.Width" + fit, X = iris[-1L], y = y, m_rep = 2L, n_max = 100, w = "Petal.Width", verbose = FALSE ) expect_false(identical(s2, s3)) }) @@ -112,13 +126,13 @@ test_that("groups of variables work as well", { v2 <- setNames(as.list(v), v) v3 <- c(v2, list(petal = c("Petal.Width", "Petal.Length"))) set.seed(1L) - s1b <- perm_importance(fit, v = v, X = iris, y = y) + s1b <- perm_importance(fit, v = v, X = iris, y = y, verbose = FALSE) set.seed(1L) - s2 <- perm_importance(fit, v = v2, X = iris, y = y) + s2 <- perm_importance(fit, v = v2, X = iris, y = y, verbose = FALSE) expect_equal(s1b, s2) set.seed(1L) - s3 <- perm_importance(fit, v = v3, X = iris, y = y) + s3 <- perm_importance(fit, v = v3, X = iris, y = y, verbose = FALSE) expect_equal(s1b$M[v, , drop = FALSE], s3$M[v, , drop = FALSE]) expect_equal(sort(rownames(s3$M)), sort(names(v3))) @@ -129,7 +143,7 @@ test_that("matrix case works as well", { fit <- lm.fit(x = X, y = y) pred_fun <- function(m, X) X %*% m$coefficients expect_no_error( - perm_importance(fit, X = X, y = y, pred_fun = pred_fun) + perm_importance(fit, X = X, y = y, pred_fun = pred_fun, verbose = FALSE) ) }) @@ -141,7 +155,8 @@ test_that("non-numeric predictions can work as well (classification error)", { X = iris, y = iris$Species, pred_fun = function(m, X) rep("setosa", times = nrow(X)), - loss = "classification_error" + loss = "classification_error", + verbose = FALSE )$M), 0 ) @@ -156,7 +171,7 @@ yy <- colnames(y) fit <- lm(y ~ Petal.Length + Species, data = iris) v <- c("Petal.Length", "Petal.Width", "Species") set.seed(1L) -s1 <- perm_importance(fit, X = iris[3:5], y = y) +s1 <- perm_importance(fit, X = iris[3:5], y = y, verbose = FALSE) perf <- average_loss(fit, X = iris, y = y)$M test_that("print() does not give error (multivariate)", { @@ -165,17 +180,17 @@ test_that("print() does not give error (multivariate)", { test_that("response can be passed as vector (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris, y = yy) + s2 <- perm_importance(fit, X = iris, y = yy, verbose = FALSE) expect_equal(s1, s2) set.seed(1L) - s3 <- perm_importance(fit, X = iris, y = yy, v = colnames(iris)) + s3 <- perm_importance(fit, X = iris, y = yy, v = colnames(iris), verbose = FALSE) expect_true(nrow(s2$M) < nrow(s3$M)) }) test_that("agg_cols works (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, agg_cols = TRUE) + s2 <- perm_importance(fit, X = iris[3:5], y = y, agg_cols = TRUE, verbose = FALSE) expect_equal(rowSums(s1$M), drop(s2$M)) }) @@ -183,7 +198,7 @@ test_that("normalize works (multivariate, non-aggregated)", { i1 <- s1$M / matrix(perf, nrow = 3L, ncol = 2L, byrow = TRUE) set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, normalize = TRUE) + s2 <- perm_importance(fit, X = iris[3:5], y = y, normalize = TRUE, verbose = FALSE) i2 <- s2$M vv <- rownames(i1) expect_equal(i1, i2[vv, , drop = FALSE]) @@ -191,19 +206,21 @@ test_that("normalize works (multivariate, non-aggregated)", { test_that("normalize works (multivariate, aggregated)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, agg_cols = TRUE) + s2 <- perm_importance(fit, X = iris[3:5], y = y, agg_cols = TRUE, verbose = FALSE) perf2 <- rowSums(average_loss(fit, X = iris, y = y)$M) i2 <- s2$M / perf2 set.seed(1L) - s3 <- perm_importance(fit, X = iris[3:5], y = y, normalize = TRUE, agg_cols = TRUE) + s3 <- perm_importance( + fit, X = iris[3:5], y = y, normalize = TRUE, agg_cols = TRUE, verbose = FALSE + ) i3 <- s3$M expect_equal(i2, i3) }) test_that("v can be selected (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris, y = y, v = v) + s2 <- perm_importance(fit, X = iris, y = y, v = v, verbose = FALSE) expect_equal(s1, s2) }) @@ -213,20 +230,22 @@ test_that("results are positive for modeled features and zero otherwise (multiva }) test_that("perm_importance() raises some errors (multivariate)", { - expect_error(perm_importance(fit, X = iris[3:5], y = 1:10)) - expect_error(perm_importance(fit, X = iris[3:5], y = "hi")) - expect_error(perm_importance(fit, X = iris, y = rev(yy))) + expect_error(perm_importance(fit, X = iris[3:5], y = 1:10, verbose = FALSE)) + expect_error(perm_importance(fit, X = iris[3:5], y = "hi", verbose = FALSE)) + expect_error(perm_importance(fit, X = iris, y = rev(yy), verbose = FALSE)) }) test_that("constant weights is same as unweighted (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, w = rep(2, nrow(iris))) + s2 <- perm_importance( + fit, X = iris[3:5], y = y, w = rep(2, nrow(iris)), verbose = FALSE + ) expect_equal(s1, s2) }) test_that("non-constant weights is different from unweighted (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, w = "Petal.Width") + s2 <- perm_importance(fit, X = iris[3:5], y = y, w = "Petal.Width", verbose = FALSE) set.seed(1L) s3 <- perm_importance( @@ -234,11 +253,12 @@ test_that("non-constant weights is different from unweighted (multivariate)", { X = iris[3:5], v = setdiff(colnames(iris[3:5]), "Petal.Width"), y = y, - w = iris$Petal.Width + w = iris$Petal.Width, + verbose = FALSE ) set.seed(1L) s4 <- perm_importance( - fit, X = iris, v = colnames(iris[3:5]), y = y, w = "Petal.Width" + fit, X = iris, v = colnames(iris[3:5]), y = y, w = "Petal.Width", verbose = FALSE ) expect_false(identical(s1, s2)) @@ -248,7 +268,7 @@ test_that("non-constant weights is different from unweighted (multivariate)", { test_that("perm_importance() reacts to `m_rep` (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, m_rep = 1L) + s2 <- perm_importance(fit, X = iris[3:5], y = y, m_rep = 1L, verbose = FALSE) expect_false(identical(s1$M, s2$M)) expect_true(!anyNA(s1$SE)) expect_true(all(is.na(s2$SE))) @@ -256,16 +276,22 @@ test_that("perm_importance() reacts to `m_rep` (multivariate)", { test_that("perm_importance() reacts to `loss` (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, m_rep = 2L, loss = "gamma") + s2 <- perm_importance( + fit, X = iris[3:5], y = y, m_rep = 2L, loss = "gamma", verbose = FALSE + ) expect_false(identical(s1, s2)) }) test_that("perm_importance() accepts functions as losses (multivariate)", { set.seed(1L) - s2 <- perm_importance(fit, X = iris[3:5], y = y, m_rep = 2L, loss = loss_gamma) + s2 <- perm_importance( + fit, X = iris[3:5], y = y, m_rep = 2L, loss = loss_gamma, verbose = FALSE + ) set.seed(1L) - s3 <- perm_importance(fit, X = iris[3:5], y = y, m_rep = 2L, loss = "gamma") + s3 <- perm_importance( + fit, X = iris[3:5], y = y, m_rep = 2L, loss = "gamma", verbose = FALSE + ) expect_equal(s2, s3) }) @@ -275,7 +301,7 @@ test_that("plot() gives ggplot object (multivariate)", { expect_s3_class(plot(s1, err_type = "No"), "ggplot") expect_s3_class(plot(s1, err_type = "No", swap_dim = TRUE), "ggplot") - s2 <- perm_importance(fit, X = iris[3:5], y = y, m_rep = 1L) + s2 <- perm_importance(fit, X = iris[3:5], y = y, m_rep = 1L, verbose = FALSE) expect_s3_class(plot(s2), "ggplot") expect_s3_class(plot(s2, swap_dim = TRUE, fill = "red"), "ggplot") }) @@ -283,11 +309,11 @@ test_that("plot() gives ggplot object (multivariate)", { test_that("Subsetting has an impact (multivariate)", { set.seed(1L) s2 <- perm_importance( - fit, X = iris[3:5], y = y, m_rep = 1L, n_max = 50, w = "Petal.Width" + fit, X = iris[3:5], y = y, m_rep = 1L, n_max = 50, w = "Petal.Width", verbose = FALSE ) set.seed(1L) s3 <- perm_importance( - fit, v = v, X = iris, y = y, m_rep = 2L, n_max = 100, w = "Petal.Width" + fit, v = v, X = iris, y = y, m_rep = 2L, n_max = 100, w = "Petal.Width", verbose = FALSE ) expect_false(identical(s2, s3)) }) @@ -296,11 +322,11 @@ test_that("groups of variables work as well", { v2 <- setNames(as.list(v), v) v3 <- c(v2, list(petal = c("Petal.Width", "Petal.Length"))) set.seed(1L) - s2 <- perm_importance(fit, v = v2, X = iris, y = y) + s2 <- perm_importance(fit, v = v2, X = iris, y = y, verbose = FALSE) expect_equal(s1, s2) set.seed(1L) - s3 <- perm_importance(fit, v = v3, X = iris, y = y) + s3 <- perm_importance(fit, v = v3, X = iris, y = y, verbose = FALSE) expect_equal(s1$M[v, , drop = FALSE], s3$M[v, , drop = FALSE]) expect_equal(sort(rownames(s3$M)), sort(names(v3))) }) @@ -312,11 +338,11 @@ test_that("mlogloss works with either matrix y or vector y", { Y <- model.matrix(~ y + 0) set.seed(1L) s1 <- perm_importance( - NULL, v = "z", X = X, y = Y, loss = "mlogloss", pred_fun = pred_fun + NULL, v = "z", X = X, y = Y, loss = "mlogloss", pred_fun = pred_fun, verbose = FALSE ) set.seed(1L) s2 <- perm_importance( - NULL, v = "z", X = X, y = y, loss = "mlogloss", pred_fun = pred_fun + NULL, v = "z", X = X, y = y, loss = "mlogloss", pred_fun = pred_fun, verbose = FALSE ) expect_equal(s1, s2) }) @@ -330,9 +356,13 @@ test_that("Single output multiple models works without recycling y", { pf <- function(m, x) sapply(fit, FUN = predict, newdata = x) set.seed(1L) - s1 <- perm_importance(fit, X = iris[-1L], y = Y, loss = "poisson", pred_fun = pf) + s1 <- perm_importance( + fit, X = iris[-1L], y = Y, loss = "poisson", pred_fun = pf, verbose = FALSE + ) set.seed(1L) - s2 <- perm_importance(fit, X = iris[-1L], y = y, loss = "poisson", pred_fun = pf) + s2 <- perm_importance( + fit, X = iris[-1L], y = y, loss = "poisson", pred_fun = pf, verbose = FALSE + ) expect_equal(s1, s2) }) @@ -345,22 +375,29 @@ test_that("loss_mlogloss() is in line with loss_logloss() in binary case", { if (multi) cbind(no = 1 - out, yes = out) else out } set.seed(1L) - imp1 <- perm_importance(fit, X = iris[-5L], y = y, pred_fun = pf, loss = "logloss") + imp1 <- perm_importance( + fit, X = iris[-5L], y = y, pred_fun = pf, loss = "logloss", verbose = FALSE + ) set.seed(1L) imp2 <- perm_importance( - fit, X = iris[-5L], y = Y, pred_fun = pf, loss = "mlogloss", multi = TRUE + fit, X = iris[-5L], y = Y, pred_fun = pf, + loss = "mlogloss", multi = TRUE, verbose = FALSE ) expect_equal(imp1, imp2) }) test_that("perm_importance() works with missing values", { # Univariate model - X <- data.frame(x1 = 1:6, x2 = c(NA, 1, 2, 1, 1, 3), x3 = factor(c("A", NA, NA, "B", "A", "A"))) + X <- data.frame( + x1 = 1:6, x2 = c(NA, 1, 2, 1, 1, 3), x3 = factor(c("A", NA, NA, "B", "A", "A")) + ) y <- 1:6 pf <- function(fit, x) x$x1 fit <- "a model" set.seed(1L) - expect_no_error(r <- perm_importance(fit, X = X, y = y, pred_fun = pf)) + expect_no_error( + r <- perm_importance(fit, X = X, y = y, pred_fun = pf, verbose = FALSE) + ) expect_true(r$M[1L] > 0 && all(r$M[2:3] == 0)) }) \ No newline at end of file