From 29297ef1c7e93dd3f42e9b9ae3f1332616be1fa2 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Thu, 19 Oct 2023 22:05:38 +0200 Subject: [PATCH] Fix arguments of average_loss() --- NEWS.md | 1 + R/average_loss.R | 25 +++++++++++++++++-------- R/hstats.R | 40 ++++++++++++++++++++++------------------ R/ice.R | 4 ++-- man/average_loss.Rd | 3 +++ man/hstats.Rd | 26 +++++++++++++------------- man/ice.Rd | 4 ++-- 7 files changed, 60 insertions(+), 43 deletions(-) diff --git a/NEWS.md b/NEWS.md index 99a5005c..409c78e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,7 @@ - `average_loss()` also returns a "hstats_matrix" object with `print()` and `plot()` method. The values can be extracted via `$M`. - The default `v` of `hstats()` and `perm_importance()` is now `NULL`. Internally, it is set to `colnames(X)` (minus the column names of `w` and `y` if passed as name). - Missing grid values: `partial_dep()` and `ice()` have received a `na.rm` argument that controls if missing values are dropped during grid creation. The default `TRUE` is compatible with earlier releases. +- The positions of some function arguments have changed. # hstats 0.3.0 diff --git a/R/average_loss.R b/R/average_loss.R index 28c663ae..d8f93ce5 100644 --- a/R/average_loss.R +++ b/R/average_loss.R @@ -115,16 +115,19 @@ average_loss.default <- function(object, X, y, #' @export average_loss.ranger <- function(object, X, y, pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions, - loss = "squared_error", + loss = "squared_error", + agg_cols = FALSE, BY = NULL, by_size = 4L, w = NULL, ...) { average_loss.default( object = object, X = X, y = y, - pred_fun = pred_fun, - BY = BY, + pred_fun = pred_fun, loss = loss, + agg_cols = agg_cols, + BY = BY, + by_size = by_size, w = w, ... 
) @@ -134,7 +137,8 @@ average_loss.ranger <- function(object, X, y, #' @export average_loss.Learner <- function(object, X, y, pred_fun = NULL, - loss = "squared_error", + loss = "squared_error", + agg_cols = FALSE, BY = NULL, by_size = 4L, w = NULL, ...) { if (is.null(pred_fun)) { @@ -145,8 +149,10 @@ average_loss.Learner <- function(object, X, y, X = X, y = y, pred_fun = pred_fun, - BY = BY, - loss = loss, + loss = loss, + agg_cols = agg_cols, + BY = BY, + by_size = by_size, w = w, ... ) @@ -158,7 +164,8 @@ average_loss.explainer <- function(object, X = object[["data"]], y = object[["y"]], pred_fun = object[["predict_function"]], - loss = "squared_error", + loss = "squared_error", + agg_cols = FALSE, BY = NULL, by_size = 4L, w = object[["weights"]], @@ -168,8 +175,10 @@ average_loss.explainer <- function(object, X = X, y = y, pred_fun = pred_fun, - BY = BY, loss = loss, + agg_cols = agg_cols, + BY = BY, + by_size = by_size, w = w, ... ) diff --git a/R/hstats.R b/R/hstats.R index dcd115a8..5d554eab 100644 --- a/R/hstats.R +++ b/R/hstats.R @@ -33,9 +33,6 @@ #' (such as `type = "response"` in a GLM, or `reshape = TRUE` in a multiclass XGBoost #' model) can be passed via `...`. The default, [stats::predict()], will work in #' most cases. -#' @param n_max If `X` has more than `n_max` rows, a random sample of `n_max` rows is -#' selected from `X`. In this case, set a random seed for reproducibility. -#' @param w Optional vector of case weights. Can also be a column name of `X`. #' @param pairwise_m Number of features for which pairwise statistics are to be #' calculated. The features are selected based on Friedman and Popescu's overall #' interaction strength \eqn{H^2_j}. Set to to 0 to avoid pairwise calculations. @@ -50,6 +47,9 @@ #' speed-up for dense features, mainly for one-way statistics. #' Note that the quantiles are calculated after subsampling to `n_max` rows. #' @param eps Threshold below which numerator values are set to 0. Default is 1e-10. 
+#' @param n_max If `X` has more than `n_max` rows, a random sample of `n_max` rows is +#' selected from `X`. In this case, set a random seed for reproducibility. +#' @param w Optional vector of case weights. Can also be a column name of `X`. #' @param verbose Should a progress bar be shown? The default is `TRUE`. #' @param ... Additional arguments passed to `pred_fun(object, X, ...)`, #' for instance `type = "response"` in a [glm()] model, or `reshape = TRUE` in a @@ -139,9 +139,10 @@ hstats <- function(object, ...) { #' @describeIn hstats Default hstats method. #' @export hstats.default <- function(object, X, v = NULL, - pred_fun = stats::predict, n_max = 500L, - w = NULL, pairwise_m = 5L, threeway_m = 0L, - quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) { + pred_fun = stats::predict, + pairwise_m = 5L, threeway_m = 0L, + quant_approx = NULL, eps = 1e-10, + n_max = 500L, w = NULL, verbose = TRUE, ...) { stopifnot( is.matrix(X) || is.data.frame(X), is.function(pred_fun) @@ -275,19 +276,20 @@ hstats.default <- function(object, X, v = NULL, #' @export hstats.ranger <- function(object, X, v = NULL, pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions, - n_max = 500L, w = NULL, pairwise_m = 5L, threeway_m = 0L, - quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) { + pairwise_m = 5L, threeway_m = 0L, + quant_approx = NULL, eps = 1e-10, + n_max = 500L, w = NULL, verbose = TRUE, ...) { hstats.default( object = object, X = X, v = v, pred_fun = pred_fun, - n_max = n_max, - w = w, pairwise_m = pairwise_m, threeway_m = threeway_m, quant_approx = quant_approx, eps = eps, + n_max = n_max, + w = w, verbose = verbose, ... ) @@ -297,8 +299,9 @@ hstats.ranger <- function(object, X, v = NULL, #' @export hstats.Learner <- function(object, X, v = NULL, pred_fun = NULL, - n_max = 500L, w = NULL, pairwise_m = 5L, threeway_m = 0L, - quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) 
{ + pairwise_m = 5L, threeway_m = 0L, + quant_approx = NULL, eps = 1e-10, + n_max = 500L, w = NULL, verbose = TRUE, ...) { if (is.null(pred_fun)) { pred_fun <- mlr3_pred_fun(object, X = X) } @@ -307,12 +310,12 @@ hstats.Learner <- function(object, X, v = NULL, X = X, v = v, pred_fun = pred_fun, - n_max = n_max, - w = w, pairwise_m = pairwise_m, threeway_m = threeway_m, quant_approx = quant_approx, eps = eps, + n_max = n_max, + w = w, verbose = verbose, ... ) @@ -323,20 +326,21 @@ hstats.Learner <- function(object, X, v = NULL, hstats.explainer <- function(object, X = object[["data"]], v = NULL, pred_fun = object[["predict_function"]], - n_max = 500L, w = object[["weights"]], pairwise_m = 5L, threeway_m = 0L, - quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) { + quant_approx = NULL, eps = 1e-10, + n_max = 500L, w = object[["weights"]], + verbose = TRUE, ...) { hstats.default( object = object[["model"]], X = X, v = v, pred_fun = pred_fun, - n_max = n_max, - w = w, pairwise_m = pairwise_m, threeway_m = threeway_m, quant_approx = quant_approx, eps = eps, + n_max = n_max, + w = w, verbose = verbose, ... ) diff --git a/R/ice.R b/R/ice.R index 83a0d3a6..f5581015 100644 --- a/R/ice.R +++ b/R/ice.R @@ -144,7 +144,7 @@ ice.ranger <- function(object, v, X, BY = NULL, grid = NULL, grid_size = 49L, trim = c(0.01, 0.99), strategy = c("uniform", "quantile"), na.rm = TRUE, - n_max = 100, ...) { + n_max = 100L, ...) { ice.default( object = object, v = v, @@ -194,7 +194,7 @@ ice.explainer <- function(object, v = v, X = object[["data"]], BY = NULL, grid = NULL, grid_size = 49L, trim = c(0.01, 0.99), strategy = c("uniform", "quantile"), na.rm = TRUE, - n_max = 100, ...) { + n_max = 100L, ...) { ice.default( object = object[["model"]], v = v, diff --git a/man/average_loss.Rd b/man/average_loss.Rd index 443c1e9c..b5009091 100644 --- a/man/average_loss.Rd +++ b/man/average_loss.Rd @@ -29,6 +29,7 @@ average_loss(object, ...) y, pred_fun = function(m, X, ...) 
stats::predict(m, X, ...)$predictions, loss = "squared_error", + agg_cols = FALSE, BY = NULL, by_size = 4L, w = NULL, @@ -41,6 +42,7 @@ average_loss(object, ...) y, pred_fun = NULL, loss = "squared_error", + agg_cols = FALSE, BY = NULL, by_size = 4L, w = NULL, @@ -53,6 +55,7 @@ average_loss(object, ...) y = object[["y"]], pred_fun = object[["predict_function"]], loss = "squared_error", + agg_cols = FALSE, BY = NULL, by_size = 4L, w = object[["weights"]], diff --git a/man/hstats.Rd b/man/hstats.Rd index 9e6f478b..a2bebb79 100644 --- a/man/hstats.Rd +++ b/man/hstats.Rd @@ -15,12 +15,12 @@ hstats(object, ...) X, v = NULL, pred_fun = stats::predict, - n_max = 500L, - w = NULL, pairwise_m = 5L, threeway_m = 0L, quant_approx = NULL, eps = 1e-10, + n_max = 500L, + w = NULL, verbose = TRUE, ... ) @@ -30,12 +30,12 @@ hstats(object, ...) X, v = NULL, pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions, - n_max = 500L, - w = NULL, pairwise_m = 5L, threeway_m = 0L, quant_approx = NULL, eps = 1e-10, + n_max = 500L, + w = NULL, verbose = TRUE, ... ) @@ -45,12 +45,12 @@ hstats(object, ...) X, v = NULL, pred_fun = NULL, - n_max = 500L, - w = NULL, pairwise_m = 5L, threeway_m = 0L, quant_approx = NULL, eps = 1e-10, + n_max = 500L, + w = NULL, verbose = TRUE, ... ) @@ -60,12 +60,12 @@ hstats(object, ...) X = object[["data"]], v = NULL, pred_fun = object[["predict_function"]], - n_max = 500L, - w = object[["weights"]], pairwise_m = 5L, threeway_m = 0L, quant_approx = NULL, eps = 1e-10, + n_max = 500L, + w = object[["weights"]], verbose = TRUE, ... ) @@ -89,11 +89,6 @@ model \code{object}, its second argument a data structure like \code{X}. Additio model) can be passed via \code{...}. The default, \code{\link[stats:predict]{stats::predict()}}, will work in most cases.} -\item{n_max}{If \code{X} has more than \code{n_max} rows, a random sample of \code{n_max} rows is -selected from \code{X}. 
In this case, set a random seed for reproducibility.} - -\item{w}{Optional vector of case weights. Can also be a column name of \code{X}.} - \item{pairwise_m}{Number of features for which pairwise statistics are to be calculated. The features are selected based on Friedman and Popescu's overall interaction strength \eqn{H^2_j}. Set to to 0 to avoid pairwise calculations. @@ -112,6 +107,11 @@ Note that the quantiles are calculated after subsampling to \code{n_max} rows.} \item{eps}{Threshold below which numerator values are set to 0. Default is 1e-10.} +\item{n_max}{If \code{X} has more than \code{n_max} rows, a random sample of \code{n_max} rows is +selected from \code{X}. In this case, set a random seed for reproducibility.} + +\item{w}{Optional vector of case weights. Can also be a column name of \code{X}.} + \item{verbose}{Should a progress bar be shown? The default is \code{TRUE}.} } \value{ diff --git a/man/ice.Rd b/man/ice.Rd index 8100eeb7..f6ad5842 100644 --- a/man/ice.Rd +++ b/man/ice.Rd @@ -36,7 +36,7 @@ ice(object, ...) trim = c(0.01, 0.99), strategy = c("uniform", "quantile"), na.rm = TRUE, - n_max = 100, + n_max = 100L, ... ) @@ -66,7 +66,7 @@ ice(object, ...) trim = c(0.01, 0.99), strategy = c("uniform", "quantile"), na.rm = TRUE, - n_max = 100, + n_max = 100L, ... ) }