From 29297ef1c7e93dd3f42e9b9ae3f1332616be1fa2 Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Thu, 19 Oct 2023 22:05:38 +0200
Subject: [PATCH] Fix arguments of average_loss()

---
 NEWS.md             |  1 +
 R/average_loss.R    | 25 +++++++++++++++++--------
 R/hstats.R          | 40 ++++++++++++++++++++++------------------
 R/ice.R             |  4 ++--
 man/average_loss.Rd |  3 +++
 man/hstats.Rd       | 26 +++++++++++++-------------
 man/ice.Rd          |  4 ++--
 7 files changed, 60 insertions(+), 43 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 99a5005c..409c78e3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -22,6 +22,7 @@
 - `average_loss()` also returns a "hstats_matrix" object with `print()` and `plot()` method. The values can be extracted via `$M`.
 - The default `v` of `hstats()` and `perm_importance()` is now `NULL`. Internally, it is set to `colnames(X)` (minus the column names of `w` and `y` if passed as name).
 - Missing grid values: `partial_dep()` and `ice()` have received a `na.rm` argument that controls if missing values are dropped during grid creation. The default `TRUE` is compatible with earlier releases.
+- The positions of some function arguments have changed.
 
 # hstats 0.3.0
 
diff --git a/R/average_loss.R b/R/average_loss.R
index 28c663ae..d8f93ce5 100644
--- a/R/average_loss.R
+++ b/R/average_loss.R
@@ -115,16 +115,19 @@ average_loss.default <- function(object, X, y,
 #' @export
 average_loss.ranger <- function(object, X, y, 
                                 pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
-                                loss = "squared_error", 
+                                loss = "squared_error",
+                                agg_cols = FALSE,
                                 BY = NULL, by_size = 4L, 
                                 w = NULL, ...) {
   average_loss.default(
     object = object, 
     X = X, 
     y = y, 
-    pred_fun = pred_fun, 
-    BY = BY, 
+    pred_fun = pred_fun,
     loss = loss, 
+    agg_cols = agg_cols,
+    BY = BY,
+    by_size = by_size,
     w = w, 
     ...
   )
@@ -134,7 +137,8 @@ average_loss.ranger <- function(object, X, y,
 #' @export
 average_loss.Learner <- function(object, X, y, 
                                  pred_fun = NULL,
-                                 loss = "squared_error", 
+                                 loss = "squared_error",
+                                 agg_cols = FALSE,
                                  BY = NULL, by_size = 4L, 
                                  w = NULL, ...) {
   if (is.null(pred_fun)) {
@@ -145,8 +149,10 @@ average_loss.Learner <- function(object, X, y,
     X = X, 
     y = y, 
     pred_fun = pred_fun, 
-    BY = BY, 
-    loss = loss, 
+    loss = loss,
+    agg_cols = agg_cols,
+    BY = BY,
+    by_size = by_size,
     w = w, 
     ...
   )
@@ -158,7 +164,8 @@ average_loss.explainer <- function(object,
                                    X = object[["data"]], 
                                    y = object[["y"]], 
                                    pred_fun = object[["predict_function"]],
-                                   loss = "squared_error", 
+                                   loss = "squared_error",
+                                   agg_cols = FALSE,
                                    BY = NULL, 
                                    by_size = 4L,
                                    w = object[["weights"]], 
@@ -168,8 +175,10 @@ average_loss.explainer <- function(object,
     X = X,
     y = y,
     pred_fun = pred_fun,
-    BY = BY,
     loss = loss,
+    agg_cols = agg_cols,
+    BY = BY,
+    by_size = by_size,
     w = w,
     ...
   )
diff --git a/R/hstats.R b/R/hstats.R
index dcd115a8..5d554eab 100644
--- a/R/hstats.R
+++ b/R/hstats.R
@@ -33,9 +33,6 @@
 #'   (such as `type = "response"` in a GLM, or `reshape = TRUE` in a multiclass XGBoost
 #'   model) can be passed via `...`. The default, [stats::predict()], will work in 
 #'   most cases.
-#' @param n_max If `X` has more than `n_max` rows, a random sample of `n_max` rows is
-#'   selected from `X`. In this case, set a random seed for reproducibility.
-#' @param w Optional vector of case weights. Can also be a column name of `X`.
 #' @param pairwise_m Number of features for which pairwise statistics are to be 
 #'   calculated. The features are selected based on Friedman and Popescu's overall 
 #'   interaction strength \eqn{H^2_j}. Set to to 0 to avoid pairwise calculations.
@@ -50,6 +47,9 @@
 #'   speed-up for dense features, mainly for one-way statistics. 
 #'   Note that the quantiles are calculated after subsampling to `n_max` rows.
 #' @param eps Threshold below which numerator values are set to 0. Default is 1e-10.
+#' @param n_max If `X` has more than `n_max` rows, a random sample of `n_max` rows is
+#'   selected from `X`. In this case, set a random seed for reproducibility.
+#' @param w Optional vector of case weights. Can also be a column name of `X`.
 #' @param verbose Should a progress bar be shown? The default is `TRUE`.
 #' @param ... Additional arguments passed to `pred_fun(object, X, ...)`, 
 #'   for instance `type = "response"` in a [glm()] model, or `reshape = TRUE` in a 
@@ -139,9 +139,10 @@ hstats <- function(object, ...) {
 #' @describeIn hstats Default hstats method.
 #' @export
 hstats.default <- function(object, X, v = NULL,
-                           pred_fun = stats::predict, n_max = 500L, 
-                           w = NULL, pairwise_m = 5L, threeway_m = 0L,
-                           quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) {
+                           pred_fun = stats::predict, 
+                           pairwise_m = 5L, threeway_m = 0L,
+                           quant_approx = NULL, eps = 1e-10, 
+                           n_max = 500L, w = NULL, verbose = TRUE, ...) {
   stopifnot(
     is.matrix(X) || is.data.frame(X),
     is.function(pred_fun)
@@ -275,19 +276,20 @@ hstats.default <- function(object, X, v = NULL,
 #' @export
 hstats.ranger <- function(object, X, v = NULL,
                           pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
-                          n_max = 500L, w = NULL, pairwise_m = 5L, threeway_m = 0L,
-                          quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) {
+                          pairwise_m = 5L, threeway_m = 0L,
+                          quant_approx = NULL, eps = 1e-10, 
+                          n_max = 500L, w = NULL, verbose = TRUE, ...) {
   hstats.default(
     object = object,
     X = X,
     v = v,
     pred_fun = pred_fun,
-    n_max = n_max,
-    w = w,
     pairwise_m = pairwise_m,
     threeway_m = threeway_m,
     quant_approx = quant_approx, 
     eps = eps,
+    n_max = n_max,
+    w = w,
     verbose = verbose,
     ...
   )
@@ -297,8 +299,9 @@ hstats.ranger <- function(object, X, v = NULL,
 #' @export
 hstats.Learner <- function(object, X, v = NULL,
                            pred_fun = NULL,
-                           n_max = 500L, w = NULL, pairwise_m = 5L, threeway_m = 0L, 
-                           quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) {
+                           pairwise_m = 5L, threeway_m = 0L, 
+                           quant_approx = NULL, eps = 1e-10, 
+                           n_max = 500L, w = NULL, verbose = TRUE, ...) {
   if (is.null(pred_fun)) {
     pred_fun <- mlr3_pred_fun(object, X = X)
   }
@@ -307,12 +310,12 @@ hstats.Learner <- function(object, X, v = NULL,
     X = X,
     v = v,
     pred_fun = pred_fun,
-    n_max = n_max,
-    w = w,
     pairwise_m = pairwise_m,
     threeway_m = threeway_m,
     quant_approx = quant_approx,
     eps = eps,
+    n_max = n_max,
+    w = w,
     verbose = verbose,
     ...
   )
@@ -323,20 +326,21 @@ hstats.Learner <- function(object, X, v = NULL,
 hstats.explainer <- function(object, X = object[["data"]],
                              v = NULL,
                              pred_fun = object[["predict_function"]],
-                             n_max = 500L, w = object[["weights"]], 
                              pairwise_m = 5L, threeway_m = 0L,
-                             quant_approx = NULL, eps = 1e-10, verbose = TRUE, ...) {
+                             quant_approx = NULL, eps = 1e-10, 
+                             n_max = 500L, w = object[["weights"]], 
+                             verbose = TRUE, ...) {
   hstats.default(
     object = object[["model"]],
     X = X,
     v = v,
     pred_fun = pred_fun,
-    n_max = n_max,
-    w = w,
     pairwise_m = pairwise_m,
     threeway_m = threeway_m,
     quant_approx = quant_approx,
     eps = eps,
+    n_max = n_max,
+    w = w,
     verbose = verbose,
     ...
   )
diff --git a/R/ice.R b/R/ice.R
index 83a0d3a6..f5581015 100644
--- a/R/ice.R
+++ b/R/ice.R
@@ -144,7 +144,7 @@ ice.ranger <- function(object, v, X,
                        BY = NULL, grid = NULL, grid_size = 49L,
                        trim = c(0.01, 0.99),
                        strategy = c("uniform", "quantile"), na.rm = TRUE,
-                       n_max = 100, ...) {
+                       n_max = 100L, ...) {
   ice.default(
     object = object,
     v = v,
@@ -194,7 +194,7 @@ ice.explainer <- function(object, v = v, X = object[["data"]],
                           BY = NULL, grid = NULL, grid_size = 49L,
                           trim = c(0.01, 0.99),
                           strategy = c("uniform", "quantile"), na.rm = TRUE,
-                          n_max = 100, ...) {
+                          n_max = 100L, ...) {
   ice.default(
     object = object[["model"]],
     v = v,
diff --git a/man/average_loss.Rd b/man/average_loss.Rd
index 443c1e9c..b5009091 100644
--- a/man/average_loss.Rd
+++ b/man/average_loss.Rd
@@ -29,6 +29,7 @@ average_loss(object, ...)
   y,
   pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
   loss = "squared_error",
+  agg_cols = FALSE,
   BY = NULL,
   by_size = 4L,
   w = NULL,
@@ -41,6 +42,7 @@ average_loss(object, ...)
   y,
   pred_fun = NULL,
   loss = "squared_error",
+  agg_cols = FALSE,
   BY = NULL,
   by_size = 4L,
   w = NULL,
@@ -53,6 +55,7 @@ average_loss(object, ...)
   y = object[["y"]],
   pred_fun = object[["predict_function"]],
   loss = "squared_error",
+  agg_cols = FALSE,
   BY = NULL,
   by_size = 4L,
   w = object[["weights"]],
diff --git a/man/hstats.Rd b/man/hstats.Rd
index 9e6f478b..a2bebb79 100644
--- a/man/hstats.Rd
+++ b/man/hstats.Rd
@@ -15,12 +15,12 @@ hstats(object, ...)
   X,
   v = NULL,
   pred_fun = stats::predict,
-  n_max = 500L,
-  w = NULL,
   pairwise_m = 5L,
   threeway_m = 0L,
   quant_approx = NULL,
   eps = 1e-10,
+  n_max = 500L,
+  w = NULL,
   verbose = TRUE,
   ...
 )
@@ -30,12 +30,12 @@ hstats(object, ...)
   X,
   v = NULL,
   pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
-  n_max = 500L,
-  w = NULL,
   pairwise_m = 5L,
   threeway_m = 0L,
   quant_approx = NULL,
   eps = 1e-10,
+  n_max = 500L,
+  w = NULL,
   verbose = TRUE,
   ...
 )
@@ -45,12 +45,12 @@ hstats(object, ...)
   X,
   v = NULL,
   pred_fun = NULL,
-  n_max = 500L,
-  w = NULL,
   pairwise_m = 5L,
   threeway_m = 0L,
   quant_approx = NULL,
   eps = 1e-10,
+  n_max = 500L,
+  w = NULL,
   verbose = TRUE,
   ...
 )
@@ -60,12 +60,12 @@ hstats(object, ...)
   X = object[["data"]],
   v = NULL,
   pred_fun = object[["predict_function"]],
-  n_max = 500L,
-  w = object[["weights"]],
   pairwise_m = 5L,
   threeway_m = 0L,
   quant_approx = NULL,
   eps = 1e-10,
+  n_max = 500L,
+  w = object[["weights"]],
   verbose = TRUE,
   ...
 )
@@ -89,11 +89,6 @@ model \code{object}, its second argument a data structure like \code{X}. Additio
 model) can be passed via \code{...}. The default, \code{\link[stats:predict]{stats::predict()}}, will work in
 most cases.}
 
-\item{n_max}{If \code{X} has more than \code{n_max} rows, a random sample of \code{n_max} rows is
-selected from \code{X}. In this case, set a random seed for reproducibility.}
-
-\item{w}{Optional vector of case weights. Can also be a column name of \code{X}.}
-
 \item{pairwise_m}{Number of features for which pairwise statistics are to be
 calculated. The features are selected based on Friedman and Popescu's overall
 interaction strength \eqn{H^2_j}. Set to to 0 to avoid pairwise calculations.
@@ -112,6 +107,11 @@ Note that the quantiles are calculated after subsampling to \code{n_max} rows.}
 
 \item{eps}{Threshold below which numerator values are set to 0. Default is 1e-10.}
 
+\item{n_max}{If \code{X} has more than \code{n_max} rows, a random sample of \code{n_max} rows is
+selected from \code{X}. In this case, set a random seed for reproducibility.}
+
+\item{w}{Optional vector of case weights. Can also be a column name of \code{X}.}
+
 \item{verbose}{Should a progress bar be shown? The default is \code{TRUE}.}
 }
 \value{
diff --git a/man/ice.Rd b/man/ice.Rd
index 8100eeb7..f6ad5842 100644
--- a/man/ice.Rd
+++ b/man/ice.Rd
@@ -36,7 +36,7 @@ ice(object, ...)
   trim = c(0.01, 0.99),
   strategy = c("uniform", "quantile"),
   na.rm = TRUE,
-  n_max = 100,
+  n_max = 100L,
   ...
 )
 
@@ -66,7 +66,7 @@ ice(object, ...)
   trim = c(0.01, 0.99),
   strategy = c("uniform", "quantile"),
   na.rm = TRUE,
-  n_max = 100,
+  n_max = 100L,
   ...
 )
 }