From d605707de1df3450593400fce4c5580d6725c394 Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Fri, 20 Oct 2023 11:14:23 +0200
Subject: [PATCH] split quant_approx into two arguments

---
 NEWS.md                                       |  2 +-
 R/hstats.R                                    | 97 ++++++-------------
 R/partial_dep.R                               |  2 +-
 R/utils_calculate.R                           | 59 +++++++++++
 R/utils_input.R                               | 16 ---
 man/hstats.Rd                                 | 35 ++++---
 packaging.R                                   |  1 -
 .../{test_utils.R => test_calculate.R}        | 53 +++-------
 tests/testthat/test_hstats.R                  |  9 +-
 tests/testthat/test_statistics.R              | 38 ++++++++
 10 files changed, 168 insertions(+), 144 deletions(-)
 rename tests/testthat/{test_utils.R => test_calculate.R} (81%)

diff --git a/NEWS.md b/NEWS.md
index 409c78e3..b0c5a53e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,7 +2,7 @@
 
 ## Major changes
 
-- `hstats()` has received an argument `quant_approx` to speed-up calculations by quantile binning. Dense numeric variables are replaced by midpoints of `quant_approx + 1` uniform quantiles. By default, the value is `NULL` (no approximation). Even relatively high values like 50 will bring a massive speed-up for dense features, mainly for the one-way calculations. Use this option when calculations are slow, or when you want to increase `n_max`.
+- Quantile approximation: `hstats()` now has the option `approx = FALSE`. Set to `TRUE` to replace values of dense numeric columns by `grid_size = 50` quantile midpoints. This will bring a massive speed-up for one-way calculations. Use this option when one-way calculations are slow, or when you want to increase `n_max`.
 - `hstats()`: `n_max` has been increased from 300 to 500 rows. This will make estimates of H statistics more stable at the price of longer run time. Reduce to 300 for the old behaviour.
 - `hstats()`: Three-way interactions are not anymore calculated by default. Set `threeway_m` to 5 for the old behaviour.
 - Revised plots: The colors and color palettes have changed and can now also be controlled via global options. For instance, to change the fill color of all bars, set `options(hstats.fill = new value)`. Value labels are more clear, and there are more options. Varying color/fill scales now use viridis (inferno). This can be modified on the fly or via `options(hstats.viridis_args = list(...))`.
diff --git a/R/hstats.R b/R/hstats.R
index 5d554eab..58c2c1e7 100644
--- a/R/hstats.R
+++ b/R/hstats.R
@@ -41,14 +41,16 @@
 #' @param threeway_m Like `pairwise_m`, but controls the feature count for 
 #'   three-way interactions. Cannot be larger than `pairwise_m`. 
 #'   To save computation time, the default is 0.
-#' @param quant_approx Integer. Dense numeric variables in `X` are replaced by midpoints 
-#'   of `quant_approx + 1` uniform quantiles. By default, the value is `NULL` 
-#'   (no approximation). Even relatively high values like 50 will bring a massive 
-#'   speed-up for dense features, mainly for one-way statistics. 
-#'   Note that the quantiles are calculated after subsampling to `n_max` rows.
-#' @param eps Threshold below which numerator values are set to 0. Default is 1e-10.
+#' @param approx Should quantile approximation be applied to dense numeric features?
+#'   The default is `FALSE`. Setting this option to `TRUE` brings a massive speed-up
+#'   for one-way calculations. It can, e.g., be used when the number of features is
+#'   very large.
+#' @param grid_size Integer controlling the number of quantile midpoints used to
+#'   approximate dense numerics. The quantile midpoints are calculated after
+#'   subsampling via `n_max`. Only relevant if `approx = TRUE`.
 #' @param n_max If `X` has more than `n_max` rows, a random sample of `n_max` rows is
 #'   selected from `X`. In this case, set a random seed for reproducibility.
+#' @param eps Threshold below which numerator values are set to 0. Default is 1e-10.
 #' @param w Optional vector of case weights. Can also be a column name of `X`.
 #' @param verbose Should a progress bar be shown? The default is `TRUE`.
 #' @param ... Additional arguments passed to `pred_fun(object, X, ...)`, 
@@ -141,8 +143,9 @@ hstats <- function(object, ...) {
 hstats.default <- function(object, X, v = NULL,
                            pred_fun = stats::predict, 
                            pairwise_m = 5L, threeway_m = 0L,
-                           quant_approx = NULL, eps = 1e-10, 
-                           n_max = 500L, w = NULL, verbose = TRUE, ...) {
+                           approx = FALSE, grid_size = 50L, 
+                           n_max = 500L, eps = 1e-10, 
+                           w = NULL, verbose = TRUE, ...) {
   stopifnot(
     is.matrix(X) || is.data.frame(X),
     is.function(pred_fun)
@@ -180,8 +183,8 @@ hstats.default <- function(object, X, v = NULL,
   }
   
   # Quantile approximation to speedup things for dense features
-  if (!is.null(quant_approx)) {
-    X <- approx_matrix_or_df(X = X, v = v, m = quant_approx)
+  if (isTRUE(approx)) {
+    X <- approx_matrix_or_df(X = X, v = v, m = grid_size)
   }
   
   # Predictions ("F" in Friedman and Popescu) always calculated (cheap)
@@ -277,8 +280,9 @@ hstats.default <- function(object, X, v = NULL,
 hstats.ranger <- function(object, X, v = NULL,
                           pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
                           pairwise_m = 5L, threeway_m = 0L,
-                          quant_approx = NULL, eps = 1e-10, 
-                          n_max = 500L, w = NULL, verbose = TRUE, ...) {
+                          approx = FALSE, grid_size = 50L, 
+                          n_max = 500L, eps = 1e-10,
+                          w = NULL, verbose = TRUE, ...) {
   hstats.default(
     object = object,
     X = X,
@@ -286,9 +290,10 @@ hstats.ranger <- function(object, X, v = NULL,
     pred_fun = pred_fun,
     pairwise_m = pairwise_m,
     threeway_m = threeway_m,
-    quant_approx = quant_approx, 
-    eps = eps,
+    approx = approx,
+    grid_size = grid_size,
     n_max = n_max,
+    eps = eps,
     w = w,
     verbose = verbose,
     ...
@@ -300,8 +305,9 @@ hstats.ranger <- function(object, X, v = NULL,
 hstats.Learner <- function(object, X, v = NULL,
                            pred_fun = NULL,
                            pairwise_m = 5L, threeway_m = 0L, 
-                           quant_approx = NULL, eps = 1e-10, 
-                           n_max = 500L, w = NULL, verbose = TRUE, ...) {
+                           approx = FALSE, grid_size = 50L, 
+                           n_max = 500L, eps = 1e-10, 
+                           w = NULL, verbose = TRUE, ...) {
   if (is.null(pred_fun)) {
     pred_fun <- mlr3_pred_fun(object, X = X)
   }
@@ -312,9 +318,10 @@ hstats.Learner <- function(object, X, v = NULL,
     pred_fun = pred_fun,
     pairwise_m = pairwise_m,
     threeway_m = threeway_m,
-    quant_approx = quant_approx,
-    eps = eps,
+    approx = approx,
+    grid_size = grid_size,
     n_max = n_max,
+    eps = eps,
     w = w,
     verbose = verbose,
     ...
@@ -327,9 +334,9 @@ hstats.explainer <- function(object, X = object[["data"]],
                              v = NULL,
                              pred_fun = object[["predict_function"]],
                              pairwise_m = 5L, threeway_m = 0L,
-                             quant_approx = NULL, eps = 1e-10, 
-                             n_max = 500L, w = object[["weights"]], 
-                             verbose = TRUE, ...) {
+                             approx = FALSE, grid_size = 50L, 
+                             n_max = 500L, eps = 1e-10, 
+                             w = object[["weights"]], verbose = TRUE, ...) {
   hstats.default(
     object = object[["model"]],
     X = X,
@@ -337,9 +344,10 @@ hstats.explainer <- function(object, X = object[["data"]],
     pred_fun = pred_fun,
     pairwise_m = pairwise_m,
     threeway_m = threeway_m,
-    quant_approx = quant_approx,
-    eps = eps,
+    approx = approx,
+    grid_size = grid_size,
     n_max = n_max,
+    eps = eps,
     w = w,
     verbose = verbose,
     ...
@@ -548,46 +556,3 @@ get_v <- function(H, m) {
   }
   v[v %in% v_cand]
 }
-
-#' Approximate Vector
-#' 
-#' Internal function. Approximates values by the average of the two closest quantiles.
-#' 
-#' @noRd
-#' @keywords internal
-#' 
-#' @param x A vector or factor.
-#' @param m Number of unique values.
-#' @returns An approximation of `x` (or `x` if non-numeric or discrete).
-approx_vector <- function(x, m = 25L) {
-  if (!is.numeric(x) || length(unique(x)) <= m) {
-    return(x)
-  }
-  p <- seq(0, 1, length.out = m + 1L)
-  q <- unique(stats::quantile(x, probs = p, names = FALSE, na.rm = TRUE))
-  mids <- (q[-length(q)] + q[-1L]) / 2
-  return(mids[findInterval(x, q, rightmost.closed = TRUE)])
-}
-
-#' Approximate df or Matrix
-#' 
-#' Internal function. Calls `approx_vector()` to each column in matrix or data.frame.
-#' 
-#' @noRd
-#' @keywords internal
-#' 
-#' @param X A matrix or data.frame.
-#' @param m Number of unique values.
-#' @returns An approximation of `X` (or `X` if non-numeric or discrete).
-approx_matrix_or_df <- function(X, v = colnames(X), m = 25L) {
-  stopifnot(
-    m >= 2L,
-    is.data.frame(X) || is.matrix(X)
-  )
-  if (is.data.frame(X)) {
-    X[v] <- lapply(X[v], FUN = approx_vector, m = m)  
-  } else {  # Matrix
-    X[, v] <- apply(X[, v, drop = FALSE], MARGIN = 2L, FUN = approx_vector, m = m)  
-  }
-  return(X)
-}
diff --git a/R/partial_dep.R b/R/partial_dep.R
index 0ed60f89..73b4b5f0 100644
--- a/R/partial_dep.R
+++ b/R/partial_dep.R
@@ -29,8 +29,8 @@
 #' A partial dependence plot (PDP) plots the values of \eqn{\hat F_s(\mathbf{x}_s)}
 #' over a grid of evaluation points \eqn{\mathbf{x}_s}.
 #' 
-#' @inheritParams hstats
 #' @inheritParams multivariate_grid
+#' @inheritParams hstats
 #' @param v One or more column names over which you want to calculate the partial
 #'   dependence.
 #' @param grid Evaluation grid. A vector (if `length(v) == 1L`), or a matrix/data.frame 
diff --git a/R/utils_calculate.R b/R/utils_calculate.R
index d15b8933..1ea70a54 100644
--- a/R/utils_calculate.R
+++ b/R/utils_calculate.R
@@ -112,3 +112,62 @@ wcenter <- function(x, w = NULL) {
   # sweep(x, MARGIN = 2L, STATS = wcolMeans(x, w = w))  # Slower
   x - matrix(wcolMeans(x, w = w), nrow = nrow(x), ncol = ncol(x), byrow = TRUE)
 }
+
+#' Bin into Quantiles
+#' 
+#' Internal function. Applies [cut()] to the unique quantiles of `x` taken at
+#' `m + 1` equally spaced probabilities. Missing values are ignored for the breaks.
+#' @noRd
+#' @keywords internal
+#' 
+#' @param x A numeric vector.
+#' @param m Number of intervals requested (fewer result if quantiles are tied).
+#' @returns A factor, representing binned `x`.
+qcut <- function(x, m) {
+  p <- seq(0, 1, length.out = m + 1L)  # m + 1 probabilities => m intervals
+  g <- stats::quantile(x, probs = p, names = FALSE, type = 1L, na.rm = TRUE)
+  cut(x, breaks = unique(g), include.lowest = TRUE)  # tied breaks collapse
+}
+
+#' Approximate Vector
+#' 
+#' Internal function. Replaces each value of a dense numeric vector by the midpoint
+#' of the quantile interval it falls into. Other vectors are returned unchanged.
+#' @noRd
+#' @keywords internal
+#' 
+#' @param x A vector or factor.
+#' @param m Number of quantile intervals; `x` is kept if it has at most `m` unique values.
+#' @returns `x` itself (non-numeric or at most `m` unique values), else its approximation.
+approx_vector <- function(x, m = 50L) {
+  if (!is.numeric(x) || length(unique(x)) <= m) {
+    return(x)  # nothing to approximate
+  }
+  p <- seq(0, 1, length.out = m + 1L)
+  q <- unique(stats::quantile(x, probs = p, names = FALSE, na.rm = TRUE))
+  mids <- (q[-length(q)] + q[-1L]) / 2  # midpoints of adjacent breaks
+  return(mids[findInterval(x, q, rightmost.closed = TRUE)])  # max -> last bin
+}
+
+#' Approximate df or Matrix
+#' 
+#' Internal function. Applies `approx_vector()` to the columns `v` of a matrix or
+#' data.frame and writes the results back into `X`.
+#' @noRd
+#' @keywords internal
+#' @param X A matrix or data.frame.
+#' @param v Column names of `X` to approximate. Defaults to all columns.
+#' @param m Number of quantile intervals passed to `approx_vector()`. At least 2.
+#' @returns `X` with the selected columns replaced by their approximations.
+approx_matrix_or_df <- function(X, v = colnames(X), m = 50L) {
+  stopifnot(
+    m >= 2L,
+    is.data.frame(X) || is.matrix(X)
+  )
+  if (is.data.frame(X)) {
+    X[v] <- lapply(X[v], FUN = approx_vector, m = m)  
+  } else {  # Matrix
+    X[, v] <- apply(X[, v, drop = FALSE], MARGIN = 2L, FUN = approx_vector, m = m)  
+  }
+  return(X)
+}
diff --git a/R/utils_input.R b/R/utils_input.R
index ac671a9c..c59d5027 100644
--- a/R/utils_input.R
+++ b/R/utils_input.R
@@ -1,19 +1,3 @@
-#' Bin into Quantiles
-#' 
-#' Internal function. Applies [cut()] to quantile breaks.
-#' 
-#' @noRd
-#' @keywords internal
-#' 
-#' @param x A numeric vector.
-#' @param m Number of intervals.
-#' @returns A factor, representing binned `x`.
-qcut <- function(x, m) {
-  p <- seq(0, 1, length.out = m + 1L)
-  g <- stats::quantile(x, probs = p, names = FALSE, type = 1L, na.rm = TRUE)
-  cut(x, breaks = unique(g), include.lowest = TRUE)
-}
-
 #' Prepares Group BY Variable
 #' 
 #' Internal function that prepares a BY variable or BY column name.
diff --git a/man/hstats.Rd b/man/hstats.Rd
index a2bebb79..10fb6507 100644
--- a/man/hstats.Rd
+++ b/man/hstats.Rd
@@ -17,9 +17,10 @@ hstats(object, ...)
   pred_fun = stats::predict,
   pairwise_m = 5L,
   threeway_m = 0L,
-  quant_approx = NULL,
-  eps = 1e-10,
+  approx = FALSE,
+  grid_size = 50L,
   n_max = 500L,
+  eps = 1e-10,
   w = NULL,
   verbose = TRUE,
   ...
@@ -32,9 +33,10 @@ hstats(object, ...)
   pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
   pairwise_m = 5L,
   threeway_m = 0L,
-  quant_approx = NULL,
-  eps = 1e-10,
+  approx = FALSE,
+  grid_size = 50L,
   n_max = 500L,
+  eps = 1e-10,
   w = NULL,
   verbose = TRUE,
   ...
@@ -47,9 +49,10 @@ hstats(object, ...)
   pred_fun = NULL,
   pairwise_m = 5L,
   threeway_m = 0L,
-  quant_approx = NULL,
-  eps = 1e-10,
+  approx = FALSE,
+  grid_size = 50L,
   n_max = 500L,
+  eps = 1e-10,
   w = NULL,
   verbose = TRUE,
   ...
@@ -62,9 +65,10 @@ hstats(object, ...)
   pred_fun = object[["predict_function"]],
   pairwise_m = 5L,
   threeway_m = 0L,
-  quant_approx = NULL,
-  eps = 1e-10,
+  approx = FALSE,
+  grid_size = 50L,
   n_max = 500L,
+  eps = 1e-10,
   w = object[["weights"]],
   verbose = TRUE,
   ...
@@ -99,17 +103,20 @@ strongest variable names is taken. This can lead to very long run-times.}
 three-way interactions. Cannot be larger than \code{pairwise_m}.
 To save computation time, the default is 0.}
 
-\item{quant_approx}{Integer. Dense numeric variables in \code{X} are replaced by midpoints
-of \code{quant_approx + 1} uniform quantiles. By default, the value is \code{NULL}
-(no approximation). Even relatively high values like 50 will bring a massive
-speed-up for dense features, mainly for one-way statistics.
-Note that the quantiles are calculated after subsampling to \code{n_max} rows.}
+\item{approx}{Should quantile approximation be applied to dense numeric features?
+The default is \code{FALSE}. Setting this option to \code{TRUE} brings a massive speed-up
+for one-way calculations. It can, e.g., be used when the number of features is
+very large.}
 
-\item{eps}{Threshold below which numerator values are set to 0. Default is 1e-10.}
+\item{grid_size}{Integer controlling the number of quantile midpoints used to
+approximate dense numerics. The quantile midpoints are calculated after
+subsampling via \code{n_max}. Only relevant if \code{approx = TRUE}.}
 
 \item{n_max}{If \code{X} has more than \code{n_max} rows, a random sample of \code{n_max} rows is
 selected from \code{X}. In this case, set a random seed for reproducibility.}
 
+\item{eps}{Threshold below which numerator values are set to 0. Default is 1e-10.}
+
 \item{w}{Optional vector of case weights. Can also be a column name of \code{X}.}
 
 \item{verbose}{Should a progress bar be shown? The default is \code{TRUE}.}
diff --git a/packaging.R b/packaging.R
index b7be2b63..18f66c91 100644
--- a/packaging.R
+++ b/packaging.R
@@ -83,7 +83,6 @@ build()
 # build(binary = TRUE)
 install(upgrade = FALSE)
 
-
 # Run only if package is public(!) and should go to CRAN
 if (FALSE) {
   check_win_devel()
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_calculate.R
similarity index 81%
rename from tests/testthat/test_utils.R
rename to tests/testthat/test_calculate.R
index b8c2e7b8..3f0e26aa 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_calculate.R
@@ -122,43 +122,6 @@ test_that("wcenter() works for vectors", {
   expect_equal(wcenter(x, w = w), xpected)
 })
 
-test_that("poor_man_stack() works (test could be improved", {
-  y <- c("a", "b", "c")
-  z <- c("aa", "bb", "cc")
-  X <- data.frame(x = 1:3, y = y, z = z)
-  out <- poor_man_stack(X, to_stack = c("y", "z"))
-  xpected <- data.frame(
-    x = rep(1:3, times = 2L), 
-    varying_ = factor(rep(c("y", "z"), each = 3L)),
-    value_ = c(y, z)
-  )
-  expect_equal(out, xpected)
-  
-  expect_error(poor_man_stack(cbind(a = 1:3, b = 2:4), to_stack = "b"))
-})
-
-test_that("mat2df() works (test could be improved)", {
-  mat <- cbind(y = 1:2, z = c(0.5, 0.5))
-  rownames(mat) <- letters[seq_len(nrow(mat))]
-  out <- mat2df(mat)
-  rownames(out) <- NULL
-  xpected <- data.frame(
-    id_ = "Overall", 
-    variable_ = factor(c("a", "b", "a", "b")),
-    varying_ = factor(c("y", "y", "z", "z")),
-    value_ = c(1, 2, 0.5, 0.5),
-    stringsAsFactors = FALSE
-  )
-  expect_equal(out, xpected)
-  
-  mat_no_names <- mat
-  colnames(mat_no_names) <- NULL
-  expect_equal(unique(mat2df(mat_no_names)$varying_), factor(c("y1", "y2")))
-  
-  expect_error(mat2df(head(iris)))
-  expect_error(mat2df(1:4))
-})
-
 test_that("qcut() works (test should be improved)", {
   x <- 1:100
   expect_equal(levels(qcut(x, m = 2)), c("[1,50]", "(50,100]"))
@@ -169,11 +132,6 @@ test_that("qcut() works with missings", {
   expect_true(is.na(qcut(c(NA, 1:9), m = 2)[1L]))
 })
 
-test_that("approx_vector() works with missings", {
-  expect_equal(approx_vector(c(NA, "A", "B"), m = 2), c(NA, "A", "B"))
-  expect_true(is.na(approx_vector(c(NA, 1:9), m = 2)[1L]))
-})
-
 test_that("approx_matrix_or_df works as expected", {
   expect_equal(approx_matrix_or_df(iris, m = 200L), iris)
   expect_false(identical(r <- approx_matrix_or_df(iris, m = 5L), iris))
@@ -184,4 +142,15 @@ test_that("approx_matrix_or_df works as expected", {
   expect_equal(approx_matrix_or_df(ir, m = 200L), ir)
   expect_false(identical(approx_matrix_or_df(ir, m = 5L), ir))
   expect_equal(length(unique(r[, "Sepal.Width"])), 5L)
+  
+  X <- cbind(dense = 1:20, discrete = rep(1:2, each = 10))
+  expect_equal(
+    apply(approx_matrix_or_df(X, m = 5L), 2L, function(x) length(unique(x))), 
+    c(dense = 5L, discrete = 2L)
+  )
+})
+
+test_that("approx_vector() works with missings", {
+  expect_equal(approx_vector(c(NA, "A", "B"), m = 2), c(NA, "A", "B"))  # unchanged
+  expect_true(is.na(approx_vector(c(NA, 1:9), m = 2)[1L]))  # NA stays NA
+})
diff --git a/tests/testthat/test_hstats.R b/tests/testthat/test_hstats.R
index 1c10a1a4..bdc35a4b 100644
--- a/tests/testthat/test_hstats.R
+++ b/tests/testthat/test_hstats.R
@@ -20,7 +20,9 @@ test_that("Additive models show 0 interactions (univariate)", {
   expect_message(plot(h2_overall(s, zero = FALSE)))
   
   # With quantile approximation
-  s <- hstats(fit, X = iris[-1L], verbose = FALSE, threeway_m = 5L, quant_approx = 5L)
+  s <- hstats(
+    fit, X = iris[-1L], verbose = FALSE, threeway_m = 5L, approx = TRUE, grid_size = 5L
+  )
   expect_null(h2_pairwise(s, zero = FALSE)$M)
 })
 
@@ -72,7 +74,7 @@ test_that("Non-additive models show interactions > 0 (one interaction)", {
   expect_null(h2_threeway(s, zero = FALSE)$M)
   
   # With quantile approximation
-  s <- hstats(fit, X = iris[-1L], verbose = FALSE, quant_approx = 5L)
+  s <- hstats(fit, X = iris[-1L], verbose = FALSE, approx = TRUE, grid_size = 5L)
   expect_true(h2(s)$M > 0)
 })
 
@@ -312,7 +314,8 @@ test_that("matrix case works as well", {
     v = colnames(iris[2:4]), 
     pred_fun = pred_fun, 
     verbose = FALSE, 
-    quant_approx = 5L
+    approx = TRUE,
+    grid_size = 20L
   )
   expect_equal(c(h2_overall(s)$M), c(0, 0, 0))
 })
diff --git a/tests/testthat/test_statistics.R b/tests/testthat/test_statistics.R
index a43d4232..06662b8c 100644
--- a/tests/testthat/test_statistics.R
+++ b/tests/testthat/test_statistics.R
@@ -1,3 +1,41 @@
+
+test_that("poor_man_stack() works (test could be improved)", {
+  y <- c("a", "b", "c")
+  z <- c("aa", "bb", "cc")
+  X <- data.frame(x = 1:3, y = y, z = z)
+  out <- poor_man_stack(X, to_stack = c("y", "z"))
+  xpected <- data.frame(
+    x = rep(1:3, times = 2L), 
+    varying_ = factor(rep(c("y", "z"), each = 3L)),
+    value_ = c(y, z)
+  )
+  expect_equal(out, xpected)
+  # Matrix input is expected to raise an error
+  expect_error(poor_man_stack(cbind(a = 1:3, b = 2:4), to_stack = "b"))
+})
+
+test_that("mat2df() works (test could be improved)", {
+  mat <- cbind(y = 1:2, z = c(0.5, 0.5))
+  rownames(mat) <- letters[seq_len(nrow(mat))]
+  out <- mat2df(mat)
+  rownames(out) <- NULL  # compare content only
+  xpected <- data.frame(
+    id_ = "Overall", 
+    variable_ = factor(c("a", "b", "a", "b")),
+    varying_ = factor(c("y", "y", "z", "z")),
+    value_ = c(1, 2, 0.5, 0.5),
+    stringsAsFactors = FALSE
+  )
+  expect_equal(out, xpected)
+  # Without column names, the levels "y1" and "y2" are expected
+  mat_no_names <- mat
+  colnames(mat_no_names) <- NULL
+  expect_equal(unique(mat2df(mat_no_names)$varying_), factor(c("y1", "y2")))
+  # Non-matrix input (data.frame, plain vector) is expected to fail
+  expect_error(mat2df(head(iris)))
+  expect_error(mat2df(1:4))
+})
+
 test_that("postprocess() works for matrix input", {
   num <- cbind(a = 1:3, b = c(1, 1, 1))
   denom <- cbind(a = 1:3, b = 1:3)