From 7190f453710c4aa546db6ab9f23516ccaee03960 Mon Sep 17 00:00:00 2001 From: Marc Becker <33069354+be-marc@users.noreply.github.com> Date: Sat, 31 Aug 2024 10:50:25 +0200 Subject: [PATCH] docs: autotest (#1128) * refactor: autotest * chore: return * docs: update * chore: add link * docs: render * docs: add tasks --- R/mlr_test_helpers.R | 3 +- inst/testthat/helper_autotest.R | 203 ++++++++++++++++++++++++-------- man/mlr_test_helpers.Rd | 1 + tests/testthat/test_autotest.R | 27 +++++ 4 files changed, 183 insertions(+), 51 deletions(-) create mode 100644 tests/testthat/test_autotest.R diff --git a/R/mlr_test_helpers.R b/R/mlr_test_helpers.R index 7604bd003..35568ca3c 100644 --- a/R/mlr_test_helpers.R +++ b/R/mlr_test_helpers.R @@ -24,6 +24,7 @@ #' the task, learner and prediction of the returned `result`. #' #' For example usages you can look at the autotests in various mlr3 source repositories such as mlr3learners. +#' More information can be found in the `inst/testthat/autotest.R` file. #' #' **Parameters**: #' @@ -42,7 +43,7 @@ #' Whether to check that running the learner twice with the same seed should result in identical predictions. #' Default is `TRUE`. #' * `configure_learner` (`function(learner, task)`)\cr -#' Before running a `learner` on a `task`, this function allows to change its parameter values depending on the input task. +#' Before running a `learner` on a `task`, this function allows to change its parameter values depending on the input task. #' #' @section run_paramtest(): #' diff --git a/inst/testthat/helper_autotest.R b/inst/testthat/helper_autotest.R index 007ba6823..4993c35f5 100644 --- a/inst/testthat/helper_autotest.R +++ b/inst/testthat/helper_autotest.R @@ -1,17 +1,38 @@ -# Learner autotest suite -# -# `run_experiment(task, learner)` runs a single experiment. -# Returns a list with success flag "status" (`logical(1)`), -# "experiment" (partially constructed experiment), and "error" -# (informative error message). -# -# `run_autotest(learner)` generates multiple tasks, depending on the properties of the learner. -# and tests the learner on each task, with each predict type. -# To debug, simply run `result = run_autotest(learner)` and proceed with investigating -# the task, learner and prediction of the returned `result`. +#' @title Learner Autotest Suite +#' +#' @description +#' The autotest suite is a collection of functions to test learners in a standardized way. +#' Extension packages need to specialize the S3 methods in the file. # -# NB: Extension packages need to specialize the S3 methods in the file. +#' @details +#' `run_autotest(learner)` generates multiple tasks, depending on the properties of the learner and tests the learner on each task, with each predict type. +#' Calls `generate_tasks()` to generate tasks and `run_experiment()` to run the experiments. +#' See `generate_tasks()` for a list of tasks that are generated. +#' To debug, simply run `result = run_autotest(learner)` and proceed with investigating he task, learner and prediction of the returned `result`. +#' +#' `run_experiment(task, learner)` runs a single experiment. +#' Calls `train()` and `predict()` on the learner and checks the prediction with `score()`. +#' The prediction is checked with `sanity_check()`. +#' +#' `generate_tasks(learner)` generates multiple tasks for a given learner. +#' Calls `generate_data()` and `generate_generic_tasks()` to generate tasks with different feature types. +#' +#' @noRd +NULL +#' @title Generate Tasks for a Learner +#' +#' @description +#' Generates multiple tasks for a given [Learner], based on its properties. +#' +#' @param learner [Learner]\cr +#' Learner to generate tasks for. +#' @param proto [Task]\cr +#' Prototype task to generate tasks from. +#' +#' @return (List of [Task]s). +#' +#' @noRd generate_generic_tasks = function(learner, proto) { tasks = list() n = proto$nrow @@ -76,6 +97,20 @@ generate_generic_tasks = function(learner, proto) { }) } +#' @title Generate Data for a Learner +#' +#' @description +#' Generates data for a given [Learner], based on its supported feature types. +#' Data is created for logical, integer, numeric, character, factor, ordered, and POSIXct features. +#' +#' @param learner [Learner]\cr +#' Learner to generate data for. +#' @param N `integer(1)`\cr +#' Number of rows of generated data. +#' +#' @return [data.table::data.table()] +#' +#' @noRd generate_data = function(learner, N) { generate_feature = function(type) { switch(type, @@ -96,14 +131,22 @@ generate_data = function(learner, N) { #' #' @description #' Generates multiple tasks for a given [Learner], based on its properties. -#' This function is primarily used for unit tests, but can also assist while -#' writing custom learners. +#' This function is primarily used for unit tests, but can also assist while writing custom learners. +#' The following tasks are created: +#' * `feat_single_*`: Tasks with a single feature type. +#' * `feat_all_*`: Task with all supported feature types. +#' * `missings_*`: Task with missing values. +#' * `utf8_feature_names_*`: Task with non-ascii feature names. +#' * `sanity`: Task with a simple dataset to check if the learner is working. +#' * `sanity_reordered`: Task with the same dataset as `sanity`, but with reordered columns. +#' * `sanity_switched`: Task with the same dataset as `sanity`, but with the positive class switched. #' -#' @param learner :: [Learner]. -#' @param N :: `integer(1)`\cr +#' @param learner [Learner]\cr +#' Learner to generate tasks for. +#' @param N `integer(1)`\cr #' Number of rows of generated tasks. #' -#' @return (List of [Task]s). +#' @return `list` of [Task]s #' @keywords internal #' @export #' @examples @@ -184,6 +227,17 @@ generate_tasks.LearnerRegr = function(learner, N = 30L) { } registerS3method("generate_tasks", "LearnerRegr", generate_tasks.LearnerRegr) +#' @title Sanity Check for Predictions +#' +#' @description +#' Checks the sanity of a prediction. +#' +#' @param prediction [Prediction]\cr +#' Prediction to check. +#' +#' @return (`logical(1)`). +#' +#' @noRd sanity_check = function(prediction, ...) { UseMethod("sanity_check") } @@ -199,7 +253,34 @@ sanity_check.PredictionRegr = function(prediction, ...) { } registerS3method("sanity_check", "LearnerRegr", sanity_check.PredictionRegr) + +#' @title Run a Single Learner Test +#' +#' @description +#' Runs a single experiment with a given task and learner. +#' +#' @param task [Task]\cr +#' Task to run the experiment on. +#' @param learner [Learner]\cr +#' Learner to run the experiment with. +#' @param seed `integer(1)`\cr +#' Seed to use for the experiment. +#' If `NULL`, a random seed is generated. +#' @param configure_learner `function(learner, task)`\cr +#' Function to configure the learner before training. +#' Useful when learner settings need to be adjusted for a specific task. +#' +#' @return `list` with the following elements: +#' - `ok` (`logical(1)`): Success flag. +#' - `learner` ([Learner]): Learner used for the experiment. +#' - `prediction` ([Prediction]): Prediction object. +#' - `error` (`character()`): Error message if `ok` is `FALSE`. +#' - `seed` (`integer(1)`): Seed used for the experiment. +#' +#' @noRd run_experiment = function(task, learner, seed = NULL, configure_learner = NULL) { + + # function to collect error message and objects err = function(info, ...) { info = sprintf(info, ...) list( @@ -210,6 +291,7 @@ run_experiment = function(task, learner, seed = NULL, configure_learner = NULL) ) } + # seed handling if (is.null(seed)) { seed = sample.int(floor(.Machine$integer.max / 2L), 1L) } @@ -230,31 +312,27 @@ run_experiment = function(task, learner, seed = NULL, configure_learner = NULL) } prediction = NULL score = NULL - learner$encapsulate = c(train = "evaluate", predict = "evaluate") + # check train stage = "train()" + ok = try(learner$train(task), silent = TRUE) if (inherits(ok, "try-error")) { return(err(as.character(ok))) } - log = learner$log[stage == "train"] - if ("error" %in% log$class) { - return(err("train log has errors: %s", mlr3misc::str_collapse(log[class == "error", msg]))) - } if (is.null(learner$model)) { return(err("model is NULL")) } + # check predict stage = "predict()" prediction = try(learner$predict(task), silent = TRUE) - if (inherits(ok, "try-error")) { + if (inherits(prediction, "try-error")) { + ok = prediction + prediction = NULL return(err(as.character(ok))) } - log = learner$log[stage == "predict"] - if ("error" %in% log$class) { - return(err("predict log has errors: %s", mlr3misc::str_collapse(log[class == "error", msg]))) - } msg = checkmate::check_class(prediction, "Prediction") if (!isTRUE(msg)) { return(err(msg)) @@ -294,8 +372,9 @@ run_experiment = function(task, learner, seed = NULL, configure_learner = NULL) } } - + # check score stage = "score()" + score = try( prediction$score(mlr3::default_measures(learner$task_type), task = task, @@ -303,7 +382,9 @@ run_experiment = function(task, learner, seed = NULL, configure_learner = NULL) train_set = task$row_ids ), silent = TRUE) if (inherits(score, "try-error")) { - return(err(as.character(score))) + ok = score + score = NULL + return(err(as.character(ok))) } msg = checkmate::check_numeric(score, any.missing = FALSE) if (!isTRUE(msg)) { @@ -311,11 +392,11 @@ run_experiment = function(task, learner, seed = NULL, configure_learner = NULL) } # run sanity check on sanity task - if (startsWith(task$id, "sanity") && ! - sanity_check(prediction, task = task, learner = learner, train_set = task$row_ids)) { + if (startsWith(task$id, "sanity") && !sanity_check(prediction, task = task, learner = learner, train_set = task$row_ids)) { return(err("sanity check failed")) } + # check importance, selected_features and oob_error methods if (startsWith(task$id, "feat_all")) { if ("importance" %in% learner$properties) { importance = learner$importance() @@ -352,6 +433,37 @@ run_experiment = function(task, learner, seed = NULL, configure_learner = NULL) return(list(ok = TRUE, learner = learner, prediction = prediction, error = character(), seed = seed)) } +#' @title Run Autotest for a Learner +#' +#' @description +#' Runs a series of experiments with a given learner on multiple tasks. +#' +#' @param learner ([Learner])\cr +#' The learner to test. +#' @param N (`integer(1)`)\cr +#' Number of rows of generated tasks. +#' @param exclude (`character()`)\cr +#' Regular expression to exclude tasks from the test. +#' Run `generate_tasks(learner)` to see all available tasks. +#' @param predict_types (`character()`)\cr +#' Predict types to test. +#' @param check_replicable (`logical(1)`)\cr +#' Check if the results are replicable. +#' @param configure_learner (`function(learner, task)`)\cr +#' Function to configure the learner before training. +#' Useful when learner settings need to be adjusted for a specific task. +#' +#' @return If the test was successful, `TRUE` is returned. +#' If the test failed, a `list` with the following elements is returned: +#' - `ok` (`logical(1)`): Success flag. +#' - `seed` (`integer(1)`): Seed used for the experiment. +#' - `task` ([Task]): Task used for the experiment. +#' - `learner` ([Learner]): Learner used for the experiment. +#' - `prediction` ([Prediction]): Prediction object. +#' - `score` (`numeric(1)`): Score of the prediction. +#' - `error` (`character()`): Error message if `ok` is `FALSE`. +# +#' @noRd run_autotest = function(learner, N = 30L, exclude = NULL, predict_types = learner$predict_types, check_replicable = TRUE, configure_learner = NULL) { # nolint if (!is.null(configure_learner)) { checkmate::assert_function(configure_learner, args = c("learner", "task")) @@ -359,11 +471,11 @@ run_autotest = function(learner, N = 30L, exclude = NULL, predict_types = learne learner = learner$clone(deep = TRUE) id = learner$id tasks = generate_tasks(learner, N = N) + if (!is.null(exclude)) { tasks = tasks[!grepl(exclude, names(tasks))] } - sanity_runs = list() make_err = function(msg, ...) { run$ok = FALSE @@ -371,11 +483,6 @@ run_autotest = function(learner, N = 30L, exclude = NULL, predict_types = learne run } - # param_tags = unique(unlist(learner$param_set$tags)) - # if (!test_subset(param_tags, mlr_reflections$learner_param_tags)) { - # return(list(ok = FALSE, error = "Invalid parameter tag(s), check `mlr_reflections$learner_param_tags`.")) - # } - for (task in tasks) { for (predict_type in predict_types) { learner$id = sprintf("%s:%s", id, predict_type) @@ -415,26 +522,22 @@ run_autotest = function(learner, N = 30L, exclude = NULL, predict_types = learne } } - - return(TRUE) } #' @title Check Parameters of mlr3 Learners -#' @description Checks parameters of mlr3learners against parameters defined in -#' the upstream functions of the respective learners. +#' +#' @description +#' Checks parameters of mlr3learners against parameters defined in the upstream functions of the respective learners. #' #' @details -#' Some learners do not have all of their parameters stored within the learner -#' function that is called within `.train()`. Sometimes learners come with a -#' "control" function, e.g. [glmnet::glmnet.control()]. Such need to be checked -#' as well since they make up the full ParamSet of the respective learner. +#' Some learners do not have all of their parameters stored within the learner function that is called within `.train()`. +#' Sometimes learners come with a "control" function, e.g. [glmnet::glmnet.control()]. +#' Such need to be checked as well since they make up the full ParamSet of the respective learner. #' -#' To work nicely with the defined ParamSet, certain parameters need to be -#' excluded because these are only present in either the "control" object or the -#' actual top-level function call. Such exclusions should go into argument -#' `exclude` with a comment for the reason of the exclusion. See examples for -#' more information. +#' To work nicely with the defined ParamSet, certain parameters need to be excluded because these are only present in either the "control" object or the actual top-level function call. +#' Such exclusions should go into argument `exclude` with a comment for the reason of the exclusion. +#' See examples for more information. #' #' @param learner ([mlr3::Learner])\cr #' The constructed learner. diff --git a/man/mlr_test_helpers.Rd b/man/mlr_test_helpers.Rd index 0a008043f..9f099eede 100644 --- a/man/mlr_test_helpers.Rd +++ b/man/mlr_test_helpers.Rd @@ -29,6 +29,7 @@ To debug, simply run \code{result = run_autotest(learner)} and proceed with inve the task, learner and prediction of the returned \code{result}. For example usages you can look at the autotests in various mlr3 source repositories such as mlr3learners. +More information can be found in the \code{inst/testthat/autotest.R} file. \strong{Parameters}: \itemize{ diff --git a/tests/testthat/test_autotest.R b/tests/testthat/test_autotest.R new file mode 100644 index 000000000..fab0a7f03 --- /dev/null +++ b/tests/testthat/test_autotest.R @@ -0,0 +1,27 @@ +test_that("autotest catches error in train", { + learner = lrn("classif.debug", error_train = 1) + task = tsk("spam") + + result = run_experiment(task, learner) + expect_false(result$ok) + expect_integer(result$seed) + expect_task(result$task) + expect_learner(result$learner) + expect_null(result$prediction) + expect_null(result$score) + expect_string(result$error) +}) + +test_that("autotest catches error in predict", { + learner = lrn("classif.debug", error_predict = 1) + task = tsk("spam") + + result = run_experiment(task, learner) + expect_false(result$ok) + expect_integer(result$seed) + expect_task(result$task) + expect_learner(result$learner) + expect_null(result$prediction) + expect_null(result$score) + expect_string(result$error) +})