From e90632a9a04bb0b80a769dacd564ed024cd03738 Mon Sep 17 00:00:00 2001 From: Marc Becker <33069354+be-marc@users.noreply.github.com> Date: Thu, 29 Aug 2024 20:27:00 +0200 Subject: [PATCH 1/4] feat: set blas threads to 1 (#1133) * feat: set blas threads to 1 * rely on RhpcBLASctl if installed --------- Co-authored-by: Michel Lang --- R/worker.R | 10 ++++++++++ man-roxygen/section_parallelization.R | 2 ++ man/Learner.Rd | 2 +- man/benchmark.Rd | 2 ++ man/resample.Rd | 2 ++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/R/worker.R b/R/worker.R index d8543e43a..3ee363ebc 100644 --- a/R/worker.R +++ b/R/worker.R @@ -268,6 +268,16 @@ workhorse = function(iteration, task, learner, resampling, param_values = NULL, old_blas_threads = RhpcBLASctl::blas_get_num_procs() on.exit(RhpcBLASctl::blas_set_num_threads(old_blas_threads), add = TRUE) RhpcBLASctl::blas_set_num_threads(1) + } else { # try the bare minimum to disable threading of the most popular blas implementations + old_blas = Sys.getenv("OPENBLAS_NUM_THREADS") + old_mkl = Sys.getenv("MKL_NUM_THREADS") + Sys.setenv(OPENBLAS_NUM_THREADS = 1) + Sys.setenv(MKL_NUM_THREADS = 1) + + on.exit({ + Sys.setenv(OPENBLAS_NUM_THREADS = old_blas) + Sys.setenv(MKL_NUM_THREADS = old_mkl) + }, add = TRUE) } } # restore logger thresholds diff --git a/man-roxygen/section_parallelization.R b/man-roxygen/section_parallelization.R index 5d2d1f926..6c4fed578 100644 --- a/man-roxygen/section_parallelization.R +++ b/man-roxygen/section_parallelization.R @@ -4,3 +4,5 @@ #' One job is one resampling iteration, and all jobs are send to an apply function #' from \CRANpkg{future.apply} in a single batch. #' To select a parallel backend, use [future::plan()]. +#' More on parallelization can be found in the book: +#' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html} diff --git a/man/Learner.Rd b/man/Learner.Rd index 4381c1758..29da44fa9 100644 --- a/man/Learner.Rd +++ b/man/Learner.Rd @@ -300,7 +300,7 @@ Set of hyperparameters.} Controls how to execute the code in internal train and predict methods. Must be a named character vector with names \code{"train"} and \code{"predict"}. Possible values are \code{"none"}, \code{"try"}, \code{"evaluate"} (requires package \CRANpkg{evaluate}) and \code{"callr"} (requires package \CRANpkg{callr}). -When encapsulation is activated, a fallback learner must be set. +When encapsulation is activated, a fallback learner must be set, If no learner is set in \verb{$fallback}, the default fallback learner is used (see \code{mlr_reflections$task_types}). See \code{\link[mlr3misc:encapsulate]{mlr3misc::encapsulate()}} for more details.} diff --git a/man/benchmark.Rd b/man/benchmark.Rd index 2715e223d..9cfc995f7 100644 --- a/man/benchmark.Rd +++ b/man/benchmark.Rd @@ -99,6 +99,8 @@ This function can be parallelized with the \CRANpkg{future} package. One job is one resampling iteration, and all jobs are send to an apply function from \CRANpkg{future.apply} in a single batch. To select a parallel backend, use \code{\link[future:plan]{future::plan()}}. +More on parallelization can be found in the book: +\url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html} } \section{Progress Bars}{ diff --git a/man/resample.Rd b/man/resample.Rd index b89e9be03..b972108ef 100644 --- a/man/resample.Rd +++ b/man/resample.Rd @@ -100,6 +100,8 @@ This function can be parallelized with the \CRANpkg{future} package. One job is one resampling iteration, and all jobs are send to an apply function from \CRANpkg{future.apply} in a single batch. To select a parallel backend, use \code{\link[future:plan]{future::plan()}}. +More on parallelization can be found in the book: +\url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html} } \section{Progress Bars}{ From 57b6109355ae5219d735a89c188d1b987ae2ff41 Mon Sep 17 00:00:00 2001 From: Marc Becker <33069354+be-marc@users.noreply.github.com> Date: Thu, 29 Aug 2024 20:35:09 +0200 Subject: [PATCH 2/4] fix: check type of column roles (#1131) * fix: check type of column roles * repair checks for tasks with arbitrary row ids --------- Co-authored-by: Michel Lang --- R/Task.R | 14 ++++++++++++++ tests/testthat/test_Task.R | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/R/Task.R b/R/Task.R index a5f5bf110..a32cbf077 100644 --- a/R/Task.R +++ b/R/Task.R @@ -1195,6 +1195,20 @@ task_check_col_roles = function(self, new_roles) { } } + # check weights + if (length(new_roles[["weight"]])) { + weights = self$backend$data(self$backend$rownames, cols = new_roles[["weight"]]) + assert_numeric(weights[[1L]], lower = 0, any.missing = FALSE, .var.name = names(weights)) + } + + # check name + if (length(new_roles[["name"]])) { + row_names = self$backend$data(self$backend$rownames, cols = new_roles[["name"]]) + if (!is.character(row_names[[1L]]) && !is.factor(row_names[[1L]])) { + stopf("Assertion on '%s' failed: Must be of type 'character' or 'factor', not %s", names(row_names), class(row_names[[1]])) + } + } + if (inherits(self, "TaskSupervised")) { if (length(new_roles$target) == 0L) { stopf("Supervised tasks need at least one target column") diff --git a/tests/testthat/test_Task.R b/tests/testthat/test_Task.R index f2c47d11b..1fe8b493e 100644 --- a/tests/testthat/test_Task.R +++ b/tests/testthat/test_Task.R @@ -283,6 +283,25 @@ test_that("groups/weights work", { }, "up to one") }) +test_that("col roles are valid", { + b = as_data_backend(data.table( + y = runif(20), + logical = sample(c(TRUE, FALSE), 20, replace = TRUE), + numeric = runif(20), + integer = sample(1:3, 20, replace = TRUE), + factor = factor(sample(letters[1:3], 20, replace = TRUE)))) + task = TaskRegr$new("test", b, target = "y") + + # weight + expect_error(task$set_col_roles("logical", roles = "weight"), "type") + expect_error(task$set_col_roles("factor", roles = "weight"), "type") + + # name + expect_error(task$set_col_roles("logical", roles = "name"), "type") + expect_error(task$set_col_roles("integer", roles = "name"), "type") + expect_error(task$set_col_roles("numeric", roles = "name"), "type") +}) + test_that("ordered factors (#95)", { df = data.frame( x = c(1, 2, 3), From f07c045943005a26a8a10d06f1cf50589b3b5eff Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 30 Aug 2024 10:53:34 +0200 Subject: [PATCH 3/4] feat: set default fallback with set_fallback --- DESCRIPTION | 1 + R/Learner.R | 9 ++---- R/set_fallback.R | 45 ++++++++++++++++++++++++++++++ man/set_fallback.Rd | 20 +++++++++++++ tests/testthat/test_set_fallback.R | 29 +++++++++++++++++++ 5 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 R/set_fallback.R create mode 100644 man/set_fallback.Rd create mode 100644 tests/testthat/test_set_fallback.R diff --git a/DESCRIPTION b/DESCRIPTION index ca7492a28..da2312a96 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -196,6 +196,7 @@ Collate: 'predict.R' 'reexports.R' 'resample.R' + 'set_fallback.R' 'set_threads.R' 'set_validate.R' 'task_converters.R' diff --git a/R/Learner.R b/R/Learner.R index b3ff1840e..126d89581 100644 --- a/R/Learner.R +++ b/R/Learner.R @@ -567,13 +567,8 @@ Learner = R6Class("Learner", assert_names(names(rhs), subset.of = c("train", "predict")) private$.encapsulate = insert_named(default, rhs) - if (is.null(private$.fallback)) { - # if there is no fallback, we get a default one from the reflections table - fallback_id = mlr_reflections$learner_fallback[[self$task_type]] - if (!is.null(fallback_id)) { - self$fallback = lrn(fallback_id, predict_type = self$predict_type) - } - } + # if there is no fallback, we get a default one from the reflections table + if (is.null(private$.fallback)) set_fallback(self) }, #' @field fallback ([Learner])\cr diff --git a/R/set_fallback.R b/R/set_fallback.R new file mode 100644 index 000000000..cc7d11c69 --- /dev/null +++ b/R/set_fallback.R @@ -0,0 +1,45 @@ +#' @title Set a Fallback Learner +#' +#' @description +#' Set a fallback learner for a given learner. +#' The function searches for a suitable fallback learner based on the task type. +#' Additional checks are performed to ensure that the fallback learner supports the predict type. +#' +#' @param learner [Learner]\cr +#' The learner for which a fallback learner should be set. +#' +#' @return +#' Returns the learner itself, but modified **by reference**. +set_fallback = function(learner) { + assert_learner(learner) + + # search for suitable fallback learner + fallback_id = mlr_reflections$learner_fallback[[learner$task_type]] + + if (is.null(fallback_id)) { + stopf("No fallback learner available for task type '%s'.", learner$task_type) + } + + fallback = lrn(fallback_id) + + # set predict type + if (learner$predict_type %nin% fallback$predict_types) { + stopf("Fallback learner '%s' does not support predict type '%s'.", fallback_id, learner$predict_type) + } + + fallback$predict_type = learner$predict_type + + # set quantiles + if (learner$predict_type == "quantiles") { + + if (is.null(learner$quantiles) || is.null(learner$quantile_response)) { + stopf("Cannot set quantiles for fallback learner. Set `$quantiles` and `$quantile_response` in %s.", learner$id) + } + + fallback$quantiles = learner$quantiles + fallback$quantile_response = learner$quantile_response + } + + learner$fallback = fallback + return(learner) +} diff --git a/man/set_fallback.Rd b/man/set_fallback.Rd new file mode 100644 index 000000000..635a13d03 --- /dev/null +++ b/man/set_fallback.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/set_fallback.R +\name{set_fallback} +\alias{set_fallback} +\title{Set a Fallback Learner} +\usage{ +set_fallback(learner) +} +\arguments{ +\item{learner}{\link{Learner}\cr +The learner for which a fallback learner should be set.} +} +\value{ +Returns the learner itself, but modified \strong{by reference}. +} +\description{ +Set a fallback learner for a given learner. +The function searches for a suitable fallback learner based on the task type. +Additional checks are performed to ensure that the fallback learner supports the predict type. +} diff --git a/tests/testthat/test_set_fallback.R b/tests/testthat/test_set_fallback.R new file mode 100644 index 000000000..85610fefa --- /dev/null +++ b/tests/testthat/test_set_fallback.R @@ -0,0 +1,29 @@ +test_that("set_fallback() works", { + learner = lrn("classif.rpart") + set_fallback(learner) + + expect_class(learner, "LearnerClassifRpart") + expect_class(learner$fallback, "LearnerClassifFeatureless") + expect_equal(learner$fallback$predict_type, "response") + + learner = lrn("classif.rpart", predict_type = "prob") + set_fallback(learner) + + expect_class(learner, "LearnerClassifRpart") + expect_class(learner$fallback, "LearnerClassifFeatureless") + expect_equal(learner$fallback$predict_type, "prob") + + learner = lrn("regr.rpart") + set_fallback(learner) + + expect_class(learner, "LearnerRegrRpart") + expect_class(learner$fallback, "LearnerRegrFeatureless") + expect_equal(learner$fallback$predict_type, "response") + + learner = lrn("regr.debug", predict_type = "se") + set_fallback(learner) + + expect_class(learner, "LearnerRegrDebug") + expect_class(learner$fallback, "LearnerRegrFeatureless") + expect_equal(learner$fallback$predict_type, "se") +}) From beeb1b3366d801469d9406e10783312051c274f9 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 30 Aug 2024 11:03:25 +0200 Subject: [PATCH 4/4] chore: news --- NEWS.md | 1 + pkgdown/_pkgdown.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 86a157ec9..7541656b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,7 @@ * feat: Add option to calculate the mean of the true values on the train set in `msr("regr.rsq")`. * feat: Default fallback learner is set when encapsulation is activated. * feat: Learners classif.debug and regr.debug have new methods `$importance()` and `$selected_features()` for testing, also in downstream packages +* feat: Set default fallback with `set_fallback()`. # mlr3 0.20.2 diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 41461b0e7..b72a2dbae 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -78,6 +78,7 @@ reference: - starts_with("mlr_learners") - as_learner - HotstartStack + - set_fallback - title: Measures contents: - starts_with("mlr_measures")