From e90632a9a04bb0b80a769dacd564ed024cd03738 Mon Sep 17 00:00:00 2001
From: Marc Becker <33069354+be-marc@users.noreply.github.com>
Date: Thu, 29 Aug 2024 20:27:00 +0200
Subject: [PATCH 1/4] feat: set blas threads to 1 (#1133)

* feat: set blas threads to 1

* rely on RhpcBLASctl if installed

---------

Co-authored-by: Michel Lang <michellang@gmail.com>
---
 R/worker.R                            | 10 ++++++++++
 man-roxygen/section_parallelization.R |  2 ++
 man/Learner.Rd                        |  2 +-
 man/benchmark.Rd                      |  2 ++
 man/resample.Rd                       |  2 ++
 5 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/R/worker.R b/R/worker.R
index d8543e43a..3ee363ebc 100644
--- a/R/worker.R
+++ b/R/worker.R
@@ -268,6 +268,16 @@ workhorse = function(iteration, task, learner, resampling, param_values = NULL,
       old_blas_threads = RhpcBLASctl::blas_get_num_procs()
       on.exit(RhpcBLASctl::blas_set_num_threads(old_blas_threads), add = TRUE)
       RhpcBLASctl::blas_set_num_threads(1)
+    } else { # try the bare minimum to disable threading of the most popular blas implementations
+      blas_vars = c("OPENBLAS_NUM_THREADS", "MKL_NUM_THREADS")
+      old_env = Sys.getenv(blas_vars, unset = NA, names = TRUE) # NA marks variables that were not set
+      Sys.setenv(OPENBLAS_NUM_THREADS = 1, MKL_NUM_THREADS = 1)
+
+      on.exit({
+        # restore on exit: unset what was unset before, reset the rest to their previous values
+        Sys.unsetenv(blas_vars[is.na(old_env)])
+        if (!all(is.na(old_env))) do.call(Sys.setenv, as.list(old_env[!is.na(old_env)]))
+      }, add = TRUE)
     }
   }
   # restore logger thresholds
diff --git a/man-roxygen/section_parallelization.R b/man-roxygen/section_parallelization.R
index 5d2d1f926..6c4fed578 100644
--- a/man-roxygen/section_parallelization.R
+++ b/man-roxygen/section_parallelization.R
@@ -4,3 +4,5 @@
 #' One job is one resampling iteration, and all jobs are send to an apply function
 #' from \CRANpkg{future.apply} in a single batch.
 #' To select a parallel backend, use [future::plan()].
+#' More on parallelization can be found in the book:
+#' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html}
diff --git a/man/Learner.Rd b/man/Learner.Rd
index 4381c1758..29da44fa9 100644
--- a/man/Learner.Rd
+++ b/man/Learner.Rd
@@ -300,7 +300,7 @@ Set of hyperparameters.}
 Controls how to execute the code in internal train and predict methods.
 Must be a named character vector with names \code{"train"} and \code{"predict"}.
 Possible values are \code{"none"}, \code{"try"}, \code{"evaluate"} (requires package \CRANpkg{evaluate}) and \code{"callr"} (requires package \CRANpkg{callr}).
-When encapsulation is activated, a fallback learner must be set.
+When encapsulation is activated, a fallback learner must be set,
 If no learner is set in \verb{$fallback}, the default fallback learner is used (see \code{mlr_reflections$task_types}).
 See \code{\link[mlr3misc:encapsulate]{mlr3misc::encapsulate()}} for more details.}
 
diff --git a/man/benchmark.Rd b/man/benchmark.Rd
index 2715e223d..9cfc995f7 100644
--- a/man/benchmark.Rd
+++ b/man/benchmark.Rd
@@ -99,6 +99,8 @@ This function can be parallelized with the \CRANpkg{future} package.
 One job is one resampling iteration, and all jobs are send to an apply function
 from \CRANpkg{future.apply} in a single batch.
 To select a parallel backend, use \code{\link[future:plan]{future::plan()}}.
+More on parallelization can be found in the book:
+\url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html}
 }
 
 \section{Progress Bars}{
diff --git a/man/resample.Rd b/man/resample.Rd
index b89e9be03..b972108ef 100644
--- a/man/resample.Rd
+++ b/man/resample.Rd
@@ -100,6 +100,8 @@ This function can be parallelized with the \CRANpkg{future} package.
 One job is one resampling iteration, and all jobs are send to an apply function
 from \CRANpkg{future.apply} in a single batch.
 To select a parallel backend, use \code{\link[future:plan]{future::plan()}}.
+More on parallelization can be found in the book:
+\url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html}
 }
 
 \section{Progress Bars}{

From 57b6109355ae5219d735a89c188d1b987ae2ff41 Mon Sep 17 00:00:00 2001
From: Marc Becker <33069354+be-marc@users.noreply.github.com>
Date: Thu, 29 Aug 2024 20:35:09 +0200
Subject: [PATCH 2/4] fix: check type of column roles (#1131)

* fix: check type of column roles

* repair checks for tasks with arbitrary row ids

---------

Co-authored-by: Michel Lang <michellang@gmail.com>
---
 R/Task.R                   | 14 ++++++++++++++
 tests/testthat/test_Task.R | 19 +++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/R/Task.R b/R/Task.R
index a5f5bf110..a32cbf077 100644
--- a/R/Task.R
+++ b/R/Task.R
@@ -1195,6 +1195,20 @@ task_check_col_roles = function(self, new_roles) {
     }
   }
 
+  # the weight column must be numeric, non-negative and free of missing values
+  if (length(new_roles[["weight"]])) {
+    weights = self$backend$data(self$backend$rownames, cols = new_roles[["weight"]])
+    assert_numeric(weights[[1L]], lower = 0, any.missing = FALSE, .var.name = names(weights))
+  }
+
+  # the name column must be character or factor; the offending class vector is collapsed into one string
+  if (length(new_roles[["name"]])) {
+    row_names = self$backend$data(self$backend$rownames, cols = new_roles[["name"]])
+    if (!is.character(row_names[[1L]]) && !is.factor(row_names[[1L]])) {
+      stopf("Assertion on '%s' failed: Must be of type 'character' or 'factor', not %s", names(row_names), paste0(class(row_names[[1L]]), collapse = "/"))
+    }
+  }
+
   if (inherits(self, "TaskSupervised")) {
     if (length(new_roles$target) == 0L) {
       stopf("Supervised tasks need at least one target column")
diff --git a/tests/testthat/test_Task.R b/tests/testthat/test_Task.R
index f2c47d11b..1fe8b493e 100644
--- a/tests/testthat/test_Task.R
+++ b/tests/testthat/test_Task.R
@@ -283,6 +283,25 @@ test_that("groups/weights work", {
   }, "up to one")
 })
 
+test_that("col roles are valid", {
+  columns = data.table(
+    y = runif(20),
+    logical = sample(c(TRUE, FALSE), 20, replace = TRUE),
+    numeric = runif(20),
+    integer = sample(1:3, 20, replace = TRUE),
+    factor = factor(sample(letters[1:3], 20, replace = TRUE)))
+  task = TaskRegr$new("test", as_data_backend(columns), target = "y")
+
+  # a weight column has to be numeric
+  for (col in c("logical", "factor")) {
+    expect_error(task$set_col_roles(col, roles = "weight"), "type")
+  }
+  # a name column has to be character or factor
+  for (col in c("logical", "integer", "numeric")) {
+    expect_error(task$set_col_roles(col, roles = "name"), "type")
+  }
+})
+
 test_that("ordered factors (#95)", {
   df = data.frame(
     x = c(1, 2, 3),

From f07c045943005a26a8a10d06f1cf50589b3b5eff Mon Sep 17 00:00:00 2001
From: be-marc <marcbecker@posteo.de>
Date: Fri, 30 Aug 2024 10:53:34 +0200
Subject: [PATCH 3/4] feat: set default fallback with set_fallback

---
 DESCRIPTION                        |  1 +
 R/Learner.R                        |  9 ++----
 R/set_fallback.R                   | 45 ++++++++++++++++++++++++++++++
 man/set_fallback.Rd                | 20 +++++++++++++
 tests/testthat/test_set_fallback.R | 29 +++++++++++++++++++
 5 files changed, 97 insertions(+), 7 deletions(-)
 create mode 100644 R/set_fallback.R
 create mode 100644 man/set_fallback.Rd
 create mode 100644 tests/testthat/test_set_fallback.R

diff --git a/DESCRIPTION b/DESCRIPTION
index ca7492a28..da2312a96 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -196,6 +196,7 @@ Collate:
     'predict.R'
     'reexports.R'
     'resample.R'
+    'set_fallback.R'
     'set_threads.R'
     'set_validate.R'
     'task_converters.R'
diff --git a/R/Learner.R b/R/Learner.R
index b3ff1840e..126d89581 100644
--- a/R/Learner.R
+++ b/R/Learner.R
@@ -567,13 +567,8 @@ Learner = R6Class("Learner",
       assert_names(names(rhs), subset.of = c("train", "predict"))
       private$.encapsulate = insert_named(default, rhs)
 
-      if (is.null(private$.fallback)) {
-        # if there is no fallback, we get a default one from the reflections table
-        fallback_id = mlr_reflections$learner_fallback[[self$task_type]]
-        if (!is.null(fallback_id)) {
-          self$fallback = lrn(fallback_id, predict_type = self$predict_type)
-        }
-      }
+      # if there is no fallback, we get a default one from the reflections table
+      if (is.null(private$.fallback)) set_fallback(self)
     },
 
     #' @field fallback ([Learner])\cr
diff --git a/R/set_fallback.R b/R/set_fallback.R
new file mode 100644
index 000000000..cc7d11c69
--- /dev/null
+++ b/R/set_fallback.R
@@ -0,0 +1,45 @@
+#' @title Set a Fallback Learner
+#'
+#' @description
+#' Set a fallback learner for a given learner.
+#' The function searches for a suitable fallback learner based on the task type.
+#' Additional checks are performed to ensure that the fallback learner supports the predict type.
+#'
+#' @param learner [Learner]\cr
+#'  The learner for which a fallback learner should be set.
+#'
+#' @return
+#' Returns the learner itself, but modified **by reference**.
+set_fallback = function(learner) {
+  assert_learner(learner)
+  task_type = learner$task_type
+
+  # look up the id of the default fallback learner registered for this task type
+  id = mlr_reflections$learner_fallback[[task_type]]
+  if (is.null(id)) {
+    stopf("No fallback learner available for task type '%s'.", task_type)
+  }
+  fallback = lrn(id)
+
+  # the fallback has to support the predict type of the given learner
+  predict_type = learner$predict_type
+  if (predict_type %nin% fallback$predict_types) {
+    stopf("Fallback learner '%s' does not support predict type '%s'.", id, predict_type)
+  }
+  fallback$predict_type = predict_type
+
+  # quantile predictions additionally need the quantile settings of the learner
+  if (predict_type == "quantiles") {
+    incomplete = is.null(learner$quantiles) || is.null(learner$quantile_response)
+    if (incomplete) {
+      stopf("Cannot set quantiles for fallback learner. Set `$quantiles` and `$quantile_response` in %s.", learner$id)
+    }
+    # both settings are present, copy them over to the fallback
+    fallback$quantiles = learner$quantiles
+    fallback$quantile_response = learner$quantile_response
+  }
+
+  # attach the configured fallback to the learner and hand the learner back
+  learner$fallback = fallback
+  learner
+}
diff --git a/man/set_fallback.Rd b/man/set_fallback.Rd
new file mode 100644
index 000000000..635a13d03
--- /dev/null
+++ b/man/set_fallback.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/set_fallback.R
+\name{set_fallback}
+\alias{set_fallback}
+\title{Set a Fallback Learner}
+\usage{
+set_fallback(learner)
+}
+\arguments{
+\item{learner}{\link{Learner}\cr
+The learner for which a fallback learner should be set.}
+}
+\value{
+Returns the learner itself, but modified \strong{by reference}.
+}
+\description{
+Set a fallback learner for a given learner.
+The function searches for a suitable fallback learner based on the task type.
+Additional checks are performed to ensure that the fallback learner supports the predict type.
+}
diff --git a/tests/testthat/test_set_fallback.R b/tests/testthat/test_set_fallback.R
new file mode 100644
index 000000000..85610fefa
--- /dev/null
+++ b/tests/testthat/test_set_fallback.R
@@ -0,0 +1,29 @@
+test_that("set_fallback() works", {
+  # sets the default fallback on `learner`, then checks classes and the fallback's predict type
+  check_fallback = function(learner, learner_class, fallback_class, predict_type) {
+    set_fallback(learner)
+    expect_class(learner, learner_class)
+    expect_class(learner$fallback, fallback_class)
+    expect_equal(learner$fallback$predict_type, predict_type)
+  }
+
+  # classification learner constructed without an explicit predict type
+  check_fallback(
+    lrn("classif.rpart"),
+    "LearnerClassifRpart", "LearnerClassifFeatureless", "response")
+
+  # classification learner with predict type "prob"
+  check_fallback(
+    lrn("classif.rpart", predict_type = "prob"),
+    "LearnerClassifRpart", "LearnerClassifFeatureless", "prob")
+
+  # regression learner constructed without an explicit predict type
+  check_fallback(
+    lrn("regr.rpart"),
+    "LearnerRegrRpart", "LearnerRegrFeatureless", "response")
+
+  # regression learner with predict type "se"
+  check_fallback(
+    lrn("regr.debug", predict_type = "se"),
+    "LearnerRegrDebug", "LearnerRegrFeatureless", "se")
+})

From beeb1b3366d801469d9406e10783312051c274f9 Mon Sep 17 00:00:00 2001
From: be-marc <marcbecker@posteo.de>
Date: Fri, 30 Aug 2024 11:03:25 +0200
Subject: [PATCH 4/4] chore: news

---
 NEWS.md              | 1 +
 pkgdown/_pkgdown.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 86a157ec9..7541656b3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -20,6 +20,7 @@
 * feat: Add option to calculate the mean of the true values on the train set in `msr("regr.rsq")`.
 * feat: Default fallback learner is set when encapsulation is activated.
 * feat: Learners classif.debug and regr.debug have new methods `$importance()` and `$selected_features()` for testing, also in downstream packages
+* feat: Set default fallback with `set_fallback()`.
 
 # mlr3 0.20.2
 
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index 41461b0e7..b72a2dbae 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -78,6 +78,7 @@ reference:
       - starts_with("mlr_learners")
       - as_learner
       - HotstartStack
+      - set_fallback
   - title: Measures
     contents:
       - starts_with("mlr_measures")