diff --git a/DESCRIPTION b/DESCRIPTION index d83c3d5e4..0a9bc9e62 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -156,7 +156,7 @@ Collate: 'TaskGeneratorSpirals.R' 'TaskGeneratorXor.R' 'TaskRegr.R' - 'TaskRegr_boston_housing.R' + 'TaskRegr_ames_housing.R' 'TaskRegr_mtcars.R' 'TaskUnsupervised.R' 'as_benchmark_result.R' diff --git a/NEWS.md b/NEWS.md index 5a65e7e34..2fe2a8002 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # mlr3 (development version) * fix: Quantiles must not ascend with probabilities. +* refactor: Replace `tsk("boston_housing")` with `tsk("ames_housing")`. # mlr3 0.21.1 diff --git a/R/PredictionRegr.R b/R/PredictionRegr.R index 62a599aff..53ceb792c 100644 --- a/R/PredictionRegr.R +++ b/R/PredictionRegr.R @@ -10,7 +10,7 @@ #' @template seealso_prediction #' @export #' @examples -#' task = tsk("boston_housing") +#' task = tsk("ames_housing") #' learner = lrn("regr.featureless", predict_type = "se") #' p = learner$train(task)$predict(task) #' p$predict_types diff --git a/R/Task.R b/R/Task.R index c0bc73fd5..da439e3ec 100644 --- a/R/Task.R +++ b/R/Task.R @@ -16,7 +16,7 @@ #' For example, for a classification task a single column must be marked as target column, and others as features. #' #' Predefined (toy) tasks are stored in the [dictionary][mlr3misc::Dictionary] [mlr_tasks], -#' e.g. [`penguins`][mlr_tasks_penguins] or [`boston_housing`][mlr_tasks_boston_housing]. +#' e.g. [`penguins`][mlr_tasks_penguins] or [`ames_housing`][mlr_tasks_ames_housing]. #' More toy tasks can be found in the dictionary after loading \CRANpkg{mlr3data}. #' #' @template param_id diff --git a/R/TaskRegr_ames_housing.R b/R/TaskRegr_ames_housing.R new file mode 100644 index 000000000..dfd14ceef --- /dev/null +++ b/R/TaskRegr_ames_housing.R @@ -0,0 +1,35 @@ +#' @title House Sales in Ames, Iowa +#' +#' @name ames_housing +#' @format [R6::R6Class] inheriting from [TaskRegr]. 
+#' @aliases mlr_tasks_ames_housing +#' +#' @description +#' A regression task to predict house sale prices for Ames, Iowa. +#' This is the processed version from the [AmesHousing::make_ames()] package. +#' +#' Contains 80 features and 2930 observations. +#' Target column is `"Sale_Price"`. +#' +#' @section Construction: +#' ``` +#' mlr_tasks$get("ames_housing") +#' tsk("ames_housing") +#' ``` +#' +#' @section Meta Information: +#' `r rd_info(tsk("ames_housing"))` +#' +#' @template seealso_task +NULL + +load_task_ames_housing = function(id = "ames_housing") { + b = as_data_backend(readRDS(system.file("extdata", "ames_housing.rds", package = "mlr3"))) + task = TaskRegr$new(id, b, target = "Sale_Price", label = "Ames House Sales") + b$hash = task$man = "mlr3::mlr_tasks_ames_housing" + task +} + +#' @include mlr_tasks.R +mlr_tasks$add("ames_housing", load_task_ames_housing) + diff --git a/R/TaskRegr_boston_housing.R b/R/TaskRegr_boston_housing.R deleted file mode 100644 index ec2866343..000000000 --- a/R/TaskRegr_boston_housing.R +++ /dev/null @@ -1,33 +0,0 @@ -#' @title Boston Housing Regression Task -#' -#' @name mlr_tasks_boston_housing -#' @format [R6::R6Class] inheriting from [TaskRegr]. -#' @include mlr_tasks.R -#' -#' -#' @description -#' A regression task for the [mlbench::BostonHousing2] data set. -#' This is the corrected data using the corrected median value (`cmedv`) as target. -#' The uncorrected target (`medv`) is removed from the data. 
-#' -#' @section Construction: -#' ``` -#' mlr_tasks$get("boston_housing") -#' tsk("boston_housing") -#' ``` -#' -#' @section Meta Information: -#' `r rd_info(tsk("boston_housing"))` -#' -#' @template seealso_task -NULL - -load_task_boston_housing = function(id = "boston_housing") { - b = as_data_backend(remove_named(load_dataset("BostonHousing2", "mlbench"), "medv")) - task = TaskRegr$new(id, b, target = "cmedv", label = "Boston Housing Prices") - b$hash = task$man = "mlr3::mlr_tasks_boston_housing" - task -} - -#' @include mlr_tasks.R -mlr_tasks$add("boston_housing", load_task_boston_housing) diff --git a/R/partition.R b/R/partition.R index 908566bfa..f115338f5 100644 --- a/R/partition.R +++ b/R/partition.R @@ -12,7 +12,7 @@ #' @export #' @examples #' # regression task partitioned into training and test set -#' task = tsk("boston_housing") +#' task = tsk("ames_housing") #' split = partition(task, ratio = 0.5) #' data = data.frame( #' y = c(task$truth(split$train), task$truth(split$test)), diff --git a/inst/extdata/ames_housing.R b/inst/extdata/ames_housing.R new file mode 100644 index 000000000..2dfba4791 --- /dev/null +++ b/inst/extdata/ames_housing.R @@ -0,0 +1,3 @@ +root = rprojroot::find_package_root_file() +data = data.table::setDT(AmesHousing::make_ames()) +saveRDS(data, file = file.path(root, "inst", "extdata", "ames_housing.rds"), version = 2L) diff --git a/inst/extdata/ames_housing.rds b/inst/extdata/ames_housing.rds new file mode 100644 index 000000000..ad9d8e5f7 Binary files /dev/null and b/inst/extdata/ames_housing.rds differ diff --git a/man/PredictionRegr.Rd b/man/PredictionRegr.Rd index dc8a02e3d..e3ae6eb2f 100644 --- a/man/PredictionRegr.Rd +++ b/man/PredictionRegr.Rd @@ -9,7 +9,7 @@ the predicted response and standard error. Additionally, probability distributions implemented in package \code{distr6} are supported. 
} \examples{ -task = tsk("boston_housing") +task = tsk("ames_housing") learner = lrn("regr.featureless", predict_type = "se") p = learner$train(task)$predict(task) p$predict_types diff --git a/man/Task.Rd b/man/Task.Rd index bfa5279a8..1e8b81002 100644 --- a/man/Task.Rd +++ b/man/Task.Rd @@ -17,7 +17,7 @@ For example, for a classification task a single column must be marked as target } Predefined (toy) tasks are stored in the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_tasks}, -e.g. \code{\link[=mlr_tasks_penguins]{penguins}} or \code{\link[=mlr_tasks_boston_housing]{boston_housing}}. +e.g. \code{\link[=mlr_tasks_penguins]{penguins}} or \code{\link[=mlr_tasks_ames_housing]{ames_housing}}. More toy tasks can be found in the dictionary after loading \CRANpkg{mlr3data}. } \section{S3 methods}{ @@ -92,8 +92,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/TaskClassif.Rd b/man/TaskClassif.Rd index 207607fa3..1f540d1b9 100644 --- a/man/TaskClassif.Rd +++ b/man/TaskClassif.Rd @@ -50,8 +50,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/TaskRegr.Rd b/man/TaskRegr.Rd index 4648223ea..bf96bb1f8 100644 --- a/man/TaskRegr.Rd +++ b/man/TaskRegr.Rd @@ -40,8 +40,8 @@ Other Task: \code{\link{TaskClassif}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, 
\code{\link{mlr_tasks_iris}}, diff --git a/man/TaskSupervised.Rd b/man/TaskSupervised.Rd index 27c4df854..e3936ba7e 100644 --- a/man/TaskSupervised.Rd +++ b/man/TaskSupervised.Rd @@ -35,8 +35,8 @@ Other Task: \code{\link{TaskClassif}}, \code{\link{TaskRegr}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/TaskUnsupervised.Rd b/man/TaskUnsupervised.Rd index 488f8acee..e94e1d9e7 100644 --- a/man/TaskUnsupervised.Rd +++ b/man/TaskUnsupervised.Rd @@ -31,8 +31,8 @@ Other Task: \code{\link{TaskClassif}}, \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/ames_housing.Rd b/man/ames_housing.Rd new file mode 100644 index 000000000..e7ba22559 --- /dev/null +++ b/man/ames_housing.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TaskRegr_ames_housing.R +\name{ames_housing} +\alias{ames_housing} +\alias{mlr_tasks_ames_housing} +\title{House Sales in Ames, Iowa} +\format{ +\link[R6:R6Class]{R6::R6Class} inheriting from \link{TaskRegr}. +} +\description{ +A regression task to predict house sale prices for Ames, Iowa. +This is the processed version from the \code{\link[AmesHousing:make_ames]{AmesHousing::make_ames()}} package. + +Contains 80 features and 2930 observations. +Target column is \code{"Sale_Price"}. +} +\section{Construction}{ + + +\if{html}{\out{
}}\preformatted{mlr_tasks$get("ames_housing") +tsk("ames_housing") +}\if{html}{\out{
}} +} + +\section{Meta Information}{ + +\itemize{ +\item Task type: \dQuote{regr} +\item Dimensions: 2930x81 +\item Properties: - +\item Has Missings: \code{FALSE} +\item Target: \dQuote{Sale_Price} +\item Features: \dQuote{Alley}, \dQuote{Bedroom_AbvGr}, \dQuote{Bldg_Type}, \dQuote{BsmtFin_SF_1}, \dQuote{BsmtFin_SF_2}, \dQuote{BsmtFin_Type_1}, \dQuote{BsmtFin_Type_2}, \dQuote{Bsmt_Cond}, \dQuote{Bsmt_Exposure}, \dQuote{Bsmt_Full_Bath}, \dQuote{Bsmt_Half_Bath}, \dQuote{Bsmt_Qual}, \dQuote{Bsmt_Unf_SF}, \dQuote{Central_Air}, \dQuote{Condition_1}, \dQuote{Condition_2}, \dQuote{Electrical}, \dQuote{Enclosed_Porch}, \dQuote{Exter_Cond}, \dQuote{Exter_Qual}, \dQuote{Exterior_1st}, \dQuote{Exterior_2nd}, \dQuote{Fence}, \dQuote{Fireplace_Qu}, \dQuote{Fireplaces}, \dQuote{First_Flr_SF}, \dQuote{Foundation}, \dQuote{Full_Bath}, \dQuote{Functional}, \dQuote{Garage_Area}, \dQuote{Garage_Cars}, \dQuote{Garage_Cond}, \dQuote{Garage_Finish}, \dQuote{Garage_Qual}, \dQuote{Garage_Type}, \dQuote{Gr_Liv_Area}, \dQuote{Half_Bath}, \dQuote{Heating}, \dQuote{Heating_QC}, \dQuote{House_Style}, \dQuote{Kitchen_AbvGr}, \dQuote{Kitchen_Qual}, \dQuote{Land_Contour}, \dQuote{Land_Slope}, \dQuote{Latitude}, \dQuote{Longitude}, \dQuote{Lot_Area}, \dQuote{Lot_Config}, \dQuote{Lot_Frontage}, \dQuote{Lot_Shape}, \dQuote{Low_Qual_Fin_SF}, \dQuote{MS_SubClass}, \dQuote{MS_Zoning}, \dQuote{Mas_Vnr_Area}, \dQuote{Mas_Vnr_Type}, \dQuote{Misc_Feature}, \dQuote{Misc_Val}, \dQuote{Mo_Sold}, \dQuote{Neighborhood}, \dQuote{Open_Porch_SF}, \dQuote{Overall_Cond}, \dQuote{Overall_Qual}, \dQuote{Paved_Drive}, \dQuote{Pool_Area}, \dQuote{Pool_QC}, \dQuote{Roof_Matl}, \dQuote{Roof_Style}, \dQuote{Sale_Condition}, \dQuote{Sale_Type}, \dQuote{Screen_Porch}, \dQuote{Second_Flr_SF}, \dQuote{Street}, \dQuote{Three_season_porch}, \dQuote{TotRms_AbvGrd}, \dQuote{Total_Bsmt_SF}, \dQuote{Utilities}, \dQuote{Wood_Deck_SF}, \dQuote{Year_Built}, \dQuote{Year_Remod_Add}, \dQuote{Year_Sold} +} +} + +\seealso{ +\itemize{ 
+\item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: +\url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html} +\item Package \CRANpkg{mlr3data} for more toy tasks. +\item Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}. +\item Package \CRANpkg{mlr3viz} for some generic visualizations. +\item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Task]{Tasks}: \link{mlr_tasks} +\item \code{as.data.table(mlr_tasks)} for a table of available \link[=Task]{Tasks} in the running session (depending on the loaded packages). +\item \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering. +\item Extension packages for additional task types: +\itemize{ +\item Unsupervised clustering: \CRANpkg{mlr3cluster} +\item Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}. +} +} + +Other Task: +\code{\link{Task}}, +\code{\link{TaskClassif}}, +\code{\link{TaskRegr}}, +\code{\link{TaskSupervised}}, +\code{\link{TaskUnsupervised}}, +\code{\link{mlr_tasks}}, +\code{\link{mlr_tasks_breast_cancer}}, +\code{\link{mlr_tasks_german_credit}}, +\code{\link{mlr_tasks_iris}}, +\code{\link{mlr_tasks_mtcars}}, +\code{\link{mlr_tasks_penguins}}, +\code{\link{mlr_tasks_pima}}, +\code{\link{mlr_tasks_sonar}}, +\code{\link{mlr_tasks_spam}}, +\code{\link{mlr_tasks_wine}}, +\code{\link{mlr_tasks_zoo}} +} +\concept{Task} diff --git a/man/mlr_tasks.Rd b/man/mlr_tasks.Rd index b0a203136..a6fdd74fc 100644 --- a/man/mlr_tasks.Rd +++ b/man/mlr_tasks.Rd @@ -72,7 +72,7 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, -\code{\link{mlr_tasks_boston_housing}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/mlr_tasks_boston_housing.Rd b/man/mlr_tasks_boston_housing.Rd deleted file mode 100644 index 
bd1884624..000000000 --- a/man/mlr_tasks_boston_housing.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/TaskRegr_boston_housing.R -\name{mlr_tasks_boston_housing} -\alias{mlr_tasks_boston_housing} -\title{Boston Housing Regression Task} -\format{ -\link[R6:R6Class]{R6::R6Class} inheriting from \link{TaskRegr}. -} -\description{ -A regression task for the \link[mlbench:BostonHousing]{mlbench::BostonHousing2} data set. -This is the corrected data using the corrected median value (\code{cmedv}) as target. -The uncorrected target (\code{medv}) is removed from the data. -} -\section{Construction}{ - - -\if{html}{\out{
}}\preformatted{mlr_tasks$get("boston_housing") -tsk("boston_housing") -}\if{html}{\out{
}} -} - -\section{Meta Information}{ - -\itemize{ -\item Task type: \dQuote{regr} -\item Dimensions: 506x18 -\item Properties: - -\item Has Missings: \code{FALSE} -\item Target: \dQuote{cmedv} -\item Features: \dQuote{age}, \dQuote{b}, \dQuote{chas}, \dQuote{crim}, \dQuote{dis}, \dQuote{indus}, \dQuote{lat}, \dQuote{lon}, \dQuote{lstat}, \dQuote{nox}, \dQuote{ptratio}, \dQuote{rad}, \dQuote{rm}, \dQuote{tax}, \dQuote{town}, \dQuote{tract}, \dQuote{zn} -} -} - -\seealso{ -\itemize{ -\item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: -\url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html} -\item Package \CRANpkg{mlr3data} for more toy tasks. -\item Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}. -\item Package \CRANpkg{mlr3viz} for some generic visualizations. -\item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Task]{Tasks}: \link{mlr_tasks} -\item \code{as.data.table(mlr_tasks)} for a table of available \link[=Task]{Tasks} in the running session (depending on the loaded packages). -\item \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering. -\item Extension packages for additional task types: -\itemize{ -\item Unsupervised clustering: \CRANpkg{mlr3cluster} -\item Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}. 
-} -} - -Other Task: -\code{\link{Task}}, -\code{\link{TaskClassif}}, -\code{\link{TaskRegr}}, -\code{\link{TaskSupervised}}, -\code{\link{TaskUnsupervised}}, -\code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_breast_cancer}}, -\code{\link{mlr_tasks_german_credit}}, -\code{\link{mlr_tasks_iris}}, -\code{\link{mlr_tasks_mtcars}}, -\code{\link{mlr_tasks_penguins}}, -\code{\link{mlr_tasks_pima}}, -\code{\link{mlr_tasks_sonar}}, -\code{\link{mlr_tasks_spam}}, -\code{\link{mlr_tasks_wine}}, -\code{\link{mlr_tasks_zoo}} -} -\concept{Task} diff --git a/man/mlr_tasks_breast_cancer.Rd b/man/mlr_tasks_breast_cancer.Rd index 40315a0ce..881fe404a 100644 --- a/man/mlr_tasks_breast_cancer.Rd +++ b/man/mlr_tasks_breast_cancer.Rd @@ -60,8 +60,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, \code{\link{mlr_tasks_mtcars}}, diff --git a/man/mlr_tasks_german_credit.Rd b/man/mlr_tasks_german_credit.Rd index 0187302e0..345e7d24d 100644 --- a/man/mlr_tasks_german_credit.Rd +++ b/man/mlr_tasks_german_credit.Rd @@ -83,8 +83,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_iris}}, \code{\link{mlr_tasks_mtcars}}, diff --git a/man/mlr_tasks_iris.Rd b/man/mlr_tasks_iris.Rd index 324a8afa1..eb5bb4df8 100644 --- a/man/mlr_tasks_iris.Rd +++ b/man/mlr_tasks_iris.Rd @@ -61,8 +61,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, 
\code{\link{mlr_tasks_mtcars}}, diff --git a/man/mlr_tasks_mtcars.Rd b/man/mlr_tasks_mtcars.Rd index a20ada95e..c2c488b24 100644 --- a/man/mlr_tasks_mtcars.Rd +++ b/man/mlr_tasks_mtcars.Rd @@ -54,8 +54,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/mlr_tasks_penguins.Rd b/man/mlr_tasks_penguins.Rd index 8a35b4228..619ae0f9c 100644 --- a/man/mlr_tasks_penguins.Rd +++ b/man/mlr_tasks_penguins.Rd @@ -73,8 +73,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/mlr_tasks_pima.Rd b/man/mlr_tasks_pima.Rd index c09365db6..bec66c6ae 100644 --- a/man/mlr_tasks_pima.Rd +++ b/man/mlr_tasks_pima.Rd @@ -54,8 +54,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/mlr_tasks_sonar.Rd b/man/mlr_tasks_sonar.Rd index 0d3119e0e..18b2cf488 100644 --- a/man/mlr_tasks_sonar.Rd +++ b/man/mlr_tasks_sonar.Rd @@ -54,8 +54,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/mlr_tasks_spam.Rd b/man/mlr_tasks_spam.Rd index 
7ac015fc3..f8d5d143c 100644 --- a/man/mlr_tasks_spam.Rd +++ b/man/mlr_tasks_spam.Rd @@ -72,8 +72,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/mlr_tasks_wine.Rd b/man/mlr_tasks_wine.Rd index 1df0f59a3..8f07ade33 100644 --- a/man/mlr_tasks_wine.Rd +++ b/man/mlr_tasks_wine.Rd @@ -67,8 +67,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/mlr_tasks_zoo.Rd b/man/mlr_tasks_zoo.Rd index 6babf04be..115df635c 100644 --- a/man/mlr_tasks_zoo.Rd +++ b/man/mlr_tasks_zoo.Rd @@ -54,8 +54,8 @@ Other Task: \code{\link{TaskRegr}}, \code{\link{TaskSupervised}}, \code{\link{TaskUnsupervised}}, +\code{\link{ames_housing}}, \code{\link{mlr_tasks}}, -\code{\link{mlr_tasks_boston_housing}}, \code{\link{mlr_tasks_breast_cancer}}, \code{\link{mlr_tasks_german_credit}}, \code{\link{mlr_tasks_iris}}, diff --git a/man/partition.Rd b/man/partition.Rd index da8cc415d..935c79855 100644 --- a/man/partition.Rd +++ b/man/partition.Rd @@ -20,7 +20,7 @@ Creates a split of the row ids of a \link{Task} into a training and a test set, } \examples{ # regression task partitioned into training and test set -task = tsk("boston_housing") +task = tsk("ames_housing") split = partition(task, ratio = 0.5) data = data.frame( y = c(task$truth(split$train), task$truth(split$test)), diff --git a/tests/testthat/test_Learner.R b/tests/testthat/test_Learner.R index 76611bf25..0c3c0e414 100644 --- a/tests/testthat/test_Learner.R +++ b/tests/testthat/test_Learner.R @@ -16,7 +16,7 
@@ test_that("clone", { }) test_that("Learners are called with invoke / small footprint of call", { - task = tsk("boston_housing") + task = tsk("ames_housing") learner = lrn("regr.rpart") learner$train(task) call = as.character(learner$model$call) @@ -27,7 +27,7 @@ test_that("Learners are called with invoke / small footprint of call", { }) test_that("Extra data slots of learners are kept / reset", { - task = tsk("boston_housing") + task = tsk("ames_housing") learner = lrn("regr.rpart") learner$train(task) learner$state$foo = "bar" @@ -101,7 +101,7 @@ test_that("train task is properly cloned (#383)", { }) test_that("predict on newdata works / regr", { - task = tsk("boston_housing") + task = tsk("ames_housing") train = which(seq_len(task$nrow) %% 2 == 0L) test = setdiff(seq_len(task$nrow), train) @@ -117,7 +117,7 @@ test_that("predict on newdata works / regr", { test_that("predict on newdata works / no target column", { - task = tsk("boston_housing") + task = tsk("ames_housing") train = which(seq_len(task$nrow) %% 2 == 0L) test = setdiff(seq_len(task$nrow), train) @@ -246,7 +246,7 @@ test_that("fallback learner is deep cloned (#511)", { }) test_that("learner cannot be trained with TuneToken present", { - task = tsk("boston_housing") + task = tsk("ames_housing") learner = lrn("regr.rpart", cp = paradox::to_tune(0.1, 0.3)) expect_error(learner$train(task), regexp = " cannot be trained with TuneToken present in hyperparameter: cp", @@ -307,17 +307,17 @@ test_that("Error on missing data (#413)", { }) test_that("Task prototype is stored in state", { - task = tsk("boston_housing") + task = tsk("ames_housing") learner = lrn("regr.rpart") learner$train(task) prototype = learner$state$data_prototype - expect_data_table(prototype, nrows = 0, ncols = 18) + expect_data_table(prototype, nrows = 0, ncols = 81) expect_names(names(prototype), permutation.of = c(task$feature_names, task$target_names)) }) test_that("Models can be replaced", { - task = tsk("boston_housing") + task = 
tsk("ames_housing") learner = lrn("regr.featureless") learner$train(task) diff --git a/tests/testthat/test_MeasureRegr.R b/tests/testthat/test_MeasureRegr.R index 0e9dcf760..a43509986 100644 --- a/tests/testthat/test_MeasureRegr.R +++ b/tests/testthat/test_MeasureRegr.R @@ -1,6 +1,6 @@ test_that("Regression measures", { keys = mlr_measures$keys("^regr\\.") - task = tsk("boston_housing") + task = tsk("ames_housing") learner = lrn("regr.rpart") learner$train(task) p = learner$predict(task) diff --git a/tests/testthat/test_PredictionRegr.R b/tests/testthat/test_PredictionRegr.R index fe615a188..fa6911395 100644 --- a/tests/testthat/test_PredictionRegr.R +++ b/tests/testthat/test_PredictionRegr.R @@ -1,12 +1,12 @@ test_that("Construction", { - task = tsk("boston_housing") + task = tsk("ames_housing") p = PredictionRegr$new(row_ids = task$row_ids, truth = task$truth(), response = task$truth()) expect_prediction(p) expect_prediction_regr(p) }) test_that("Internally constructed Prediction", { - task = tsk("boston_housing") + task = tsk("ames_housing") lrn = lrn("regr.featureless") lrn$predict_type = "se" p = lrn$train(task)$predict(task) @@ -16,7 +16,7 @@ test_that("Internally constructed Prediction", { test_that("c", { - task = tsk("boston_housing") + task = tsk("ames_housing") lrn = lrn("regr.featureless") lrn$predict_type = "se" rr = resample(task, lrn, rsmp("cv", folds = 3)) @@ -39,7 +39,7 @@ test_that("c", { }) test_that("c drops se (#250)", { - task = tsk("boston_housing") + task = tsk("ames_housing") lrn = lrn("regr.featureless") rr = resample(task, lrn, rsmp("cv", folds = 3)) diff --git a/tests/testthat/test_Resampling.R b/tests/testthat/test_Resampling.R index bce5a6cc5..6631f7a06 100644 --- a/tests/testthat/test_Resampling.R +++ b/tests/testthat/test_Resampling.R @@ -1,6 +1,6 @@ test_that("re-instantiating", { t1 = tsk("iris") - t2 = tsk("boston_housing") + t2 = tsk("ames_housing") r = rsmp("cv", folds = 2) expect_resampling(r$instantiate(t1), task = t1) diff 
--git a/tests/testthat/test_Task.R b/tests/testthat/test_Task.R index eb74f95de..acd83724b 100644 --- a/tests/testthat/test_Task.R +++ b/tests/testthat/test_Task.R @@ -1,15 +1,15 @@ test_that("Feature columns can be reordered", { - bh = load_dataset("BostonHousing", "mlbench") - bh$medv = NULL + ah = readRDS(system.file("extdata", "ames_housing.rds", package = "mlr3")) + ah$Sale_Price = NULL - task = tsk("boston_housing") - task$col_roles$feature = setdiff(names(bh), "cmedv") + task = tsk("ames_housing") + task$col_roles$feature = setdiff(names(ah), "Sale_Price") - expect_equal(task$feature_names, setdiff(names(bh), "cmedv")) - expect_equal(names(task$data(rows = 1)), c("cmedv", setdiff(names(bh), "cmedv"))) + expect_equal(task$feature_names, setdiff(names(ah), "Sale_Price")) + expect_equal(names(task$data(rows = 1)), c("Sale_Price", setdiff(names(ah), "Sale_Price"))) task$col_roles$feature = shuffle(task$col_roles$feature) - expect_equal(names(task$data(rows = 1)), c("cmedv", task$col_roles$feature)) + expect_equal(names(task$data(rows = 1)), c("Sale_Price", task$col_roles$feature)) }) test_that("Task duplicates rows", { diff --git a/tests/testthat/test_TaskRegr.R b/tests/testthat/test_TaskRegr.R index a71efb7de..137970a09 100644 --- a/tests/testthat/test_TaskRegr.R +++ b/tests/testthat/test_TaskRegr.R @@ -1,5 +1,5 @@ -test_that("Basic ops on BostonHousing task", { +test_that("Basic ops on ames_housing task", { - task = tsk("boston_housing") + task = tsk("ames_housing") expect_task(task) expect_task_supervised(task) expect_task_regr(task) diff --git a/tests/testthat/test_benchmark.R b/tests/testthat/test_benchmark.R index d4f712e89..1812331bb 100644 --- a/tests/testthat/test_benchmark.R +++ b/tests/testthat/test_benchmark.R @@ -161,7 +161,7 @@ test_that("predict_type is checked", { }) test_that("custom resampling (#245)", { - task_boston = tsk("boston_housing") + task_boston = tsk("ames_housing") lrn = lrn("regr.featureless") rdesc = rsmp("custom") @@ -329,7 +329,7 @@ test_that("disable cloning", { 
test_that("task and learner assertions", { grid = benchmark_grid( - tasks = tsks(c("iris", "boston_housing")), + tasks = tsks(c("iris", "ames_housing")), learners = lrn("classif.rpart"), resamplings = rsmp("holdout") ) diff --git a/tests/testthat/test_convert_task.R b/tests/testthat/test_convert_task.R index 411010b75..37e2fe876 100644 --- a/tests/testthat/test_convert_task.R +++ b/tests/testthat/test_convert_task.R @@ -1,12 +1,12 @@ test_that("convert_task - Regr -> Regr", { - task = mlr_tasks$get("boston_housing") - result = convert_task(task, target = "age", drop_original_target = TRUE) + task = tsk("ames_housing") + result = convert_task(task, target = "Mas_Vnr_Area", drop_original_target = TRUE) expect_class(result, "TaskRegr") expect_task(result) - expect_true(result$col_roles$target == "age") - expect_true(all(result$feature_names != "age")) - expect_true(all(result$feature_names != "cmedv")) + expect_true(result$col_roles$target == "Mas_Vnr_Area") + expect_true(all(result$feature_names != "Mas_Vnr_Area")) + expect_true(all(result$feature_names != "Sale_Price")) expect_true(all(unlist(imap(result$row_roles, .f = function(z, x) { all(result$row_roles[[x]] == task$row_roles[[x]]) @@ -19,14 +19,14 @@ test_that("convert_task - Regr -> Regr", { }) test_that("convert_task - Regr -> Classif", { - task = mlr_tasks$get("boston_housing") - result = convert_task(task, target = "chas", new_type = "classif", drop_original_target = TRUE) + task = tsk("ames_housing") + result = convert_task(task, target = "Alley", new_type = "classif", drop_original_target = TRUE) expect_class(result, "TaskClassif") expect_task(result) - expect_true(result$col_roles$target == "chas") - expect_true(all(result$feature_names != "chas")) - expect_true(all(result$feature_names != "cmedv")) + expect_true(result$col_roles$target == "Alley") + expect_true(all(result$feature_names != "Alley")) + expect_true(all(result$feature_names != "Sale_Price")) expect_true(all(unlist(imap(result$row_roles, .f 
= function(z, x) { all(result$row_roles[[x]] == task$row_roles[[x]]) @@ -39,7 +39,7 @@ test_that("convert_task - Regr -> Classif", { }) test_that("convert_task - Classif -> Regr", { - task = mlr_tasks$get("iris") + task = tsk("iris") result = convert_task(task, target = "Sepal.Width", new_type = "regr", drop_original_target = TRUE) expect_class(result, "TaskRegr") @@ -59,18 +59,18 @@ test_that("convert_task - Classif -> Regr", { }) test_that("convert_task - same target", { - task = tsk("boston_housing") - task$col_roles$feature = setdiff(task$col_roles$feature, "lat") + task = tsk("ames_housing") + task$col_roles$feature = setdiff(task$col_roles$feature, "Latitude") results = list( - convert_task(task, target = "cmedv", new_type = "regr", drop_original_target = TRUE), - convert_task(task, target = "cmedv", new_type = "regr", drop_original_target = FALSE) + convert_task(task, target = "Sale_Price", new_type = "regr", drop_original_target = TRUE), + convert_task(task, target = "Sale_Price", new_type = "regr", drop_original_target = FALSE) ) for (result in results) { expect_class(result, "TaskRegr") expect_task(result) - expect_true(result$col_roles$target == "cmedv") + expect_true(result$col_roles$target == "Sale_Price") expect_true(all(unlist(imap(result$row_roles, .f = function(z, x) { all(result$row_roles[[x]] == task$row_roles[[x]]) @@ -88,19 +88,19 @@ test_that("convert_task - same target", { }) test_that("convert task - general checks", { - btask = mlr_tasks$get("boston_housing") - itask = mlr_tasks$get("iris") + btask = tsk("ames_housing") + itask = tsk("iris") # target does not exist - expect_error(convert_task(btask, target = "cmedv2")) + expect_error(convert_task(btask, target = "Sale_Price2")) # target class does not match - expect_error(convert_task(btask, target = "cmedv", new_type = "classif")) + expect_error(convert_task(btask, target = "Sale_Price", new_type = 
"classif")) expect_error(convert_task(itask, target = "Sepal.Length", new_type = "classif")) }) test_that("convert_task reconstructs task", { - task = mlr_tasks$get("iris") + task = tsk("iris") tsk = convert_task(task) tsk$man = "mlr3::mlr_tasks_iris" suppressWarnings(expect_equal(task, tsk, ignore_attr = TRUE)) diff --git a/tests/testthat/test_mlr_reflections.R b/tests/testthat/test_mlr_reflections.R index caec52f92..693b0640e 100644 --- a/tests/testthat/test_mlr_reflections.R +++ b/tests/testthat/test_mlr_reflections.R @@ -35,19 +35,19 @@ measure = msr("classif.ce") test_that("assertions work", { expect_learner(assert_learner(learner, task)) - expect_error(assert_learner(learner, tsk("boston_housing")), "must have task type") + expect_error(assert_learner(learner, tsk("ames_housing")), "must have task type") expect_null(assert_task_learner(task, learner)) - expect_error(assert_task_learner(tsk("boston_housing"), learner), "not match type") + expect_error(assert_task_learner(tsk("ames_housing"), learner), "not match type") expect_measure(assert_measure(measure, task, learner)) - expect_error(assert_measure(measure, tsk("boston_housing"), learner), "is not compatible") + expect_error(assert_measure(measure, tsk("ames_housing"), learner), "is not compatible") expect_error(assert_measure(measure, task, lrn("regr.rpart")), "is not compatible") at = learner class(at) = c("AutoTuner", "Learner", "R6") expect_learner(assert_learner(at, task)) - expect_error(assert_learner(at, tsk("boston_housing")), "must have task type") + expect_error(assert_learner(at, tsk("ames_housing")), "must have task type") expect_null(assert_task_learner(task, at)) - expect_error(assert_task_learner(tsk("boston_housing"), at)) + expect_error(assert_task_learner(tsk("ames_housing"), at)) }) test_that("train and predict works", { @@ -79,7 +79,7 @@ test_that("benchmark works", { expect_data_table(tab, nrows = 1L) expect_names(names(tab), type = "unique", identical.to = c("nr", 
"resample_result", "task_id", "learner_id", "resampling_id", "iters", "classif.ce")) - grid = benchmark_grid(list(task, tsk("mtcars"), tsk("boston_housing")), learner, rsmp("cv", folds = 3)) + grid = benchmark_grid(list(task, tsk("mtcars"), tsk("ames_housing")), learner, rsmp("cv", folds = 3)) expect_error(benchmark(grid), "Multiple task types detected") }) diff --git a/tests/testthat/test_predict.R b/tests/testthat/test_predict.R index 1594811c1..5ee97ae4d 100644 --- a/tests/testthat/test_predict.R +++ b/tests/testthat/test_predict.R @@ -50,8 +50,8 @@ test_that("missing predictions are handled gracefully / regr", { test_that("predict_newdata with weights (#519)", { - task = tsk("boston_housing") - task$set_col_roles("nox", "weight") + task = tsk("ames_housing") + task$set_col_roles("Bsmt_Half_Bath", "weight") learner = lrn("regr.featureless") learner$train(task) expect_prediction(learner$predict(task)) @@ -60,7 +60,7 @@ test_that("predict_newdata with weights (#519)", { expect_prediction(learner$predict_newdata(task$data())) # w weights - expect_prediction(learner$predict_newdata(task$data(cols = c(task$target_names, task$feature_names, "nox")))) + expect_prediction(learner$predict_newdata(task$data(cols = c(task$target_names, task$feature_names, "Bsmt_Half_Bath")))) }) test_that("parallel predict works", { @@ -78,3 +78,6 @@ test_that("parallel predict works", { expect_equal(as.data.table(p1), as.data.table(p2)) }) + + +