From 147c7063f50081cddf396b40ed5d2ab96a0175aa Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Fri, 7 Feb 2025 06:43:17 +0100 Subject: [PATCH 1/2] feat(learner): initialize shuffle to TRUE --- NEWS.md | 1 + R/paramset_torchlearner.R | 2 +- man-roxygen/paramset_torchlearner.R | 4 ++-- man/mlr_learners_torch.Rd | 4 ++-- man/mlr_pipeops_torch_model.Rd | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index bafe4ad5..09962b16 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,7 @@ * feat: `PipeOpBlock` should no longer create ID clashes with other PipeOps in the graph (#260) * fix: `device` is no longer part of the `dataset` which allows for parallel dataloading on GPUs. +* feat: The `shuffle` parameter during model training is now initialized to `TRUE`. # mlr3torch 0.1.2 diff --git a/R/paramset_torchlearner.R b/R/paramset_torchlearner.R index 098743dd..0af5d74d 100644 --- a/R/paramset_torchlearner.R +++ b/R/paramset_torchlearner.R @@ -69,7 +69,7 @@ paramset_torchlearner = function(task_type) { min_delta = p_dbl(lower = 0, tags = c("train", "required"), init = 0), # dataloader parameters batch_size = p_int(tags = c("train", "predict", "required"), lower = 1L), - shuffle = p_lgl(tags = "train", default = FALSE), + shuffle = p_lgl(tags = "train", default = FALSE, init = TRUE), sampler = p_uty(tags = c("train", "predict")), batch_sampler = p_uty(tags = c("train", "predict")), num_workers = p_int(lower = 0, default = 0, tags = c("train", "predict")), diff --git a/man-roxygen/paramset_torchlearner.R b/man-roxygen/paramset_torchlearner.R index 20a0ec04..91073935 100644 --- a/man-roxygen/paramset_torchlearner.R +++ b/man-roxygen/paramset_torchlearner.R @@ -63,8 +63,8 @@ #' * `batch_size` :: `integer(1)`\cr #' The batch size (required). #' * `shuffle` :: `logical(1)`\cr -#' Whether to shuffle the instances in the dataset. Default is `FALSE`. -#' This does not impact validation. +#' Whether to shuffle the instances in the dataset. This is initialized to`FALSE`, +#' which differs from the default (`TRUE`). #' * `sampler` :: [`torch::sampler`]\cr #' Object that defines how the dataloader draw samples. #' * `batch_sampler` :: [`torch::sampler`]\cr diff --git a/man/mlr_learners_torch.Rd b/man/mlr_learners_torch.Rd index 303f0760..3ab7cd81 100644 --- a/man/mlr_learners_torch.Rd +++ b/man/mlr_learners_torch.Rd @@ -125,8 +125,8 @@ Is initialized to 0. \item \code{batch_size} :: \code{integer(1)}\cr The batch size (required). \item \code{shuffle} :: \code{logical(1)}\cr -Whether to shuffle the instances in the dataset. Default is \code{FALSE}. -This does not impact validation. +Whether to shuffle the instances in the dataset. This is initialized to\code{FALSE}, +which differs from the default (\code{TRUE}). \item \code{sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr Object that defines how the dataloader draw samples. \item \code{batch_sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr diff --git a/man/mlr_pipeops_torch_model.Rd b/man/mlr_pipeops_torch_model.Rd index 7b7438d8..65436179 100644 --- a/man/mlr_pipeops_torch_model.Rd +++ b/man/mlr_pipeops_torch_model.Rd @@ -80,8 +80,8 @@ Is initialized to 0. \item \code{batch_size} :: \code{integer(1)}\cr The batch size (required). \item \code{shuffle} :: \code{logical(1)}\cr -Whether to shuffle the instances in the dataset. Default is \code{FALSE}. -This does not impact validation. +Whether to shuffle the instances in the dataset. This is initialized to\code{FALSE}, +which differs from the default (\code{TRUE}). \item \code{sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr Object that defines how the dataloader draw samples. \item \code{batch_sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr From 1e3395ef0e302f568edba3e3f4fb250f438a40a2 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Fri, 7 Feb 2025 06:47:05 +0100 Subject: [PATCH 2/2] typo --- man-roxygen/paramset_torchlearner.R | 4 ++-- man/mlr_learners_torch.Rd | 4 ++-- man/mlr_pipeops_torch_model.Rd | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/man-roxygen/paramset_torchlearner.R b/man-roxygen/paramset_torchlearner.R index 91073935..46e62e5c 100644 --- a/man-roxygen/paramset_torchlearner.R +++ b/man-roxygen/paramset_torchlearner.R @@ -63,8 +63,8 @@ #' * `batch_size` :: `integer(1)`\cr #' The batch size (required). #' * `shuffle` :: `logical(1)`\cr -#' Whether to shuffle the instances in the dataset. This is initialized to`FALSE`, -#' which differs from the default (`TRUE`). +#' Whether to shuffle the instances in the dataset. This is initialized to `TRUE`, +#' which differs from the default (`FALSE`). #' * `sampler` :: [`torch::sampler`]\cr #' Object that defines how the dataloader draw samples. #' * `batch_sampler` :: [`torch::sampler`]\cr diff --git a/man/mlr_learners_torch.Rd b/man/mlr_learners_torch.Rd index 3ab7cd81..da3f05b6 100644 --- a/man/mlr_learners_torch.Rd +++ b/man/mlr_learners_torch.Rd @@ -125,8 +125,8 @@ Is initialized to 0. \item \code{batch_size} :: \code{integer(1)}\cr The batch size (required). \item \code{shuffle} :: \code{logical(1)}\cr -Whether to shuffle the instances in the dataset. This is initialized to\code{FALSE}, -which differs from the default (\code{TRUE}). +Whether to shuffle the instances in the dataset. This is initialized to \code{TRUE}, +which differs from the default (\code{FALSE}). \item \code{sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr Object that defines how the dataloader draw samples. \item \code{batch_sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr diff --git a/man/mlr_pipeops_torch_model.Rd b/man/mlr_pipeops_torch_model.Rd index 65436179..2e5875be 100644 --- a/man/mlr_pipeops_torch_model.Rd +++ b/man/mlr_pipeops_torch_model.Rd @@ -80,8 +80,8 @@ Is initialized to 0. \item \code{batch_size} :: \code{integer(1)}\cr The batch size (required). \item \code{shuffle} :: \code{logical(1)}\cr -Whether to shuffle the instances in the dataset. This is initialized to\code{FALSE}, -which differs from the default (\code{TRUE}). +Whether to shuffle the instances in the dataset. This is initialized to \code{TRUE}, +which differs from the default (\code{FALSE}). \item \code{sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr Object that defines how the dataloader draw samples. \item \code{batch_sampler} :: \code{\link[torch:sampler]{torch::sampler}}\cr