From a9329554f96a9dd7cd5bfb7ace234e3652de5442 Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Tue, 7 Jan 2025 10:30:36 -0600
Subject: [PATCH] passes local check

---
 R/TaskClassif_cifar.R               |  8 +++--
 R/TaskClassif_melanoma.R            |  1 +
 attic/task_manual_construct-cifar.R | 52 -----------------------------
 attic/try-CallbackSetUnfreeze.R     | 37 --------------------
 attic/try-Select.R                  | 27 ---------------
 data-raw/cifar.R                    | 32 ++++++++++--------
 man/mlr_tasks_cifar.Rd              |  4 +--
 7 files changed, 26 insertions(+), 135 deletions(-)
 delete mode 100644 attic/task_manual_construct-cifar.R
 delete mode 100644 attic/try-CallbackSetUnfreeze.R
 delete mode 100644 attic/try-Select.R

diff --git a/R/TaskClassif_cifar.R b/R/TaskClassif_cifar.R
index 246fba07..2a5639d9 100644
--- a/R/TaskClassif_cifar.R
+++ b/R/TaskClassif_cifar.R
@@ -6,8 +6,8 @@
 #' @include aaa.R
 #'
 #' @description
-#' The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset.
-#' The data is obtained from [`torchvision::cifar10_dataset()`] or [`torchvision::cifar100:dataset()`].
+#' The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset. CIFAR-10 contains 60000 32x32 colour images in 10 classes (6000 images per class); CIFAR-100 contains 60000 images in 100 classes (600 images per class).
+#' The data is obtained from [`torchvision::cifar10_dataset()`] (or `torchvision::cifar100_dataset()`).
 #'
 #' @section Construction:
 #' ```
@@ -22,7 +22,7 @@
 #'
 #' @references
 #' `r format_bib("cifar2009")`
-#' @examplesIf torch::torch_is_installed()
+#' @examples
 #' task_cifar10 = tsk("cifar10")
 #' task_cifar100 = tsk("cifar100")
 #' print(task_cifar10)
@@ -117,6 +117,8 @@ constructor_cifar100 = function(path) {
 
   labels = c(d_train$y, d_test$y)
   images = array(NA, dim = c(60000, 3, 32, 32))
+  # original data has channel dimension at the end
+  perm_idx = c(1, 4, 2, 3)
   images[1:50000, , , ] = aperm(d_train$x, perm_idx, resize = TRUE)
   images[50001:60000, , , ] = aperm(d_test$x, perm_idx, resize = TRUE)
 
diff --git a/R/TaskClassif_melanoma.R b/R/TaskClassif_melanoma.R
index 3c4c5531..d3a62063 100644
--- a/R/TaskClassif_melanoma.R
+++ b/R/TaskClassif_melanoma.R
@@ -37,6 +37,7 @@
 #'
 #' @references
 #' `r format_bib("melanoma2021")`
+#' @examples
 #' task = tsk("melanoma")
 #' task
 NULL
diff --git a/attic/task_manual_construct-cifar.R b/attic/task_manual_construct-cifar.R
deleted file mode 100644
index 5e6a4dff..00000000
--- a/attic/task_manual_construct-cifar.R
+++ /dev/null
@@ -1,52 +0,0 @@
-devtools::load_all()
-withr::local_options(mlr3torch.cache = TRUE)
-
-constructor_cifar10 = function(path) {
-  require_namespaces("torchvision")
-
-  ds_train = torchvision::cifar10_dataset(root = file.path(path), download = TRUE, train = TRUE)
-
-  browser()
-
-
-}
-
-constructor_cifar10(path <- file.path(get_cache_dir(), "datasets", "cifar10"))
-
-devtools::load_all()
-library(here)
-
-path <- here("cache")
-
-cifar_ds_train <- torchvision::cifar10_dataset(root = file.path(path), train = TRUE)
-cifar_ds_test <- torchvision::cifar10_dataset(root = file.path(path), download = FALSE, train = TRUE)
-
-# path: the full path to the batch binary file
-# i: the "global" index (1 to 60k) of the image
-# so, the lazy version of this data needs to store
-  # file name
-  # offset
-read_img_from_batch = function(path, i) {
-  img = array(dim = c(32, 32, 3))
-
-  con = file(path, open = "rb")
-
-  on.exit({close(con)}, add = TRUE)
-
-  label = readBin(con, integer(), size = 1, n = 1, endian = "big")
-
-  r = as.integer(readBin(con, raw(), size = 1, n = 1024, endian = "big"))
-  g = as.integer(readBin(con, raw(), size = 1, n = 1024, endian = "big"))
-  b = as.integer(readBin(con, raw(), size = 1, n = 1024, endian = "big"))
-
-  img[,,1] = matrix(r, ncol = 32, byrow = TRUE)
-  img[,,2] = matrix(g, ncol = 32, byrow = TRUE)
-  img[,,3] = matrix(b, ncol = 32, byrow = TRUE)
-
-  list(img = img, label = label)
-}
-
-# first: remove the response (handled separately)
-dd = as_data_descriptor(cifar_ds_train, list(x = c(NA, 3, 32, 32)))
-
-img10 = read_img_from_batch(file.path(path, "cifar-10-batches-bin", "data_batch_2.bin"), 1)
diff --git a/attic/try-CallbackSetUnfreeze.R b/attic/try-CallbackSetUnfreeze.R
deleted file mode 100644
index 8fef25ab..00000000
--- a/attic/try-CallbackSetUnfreeze.R
+++ /dev/null
@@ -1,37 +0,0 @@
-devtools::load_all()
-
-task = tsk("iris")
-
-mlp = lrn("classif.mlp",
-  epochs = 10, batch_size = 150, neurons = c(100, 200, 300)
-)
-
-# sela = selector_all()
-# sela(mlp$network$modules)
-
-mlp$train(task)
-
-# do this for each element in the parameters list
-mlp$model$network$modules[["9"]]$parameters[[1]]$requires_grad_(TRUE)
-mlp$model$network$modules[["9"]]$parameters[[2]]$requires_grad_(TRUE)
-
-
-# construct a NN as a graph
-module_1 = nn_linear(in_features = 3, out_features = 4, bias = TRUE)
-activation = nn_sigmoid()
-module_2 = nn_linear(4, 3, bias = TRUE)
-softmax = nn_softmax(2)
-
-po_module_1 = po("module_1", module = module_1)
-po_activation = po("module", id = "activation", activation)
-po_module_2 = po("module_2", module = module_2)
-po_softmax = po("module", id = "softmax", module = softmax)
-
-module_graph = po_module_1 %>>%
-  po_activation %>>%
-  po_module_2 %>>%
-  po_softmax
-
-module_graph$plot(html = TRUE)
-
-module_graph
\ No newline at end of file
diff --git a/attic/try-Select.R b/attic/try-Select.R
deleted file mode 100644
index 81ba380b..00000000
--- a/attic/try-Select.R
+++ /dev/null
@@ -1,27 +0,0 @@
-
-n_epochs = 10
-
-task = tsk("iris")
-
-mlp = lrn("classif.mlp",
-  epochs = 10, batch_size = 150, neurons = c(100, 200, 300)
-)
-mlp$train(task)
-
-names(mlp$network$parameters)
-
-sela = select_all()
-sela(names(mlp$network$parameters))
-
-selg = select_grep("weight")
-selg(names(mlp$network$parameters))
-
-seln = select_name("0.weight")
-seln(names(mlp$network$parameters))
-
-seli = select_invert(select_name("0.weight"))
-seli(names(mlp$network$parameters))
-
-seln = select_none()
-seln(names(mlp$network$parameters))
-
diff --git a/data-raw/cifar.R b/data-raw/cifar.R
index 1bbdc104..f0d6517a 100644
--- a/data-raw/cifar.R
+++ b/data-raw/cifar.R
@@ -8,14 +8,16 @@ library(torchvision)
 constructor_cifar10 = function(path) {
   require_namespaces("torchvision")
 
-  tv_ds_train = torchvision::cifar10_dataset(root = path, train = TRUE, download = TRUE)
-  tv_data_train = tv_ds_train$.getitem(1:50000)
+  d_train = torchvision::cifar10_dataset(root = path, train = TRUE, download = TRUE)
 
-  tv_ds_test = torchvision::cifar10_dataset(root = path, train = FALSE, download = FALSE)
-  tv_data_test = tv_ds_test$.getitem(1:10000)
+  d_test = torchvision::cifar10_dataset(root = path, train = FALSE, download = FALSE)
 
-  labels = c(tv_data_train$y, tv_data_test$y)
-  images = array(c(tv_data_train$x, tv_data_test$x), dim = c(60000, 32, 32, 3))
+  labels = c(d_train$y, d_test$y)
+  images = array(NA, dim = c(60000, 3, 32, 32))
+  # original data has channel dimension at the end
+  perm_idx = c(1, 4, 2, 3)
+  images[1:50000, , , ] = aperm(d_train$x, perm_idx, resize = TRUE)
+  images[50001:60000, , , ] = aperm(d_test$x, perm_idx, resize = TRUE)
 
   class_names = readLines(file.path(path, "cifar-10-batches-bin", "batches.meta.txt"))
   class_names = class_names[class_names != ""]
@@ -47,7 +49,7 @@ cifar10_ds_generator = torch::dataset(
 
 cifar10_ds = cifar10_ds_generator(data$images)
 
-dd = as_data_descriptor(cifar10_ds, list(x = c(NA, 32, 32, 3)))
+dd = as_data_descriptor(cifar10_ds, list(x = c(NA, 3, 32, 32)))
 lt = lazy_tensor(dd)
 
 tsk_dt = data.table(
@@ -73,14 +75,16 @@ path = file.path(get_cache_dir(), "datasets", "cifar100", "raw")
 
 constructor_cifar100 = function(path) {
   require_namespaces("torchvision")
 
-  tv_ds_train = torchvision::cifar100_dataset(root = path, train = TRUE, download = TRUE)
-  tv_data_train = tv_ds_train$.getitem(1:50000)
+  d_train = torchvision::cifar100_dataset(root = path, train = TRUE, download = TRUE)
 
-  tv_ds_test = torchvision::cifar100_dataset(root = path, train = FALSE, download = FALSE)
-  tv_data_test = tv_ds_test$.getitem(1:10000)
+  d_test = torchvision::cifar100_dataset(root = path, train = FALSE, download = FALSE)
 
-  labels = c(tv_data_train$y, tv_data_test$y)
-  images = array(c(tv_data_train$x, tv_data_test$x), dim = c(60000, 32, 32, 3))
+  labels = c(d_train$y, d_test$y)
+  images = array(NA, dim = c(60000, 3, 32, 32))
+  # original data has channel dimension at the end
+  perm_idx = c(1, 4, 2, 3)
+  images[1:50000, , , ] = aperm(d_train$x, perm_idx, resize = TRUE)
+  images[50001:60000, , , ] = aperm(d_test$x, perm_idx, resize = TRUE)
 
   class_names = readLines(file.path(path, "cifar-100-binary", "fine_label_names.txt"))
@@ -107,7 +111,7 @@ cifar100_ds_generator = torch::dataset(
 
 cifar100_ds = cifar100_ds_generator(data$images)
 
-dd = as_data_descriptor(cifar100_ds, list(x = c(NA, 32, 32, 3)))
+dd = as_data_descriptor(cifar100_ds, list(x = c(NA, 3, 32, 32)))
 lt = lazy_tensor(dd)
 
 dt = data.table(
diff --git a/man/mlr_tasks_cifar.Rd b/man/mlr_tasks_cifar.Rd
index a343bd76..224c3391 100644
--- a/man/mlr_tasks_cifar.Rd
+++ b/man/mlr_tasks_cifar.Rd
@@ -7,8 +7,8 @@
 \link[R6:R6Class]{R6::R6Class} inheriting from \link[mlr3:TaskClassif]{mlr3::TaskClassif}.
 }
 \description{
-The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset.
-The data is obtained from \code{\link[torchvision:cifar10_dataset]{torchvision::cifar10_dataset()}} or \code{\link[torchvision:cifar100:dataset]{torchvision::cifar100:dataset()}}.
+The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset. CIFAR-10 contains 60000 32x32 colour images in 10 classes (6000 images per class); CIFAR-100 contains 60000 images in 100 classes (600 images per class).
+The data is obtained from \code{\link[torchvision:cifar10_dataset]{torchvision::cifar10_dataset()}} (or \code{torchvision::cifar100_dataset()}).
 }
 
 \section{Construction}{