From a9329554f96a9dd7cd5bfb7ace234e3652de5442 Mon Sep 17 00:00:00 2001
From: Carson Zhang
Date: Tue, 7 Jan 2025 10:30:36 -0600
Subject: [PATCH] passes local check

---
 R/TaskClassif_cifar.R               |  8 +++--
 R/TaskClassif_melanoma.R            |  1 +
 attic/task_manual_construct-cifar.R | 52 -----------------------------
 attic/try-CallbackSetUnfreeze.R     | 37 --------------------
 attic/try-Select.R                  | 27 ---------------
 data-raw/cifar.R                    | 32 ++++++++++--------
 man/mlr_tasks_cifar.Rd              |  4 +--
 7 files changed, 26 insertions(+), 135 deletions(-)
 delete mode 100644 attic/task_manual_construct-cifar.R
 delete mode 100644 attic/try-CallbackSetUnfreeze.R
 delete mode 100644 attic/try-Select.R

diff --git a/R/TaskClassif_cifar.R b/R/TaskClassif_cifar.R
index 246fba07..2a5639d9 100644
--- a/R/TaskClassif_cifar.R
+++ b/R/TaskClassif_cifar.R
@@ -6,8 +6,8 @@
 #' @include aaa.R
 #'
 #' @description
-#' The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset.
-#' The data is obtained from [`torchvision::cifar10_dataset()`] or [`torchvision::cifar100:dataset()`].
+#' The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset. CIFAR-10 contains 60000 32x32 colour images in 10 classes (6000 images per class); CIFAR-100 contains 60000 images in 100 classes (600 images per class).
+#' The data is obtained from [`torchvision::cifar10_dataset()`] (or `torchvision::cifar100_dataset()`).
 #'
 #' @section Construction:
 #' ```
@@ -22,7 +22,7 @@
 #'
 #' @references
 #' `r format_bib("cifar2009")`
-#' @examplesIf torch::torch_is_installed()
+#' @examples
 #' task_cifar10 = tsk("cifar10")
 #' task_cifar100 = tsk("cifar100")
 #' print(task_cifar10)
@@ -117,6 +117,8 @@ constructor_cifar100 = function(path) {
 
   labels = c(d_train$y, d_test$y)
   images = array(NA, dim = c(60000, 3, 32, 32))
+  # original data has channel dimension at the end
+  perm_idx = c(1, 4, 2, 3)
   images[1:50000, , , ] = aperm(d_train$x, perm_idx, resize = TRUE)
   images[50001:60000, , , ] = aperm(d_test$x, perm_idx, resize = TRUE)
 
diff --git a/R/TaskClassif_melanoma.R b/R/TaskClassif_melanoma.R
index 3c4c5531..d3a62063 100644
--- a/R/TaskClassif_melanoma.R
+++ b/R/TaskClassif_melanoma.R
@@ -37,6 +37,7 @@
 #'
 #' @references
 #' `r format_bib("melanoma2021")`
+#' @examples
 #' task = tsk("melanoma")
 #' task
 NULL
diff --git a/attic/task_manual_construct-cifar.R b/attic/task_manual_construct-cifar.R
deleted file mode 100644
index 5e6a4dff..00000000
--- a/attic/task_manual_construct-cifar.R
+++ /dev/null
@@ -1,52 +0,0 @@
-devtools::load_all()
-withr::local_options(mlr3torch.cache = TRUE)
-
-constructor_cifar10 = function(path) {
-  require_namespaces("torchvision")
-
-  ds_train = torchvision::cifar10_dataset(root = file.path(path), download = TRUE, train = TRUE)
-
-  browser()
-
-
-}
-
-constructor_cifar10(path <- file.path(get_cache_dir(), "datasets", "cifar10"))
-
-devtools::load_all()
-library(here)
-
-path <- here("cache")
-
-cifar_ds_train <- torchvision::cifar10_dataset(root = file.path(path), train = TRUE)
-cifar_ds_test <- torchvision::cifar10_dataset(root = file.path(path), download = FALSE, train = TRUE)
-
-# path: the full path to the batch binary file
-# i: the "global" index (1 to 60k) of the image
-# so, the lazy version of this data needs to store
-  # file name
-  # offset
-read_img_from_batch = function(path, i) {
-  img = array(dim = c(32, 32, 3))
-
-  con = file(path, open = "rb")
-
-  on.exit({close(con)}, add = TRUE)
-
-  label = readBin(con, integer(), size = 1, n = 1, endian = "big")
-
-  r = as.integer(readBin(con, raw(), size = 1, n = 1024, endian = "big"))
-  g = as.integer(readBin(con, raw(), size = 1, n = 1024, endian = "big"))
-  b = as.integer(readBin(con, raw(), size = 1, n = 1024, endian = "big"))
-
-  img[,,1] = matrix(r, ncol = 32, byrow = TRUE)
-  img[,,2] = matrix(g, ncol = 32, byrow = TRUE)
-  img[,,3] = matrix(b, ncol = 32, byrow = TRUE)
-
-  list(img = img, label = label)
-}
-
-# first: remove the response (handled separately)
-dd = as_data_descriptor(cifar_ds_train, list(x = c(NA, 3, 32, 32)))
-
-img10 = read_img_from_batch(file.path(path, "cifar-10-batches-bin", "data_batch_2.bin"), 1)
diff --git a/attic/try-CallbackSetUnfreeze.R b/attic/try-CallbackSetUnfreeze.R
deleted file mode 100644
index 8fef25ab..00000000
--- a/attic/try-CallbackSetUnfreeze.R
+++ /dev/null
@@ -1,37 +0,0 @@
-devtools::load_all()
-
-task = tsk("iris")
-
-mlp = lrn("classif.mlp",
-  epochs = 10, batch_size = 150, neurons = c(100, 200, 300)
-)
-
-# sela = selector_all()
-# sela(mlp$network$modules)
-
-mlp$train(task)
-
-# do this for each element in the parameters list
-mlp$model$network$modules[["9"]]$parameters[[1]]$requires_grad_(TRUE)
-mlp$model$network$modules[["9"]]$parameters[[2]]$requires_grad_(TRUE)
-
-
-# construct a NN as a graph
-module_1 = nn_linear(in_features = 3, out_features = 4, bias = TRUE)
-activation = nn_sigmoid()
-module_2 = nn_linear(4, 3, bias = TRUE)
-softmax = nn_softmax(2)
-
-po_module_1 = po("module_1", module = module_1)
-po_activation = po("module", id = "activation", activation)
-po_module_2 = po("module_2", module = module_2)
-po_softmax = po("module", id = "softmax", module = softmax)
-
-module_graph = po_module_1 %>>%
-  po_activation %>>%
-  po_module_2 %>>%
-  po_softmax
-
-module_graph$plot(html = TRUE)
-
-module_graph
\ No newline at end of file
diff --git a/attic/try-Select.R b/attic/try-Select.R
deleted file mode 100644
index 81ba380b..00000000
--- a/attic/try-Select.R
+++ /dev/null
@@ -1,27 +0,0 @@
-
-n_epochs = 10
-
-task = tsk("iris")
-
-mlp = lrn("classif.mlp",
-  epochs = 10, batch_size = 150, neurons = c(100, 200, 300)
-)
-mlp$train(task)
-
-names(mlp$network$parameters)
-
-sela = select_all()
-sela(names(mlp$network$parameters))
-
-selg = select_grep("weight")
-selg(names(mlp$network$parameters))
-
-seln = select_name("0.weight")
-seln(names(mlp$network$parameters))
-
-seli = select_invert(select_name("0.weight"))
-seli(names(mlp$network$parameters))
-
-seln = select_none()
-seln(names(mlp$network$parameters))
-
diff --git a/data-raw/cifar.R b/data-raw/cifar.R
index 1bbdc104..f0d6517a 100644
--- a/data-raw/cifar.R
+++ b/data-raw/cifar.R
@@ -8,14 +8,16 @@ library(torchvision)
 constructor_cifar10 = function(path) {
   require_namespaces("torchvision")
 
-  tv_ds_train = torchvision::cifar10_dataset(root = path, train = TRUE, download = TRUE)
-  tv_data_train = tv_ds_train$.getitem(1:50000)
+  d_train = torchvision::cifar10_dataset(root = path, train = TRUE, download = TRUE)
 
-  tv_ds_test = torchvision::cifar10_dataset(root = path, train = FALSE, download = FALSE)
-  tv_data_test = tv_ds_test$.getitem(1:10000)
+  d_test = torchvision::cifar10_dataset(root = path, train = FALSE, download = FALSE)
 
-  labels = c(tv_data_train$y, tv_data_test$y)
-  images = array(c(tv_data_train$x, tv_data_test$x), dim = c(60000, 32, 32, 3))
+  labels = c(d_train$y, d_test$y)
+  images = array(NA, dim = c(60000, 3, 32, 32))
+  # original data has channel dimension at the end
+  perm_idx = c(1, 4, 2, 3)
+  images[1:50000, , , ] = aperm(d_train$x, perm_idx, resize = TRUE)
+  images[50001:60000, , , ] = aperm(d_test$x, perm_idx, resize = TRUE)
 
   class_names = readLines(file.path(path, "cifar-10-batches-bin", "batches.meta.txt"))
   class_names = class_names[class_names != ""]
@@ -47,7 +49,7 @@ cifar10_ds_generator = torch::dataset(
 
 cifar10_ds = cifar10_ds_generator(data$images)
 
-dd = as_data_descriptor(cifar10_ds, list(x = c(NA, 32, 32, 3)))
+dd = as_data_descriptor(cifar10_ds, list(x = c(NA, 3, 32, 32)))
 lt = lazy_tensor(dd)
 
 tsk_dt = data.table(
@@ -73,14 +75,16 @@ path = file.path(get_cache_dir(), "datasets", "cifar100", "raw")
 
 constructor_cifar100 = function(path) {
   require_namespaces("torchvision")
 
-  tv_ds_train = torchvision::cifar100_dataset(root = path, train = TRUE, download = TRUE)
-  tv_data_train = tv_ds_train$.getitem(1:50000)
+  d_train = torchvision::cifar100_dataset(root = path, train = TRUE, download = TRUE)
 
-  tv_ds_test = torchvision::cifar100_dataset(root = path, train = FALSE, download = FALSE)
-  tv_data_test = tv_ds_test$.getitem(1:10000)
+  d_test = torchvision::cifar100_dataset(root = path, train = FALSE, download = FALSE)
 
-  labels = c(tv_data_train$y, tv_data_test$y)
-  images = array(c(tv_data_train$x, tv_data_test$x), dim = c(60000, 32, 32, 3))
+  labels = c(d_train$y, d_test$y)
+  images = array(NA, dim = c(60000, 3, 32, 32))
+  # original data has channel dimension at the end
+  perm_idx = c(1, 4, 2, 3)
+  images[1:50000, , , ] = aperm(d_train$x, perm_idx, resize = TRUE)
+  images[50001:60000, , , ] = aperm(d_test$x, perm_idx, resize = TRUE)
 
   class_names = readLines(file.path(path, "cifar-100-binary", "fine_label_names.txt"))
@@ -107,7 +111,7 @@ cifar100_ds_generator = torch::dataset(
 
 cifar100_ds = cifar100_ds_generator(data$images)
 
-dd = as_data_descriptor(cifar100_ds, list(x = c(NA, 32, 32, 3)))
+dd = as_data_descriptor(cifar100_ds, list(x = c(NA, 3, 32, 32)))
 lt = lazy_tensor(dd)
 
 dt = data.table(
diff --git a/man/mlr_tasks_cifar.Rd b/man/mlr_tasks_cifar.Rd
index a343bd76..224c3391 100644
--- a/man/mlr_tasks_cifar.Rd
+++ b/man/mlr_tasks_cifar.Rd
@@ -7,8 +7,8 @@
 \link[R6:R6Class]{R6::R6Class} inheriting from \link[mlr3:TaskClassif]{mlr3::TaskClassif}.
 }
 \description{
-The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset.
-The data is obtained from \code{\link[torchvision:cifar10_dataset]{torchvision::cifar10_dataset()}} or \code{\link[torchvision:cifar100:dataset]{torchvision::cifar100:dataset()}}.
+The CIFAR-10 and CIFAR-100 subsets of the 80 million tiny images dataset. CIFAR-10 contains 60000 32x32 colour images in 10 classes (6000 images per class); CIFAR-100 contains 60000 images in 100 classes (600 images per class).
+The data is obtained from \code{\link[torchvision:cifar10_dataset]{torchvision::cifar10_dataset()}} (or \code{torchvision::cifar100_dataset()}).
 }
 
 \section{Construction}{