feat: add number of features to result (#94)

* feat: add number of features to result
* chore: update news
mlr-org · Dec 16, 2023 · ca800e0 · ca800e0
1 parent 2824c69
commit ca800e0
Show file tree

Hide file tree

Showing 5 changed files with 14 additions and 11 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # mlr3fselect (development version)
 
+* feat: Add number of features to `instance$result`.
 * feat: Add `ties_method` options `"least_features"` and `"random"` to `ArchiveFSelect$best()`.
 * refactor: Optimize runtime of `ArchiveFSelect$best()` method.
 * feat: Add importance scores to result of `FSelectorRFE`.

diff --git a/R/FSelectInstanceMultiCrit.R b/R/FSelectInstanceMultiCrit.R
@@ -99,6 +99,7 @@ FSelectInstanceMultiCrit = R6Class("FSelectInstanceMultiCrit",
         self$objective$task$feature_names[as.logical(x)]
       })
       xdt[, features := list(features)]
+      xdt[, n_features := map(features, length)]
       assert_data_table(xdt)
       assert_names(names(xdt), must.include = self$search_space$ids())
       assert_data_table(ydt)

diff --git a/R/FSelectInstanceSingleCrit.R b/R/FSelectInstanceSingleCrit.R
@@ -143,6 +143,7 @@ FSelectInstanceSingleCrit = R6Class("FSelectInstanceSingleCrit",
       feature_names = self$objective$task$feature_names
       features = list(feature_names[as.logical(xdt[, feature_names, with = FALSE])])
       xdt[, features := list(features)]
+      xdt[, n_features := length(features[[1L]])]
       assert_data_table(xdt, nrows = 1L)
       assert_names(names(xdt), must.include = self$search_space$ids())
       assert_number(y)

diff --git a/inst/testthat/helper_fselector.R b/inst/testthat/helper_fselector.R
@@ -16,7 +16,7 @@ test_fselector = function(.key, ..., term_evals = NULL, store_models = FALSE) {
   # result checks
   archive = inst$archive
   expect_data_table(inst$result, nrows = 1)
-  expect_names(names(inst$result), must.include = c("x1", "x2", "x3", "x4", "features", "dummy"))
+  expect_names(names(inst$result), must.include = c("x1", "x2", "x3", "x4", "features", "n_features", "dummy"))
   expect_subset(inst$result$features[[1]], c("x1", "x2", "x3", "x4"))
   expect_data_table(inst$result_x_search_space, nrows = 1, ncols = 4, types = "logical")
   expect_names(names(inst$result_x_search_space), identical.to = c("x1", "x2", "x3", "x4"))
@@ -41,7 +41,7 @@ test_fselector_2D = function(.key, ..., term_evals = NULL, store_models = FALSE)
   )
 
   # result checks
-  expect_names(names(inst$result), identical.to = c("x1", "x2", "x3", "x4", "features", "regr.rmse", "regr.mse"))
+  expect_names(names(inst$result), identical.to = c("x1", "x2", "x3", "x4", "features", "n_features", "regr.rmse", "regr.mse"))
   expect_subset(inst$result$features[[1]], c("x1", "x2", "x3", "x4"))
   expect_data_table(inst$result_x_search_space, types = "logical")
   expect_names(names(inst$result_x_search_space), identical.to = c("x1", "x2", "x3", "x4"))

diff --git a/tests/testthat/test_extract_inner_fselect_result.R b/tests/testthat/test_extract_inner_fselect_result.R
@@ -4,7 +4,7 @@ test_that("extract_inner_fselect_results function works with resample and cv", {
 
   irr = extract_inner_fselect_results(rr)
   expect_data_table(irr, nrows = 2)
-  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "task_id", "learner_id", "resampling_id"))
+  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "n_features", "task_id", "learner_id", "resampling_id"))
 })
 
 test_that("extract_inner_fselect_results function works with resample and repeated cv", {
@@ -13,7 +13,7 @@ test_that("extract_inner_fselect_results function works with resample and repeat
 
   irr = extract_inner_fselect_results(rr)
   expect_data_table(irr, nrows = 6)
-  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "task_id", "learner_id", "resampling_id"))
+  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "n_features", "task_id", "learner_id", "resampling_id"))
 })
 
 test_that("extract_inner_fselect_results function works with benchmark and cv", {
@@ -25,7 +25,7 @@ test_that("extract_inner_fselect_results function works with benchmark and cv",
 
   ibmr = extract_inner_fselect_results(bmr)
   expect_data_table(ibmr, nrows = 4)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "n_features", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), c(1, 2))
 })
 
@@ -38,7 +38,7 @@ test_that("extract_inner_fselect_results function works with benchmark and repea
 
   ibmr = extract_inner_fselect_results(bmr)
   expect_data_table(ibmr, nrows = 12)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "n_features", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), c(1, 2))
 })
 
@@ -51,7 +51,7 @@ test_that("extract_inner_fselect_results function works with multiple tasks", {
 
   ibmr = extract_inner_fselect_results(bmr)
   expect_data_table(ibmr, nrows = 8)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "features", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "features", "n_features", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), c(1, 2, 3, 4))
 })
 
@@ -92,7 +92,7 @@ test_that("extract_inner_fselect_results function works with mixed store instanc
   bmr = benchmark(grid, store_models = TRUE)
 
   ibmr = extract_inner_fselect_results(bmr)
-  expect_data_table(ibmr, nrows = 2, ncols = 11)
+  expect_data_table(ibmr, nrows = 2, ncols = 12)
   expect_equal(unique(ibmr$experiment), 2)
 })
 
@@ -105,7 +105,7 @@ test_that("extract_inner_fselect_results function works with learner and autotun
 
   ibmr = extract_inner_fselect_results(bmr)
   expect_data_table(ibmr, nrows = 2)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "n_features", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), 1)
 })
 
@@ -114,7 +114,7 @@ test_that("extract_inner_fselect_results function works with resample and return
 
   irr = extract_inner_fselect_results(rr, fselect_instance = TRUE)
   expect_data_table(irr, nrows = 2)
-  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "fselect_instance", "task_id", "learner_id", "resampling_id"))
+  expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "n_features", "fselect_instance", "task_id", "learner_id", "resampling_id"))
 })
 
 test_that("extract_inner_fselect_results function works with benchmark and return of instance", {
@@ -126,6 +126,6 @@ test_that("extract_inner_fselect_results function works with benchmark and retur
 
   ibmr = extract_inner_fselect_results(bmr, fselect_instance = TRUE)
   expect_data_table(ibmr, nrows = 4)
-  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "fselect_instance", "task_id", "learner_id", "resampling_id"))
+  expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "features", "n_features", "fselect_instance", "task_id", "learner_id", "resampling_id"))
   expect_equal(unique(ibmr$experiment), c(1, 2))
 })