From a50d0405189e5f85b93d2df91cd03b5b307acc01 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 31 May 2024 11:27:44 +0200 Subject: [PATCH] feat: add result object --- DESCRIPTION | 4 +- R/EnsembleFSResult.R | 75 +++++++++++++++ R/ensemble_fselect.R | 2 +- man/EnsembleFSResult.Rd | 123 +++++++++++++++++++++++++ tests/testthat/test_ensemble_fselect.R | 27 ++++-- 5 files changed, 223 insertions(+), 8 deletions(-) create mode 100644 R/EnsembleFSResult.R create mode 100644 man/EnsembleFSResult.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bc854ec2..2559273d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,8 @@ Imports: lgr, mlr3misc (>= 0.15.0.9000), paradox (>= 1.0.0), - R6 + R6, + stabm Suggests: e1071, genalg, @@ -55,6 +56,7 @@ Collate: 'AutoFSelector.R' 'CallbackBatchFSelect.R' 'ContextBatchFSelect.R' + 'EnsembleFSResult.R' 'FSelectInstanceBatchSingleCrit.R' 'FSelectInstanceBatchMultiCrit.R' 'mlr_fselectors.R' diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R new file mode 100644 index 00000000..80794c0b --- /dev/null +++ b/R/EnsembleFSResult.R @@ -0,0 +1,75 @@ +#' @title Ensemble Feature Selection Result +#' +#' @description +#' The `EnsembleFSResult` stores the results of the ensemble feature selection. +#' The function [ensemble_fselect()] returns an object of this class. +#' +#' @examples +#' \donttest{ +#' efsr = ensemble_fselect( +#' fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), +#' task = tsk("sonar"), +#' learners = lrns(c("classif.rpart", "classif.featureless")), +#' init_resampling = rsmp("subsampling", repeats = 2), +#' inner_resampling = rsmp("cv", folds = 3), +#' measure = msr("classif.ce"), +#' terminator = trm("none") +#' ) +#' +#' # contains the benchmark result +#' efsr$benchmark_result +#' +#' # contains the selected features for each iteration +#' efsr$grid +#' +#' # returns the stability of the selected features +#' efsr$stability(stability_measure = "jaccard") +#' } +EnsembleFSResult = R6Class("EnsembleFSResult", + public = list( + + #' @field benchmark_result (`BenchmarkResult`)\cr + #' The benchmark result object. + benchmark_result = NULL, + + #' @field grid (`data.table`)\cr + #' The grid of feature selection results. + grid = NULL, + + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + #' + #' @param benchmark_result (`BenchmarkResult`)\cr + #' The benchmark result object. + #' @param grid (`data.table`)\cr + #' The grid of feature selection results. + initialize = function(benchmark_result, grid) { + self$benchmark_result = assert_benchmark_result(benchmark_result) + self$grid = assert_data_table(grid) + }, + + #' @description + #' Returns the feature ranking. + feature_ranking = function() { + + }, + + #' @description + #' Calculates the stability of the selected features with the `stabm` package. + #' + #' @param stability_measure (`character(1)`)\cr + #' The stability measure to be used. + #' One of the measures returned by [stabm::listStabilityMeasures()] in lower case. + #' Default is `"jaccard"`. + #' @param ... (`any`)\cr + #' Additional arguments passed to the stability measure function. + stability = function(stability_measure = "jaccard", ...) { + funs = stabm::listStabilityMeasures()$Name + keys = tolower(gsub("stability", "", funs)) + assert_choice(stability_measure, choices = keys) + + fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) + fun(self$grid$features, ...) + } + ) +) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 93b4c2ab..2244ce38 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -134,5 +134,5 @@ ensemble_fselect = function( set(grid, j = "importance", value = imp_scores) } - grid + EnsembleFSResult$new(bmr, grid) } diff --git a/man/EnsembleFSResult.Rd b/man/EnsembleFSResult.Rd new file mode 100644 index 00000000..6c679277 --- /dev/null +++ b/man/EnsembleFSResult.Rd @@ -0,0 +1,123 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/EnsembleFSResult.R +\name{EnsembleFSResult} +\alias{EnsembleFSResult} +\title{Ensemble Feature Selection Result} +\description{ +The \code{EnsembleFSResult} stores the results of the ensemble feature selection. +The function \code{\link[=ensemble_fselect]{ensemble_fselect()}} returns an object of this class. +} +\examples{ +\donttest{ +efsr = ensemble_fselect( + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("none") +) + +# contains the benchmark result +efsr$benchmark_result + +# contains the selected features for each iteration +efsr$grid + +# returns the stability of the selected features +efsr$stability(stability_measure = "jaccard") +} +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr +The benchmark result object.} + +\item{\code{grid}}{(\code{data.table})\cr +The grid of feature selection results.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-EnsembleFSResult-new}{\code{EnsembleFSResult$new()}} +\item \href{#method-EnsembleFSResult-feature_ranking}{\code{EnsembleFSResult$feature_ranking()}} +\item \href{#method-EnsembleFSResult-stability}{\code{EnsembleFSResult$stability()}} +\item \href{#method-EnsembleFSResult-clone}{\code{EnsembleFSResult$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{EnsembleFSResult$new(benchmark_result, grid)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr +The benchmark result object.} + +\item{\code{grid}}{(\code{data.table})\cr +The grid of feature selection results.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-feature_ranking}{}}} +\subsection{Method \code{feature_ranking()}}{ +Returns the feature ranking. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{EnsembleFSResult$feature_ranking()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-stability}{}}} +\subsection{Method \code{stability()}}{ +Calculates the stability of the selected features with the \code{stabm} package. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{EnsembleFSResult$stability(stability_measure = "jaccard", ...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{stability_measure}}{(\code{character(1)})\cr +The stability measure to be used. +One of the measures returned by \code{\link[stabm:listStabilityMeasures]{stabm::listStabilityMeasures()}} in lower case. +Default is \code{"jaccard"}.} + +\item{\code{...}}{(\code{any})\cr +Additional arguments passed to the stability measure function.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{EnsembleFSResult$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index bebac790..39be1201 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -1,5 +1,5 @@ test_that("ensemble feature selection works", { - res = ensemble_fselect( + efsr = ensemble_fselect( fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), @@ -9,11 +9,26 @@ test_that("ensemble feature selection works", { terminator = trm("none") ) - expect_data_table(res, nrows = 4) - expect_list(res$features, any.missing = FALSE, len = 4) - expect_vector(res$n_features, size = 4) - expect_vector(res$classif.ce, size = 4) - expect_list(res$importance, any.missing = FALSE, len = 4) + expect_data_table(efsr$grid, nrows = 4) + expect_list(efsr$grid$features, any.missing = FALSE, len = 4) + expect_vector(efsr$grid$n_features, size = 4) + expect_vector(efsr$grid$classif.ce, size = 4) + expect_list(efsr$grid$importance, any.missing = FALSE, len = 4) + expect_benchmark_result(efsr$benchmark_result) +}) + +test_that("stability method works", { + efsr = ensemble_fselect( + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("none") + ) + + expect_number(efsr$stability(stability_measure = "jaccard")) })