From 7ac4b6ca0b3f02a781d24deb85ab7b22d6a6751f Mon Sep 17 00:00:00 2001 From: be-marc Date: Tue, 11 Jun 2024 11:27:40 +0200 Subject: [PATCH] docs: update --- R/EnsembleFSResult.R | 39 ++++++++++++++------------- R/ensemble_fselect.R | 4 ++- man/ensemble_fs_result.Rd | 56 ++++++++++++++++++++------------------- man/ensemble_fselect.Rd | 4 ++- 4 files changed, 55 insertions(+), 48 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index e37250f7..54ad93d5 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -17,27 +17,27 @@ #' #' @examples #' \donttest{ -#' efsr = ensemble_fselect( -#' fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), -#' task = tsk("sonar"), -#' learners = lrns(c("classif.rpart", "classif.featureless")), -#' init_resampling = rsmp("subsampling", repeats = 2), -#' inner_resampling = rsmp("cv", folds = 3), -#' measure = msr("classif.ce"), -#' terminator = trm("none") -#' ) +#' efsr = ensemble_fselect( +#' fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), +#' task = tsk("sonar"), +#' learners = lrns(c("classif.rpart", "classif.featureless")), +#' init_resampling = rsmp("subsampling", repeats = 2), +#' inner_resampling = rsmp("cv", folds = 3), +#' measure = msr("classif.ce"), +#' terminator = trm("none") +#' ) #' -#' # contains the benchmark result -#' efsr$benchmark_result +#' # contains the benchmark result +#' efsr$benchmark_result #' -#' # contains the selected features for each iteration -#' efsr$result +#' # contains the selected features for each iteration +#' efsr$result #' -#' # returns the stability of the selected features -#' efsr$stability(stability_measure = "jaccard") +#' # returns the stability of the selected features +#' efsr$stability(stability_measure = "jaccard") #' -#' # returns a ranking of all features -#' head(efsr$feature_ranking()) +#' # returns a ranking of all features +#' head(efsr$feature_ranking()) #' } EnsembleFSResult = R6Class("EnsembleFSResult", public = list( @@ 
-149,6 +149,9 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' Whether to calculate the stability globally or for each learner. #' @param reset_cache (`logical(1)`)\cr #' If `TRUE`, the cached results are ignored. + #' + #' @return A `numeric()` value representing the stability of the selected features. + #' Or a `numeric()` vector with the stability of the selected features for each learner. stability = function(stability_measure = "jaccard", ..., global = TRUE, reset_cache = FALSE) { funs = stabm::listStabilityMeasures()$Name keys = tolower(gsub("stability", "", funs)) @@ -175,8 +178,6 @@ EnsembleFSResult = R6Class("EnsembleFSResult", private$.stability_learner[[stability_measure]] = set_names(tab$score, tab$learner_id) private$.stability_learner[[stability_measure]] } - - } ), diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 87942adf..afde7803 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -5,6 +5,7 @@ #' @description #' Ensemble feature selection using multiple learners. #' The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. +#' Returns an [EnsembleFSResult]. #' #' @details #' The method begins by applying an initial resampling technique specified by the user, to create **multiple subsamples** from the original dataset. 
@@ -42,7 +43,7 @@ #' @export #' @examples #' \donttest{ -#' ensemble_fselect( +#' efsr = ensemble_fselect( #' fselector = fs("random_search"), #' task = tsk("sonar"), #' learners = lrns(c("classif.rpart", "classif.featureless")), @@ -51,6 +52,7 @@ #' measure = msr("classif.ce"), #' terminator = trm("evals", n_evals = 10) #' ) +#' efsr #' } ensemble_fselect = function( fselector, diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd index c394dda7..d5720cfb 100644 --- a/man/ensemble_fs_result.Rd +++ b/man/ensemble_fs_result.Rd @@ -25,27 +25,27 @@ Whether to add the learner, task and resampling information from the benchmark r \examples{ \donttest{ -efsr = ensemble_fselect( - fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), - task = tsk("sonar"), - learners = lrns(c("classif.rpart", "classif.featureless")), - init_resampling = rsmp("subsampling", repeats = 2), - inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), - terminator = trm("none") -) + efsr = ensemble_fselect( + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("none") + ) -# contains the benchmark result -efsr$benchmark_result + # contains the benchmark result + efsr$benchmark_result -# contains the selected features for each iteration -efsr$result + # contains the selected features for each iteration + efsr$result -# returns the stability of the selected features -efsr$stability(stability_measure = "jaccard") + # returns the stability of the selected features + efsr$stability(stability_measure = "jaccard") -# returns a ranking of all features -head(efsr$feature_ranking()) + # returns a ranking of all features + head(efsr$feature_ranking()) } } \section{Public fields}{ @@ -168,19 +168,13 @@ The method to calculate the 
feature ranking.} \if{html}{\out{}} } \subsection{Details}{ -The feature ranking process is built on the following framework: models -act as voters, features act as candidates, and voters select certain -candidates (features). The primary objective is to compile these selections -into a consensus ranked list of features, effectively forming a committee. -Currently, only \code{"approval_voting"} method is supported, which selects the -candidates/features that have the highest approval score or selection -frequency, i.e. appear the most often. +The feature ranking process is built on the following framework: models act as voters, features act as candidates, and voters select certain candidates (features). +The primary objective is to compile these selections into a consensus ranked list of features, effectively forming a committee. +Currently, only the \code{"approval_voting"} method is supported, which selects the candidates/features that have the highest approval score or selection frequency, i.e. appear the most often. } \subsection{Returns}{ -A \link[data.table:data.table]{data.table} listing all the features, -ordered by decreasing inclusion probability scores (depending on the -\code{method}) +A \link[data.table:data.table]{data.table::data.table} listing all the features, ordered by decreasing inclusion probability scores (depending on the \code{method}) } } \if{html}{\out{
}} @@ -194,6 +188,7 @@ When the same stability measure is requested again with different arguments, the \if{html}{\out{
}}\preformatted{EnsembleFSResult$stability( stability_measure = "jaccard", ..., + global = TRUE, reset_cache = FALSE )}\if{html}{\out{
}} } @@ -209,11 +204,18 @@ Default is \code{"jaccard"}.} \item{\code{...}}{(\code{any})\cr Additional arguments passed to the stability measure function.} +\item{\code{global}}{(\code{logical(1)})\cr +Whether to calculate the stability globally or for each learner.} + \item{\code{reset_cache}}{(\code{logical(1)})\cr If \code{TRUE}, the cached results are ignored.} } \if{html}{\out{}} } +\subsection{Returns}{ +A \code{numeric()} value representing the stability of the selected features. +Or a \code{numeric()} vector with the stability of the selected features for each learner. +} } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index 72dd0ff7..6a8454f1 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -73,6 +73,7 @@ an \link{EnsembleFSResult} object. \description{ Ensemble feature selection using multiple learners. The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. +Returns an \link{EnsembleFSResult}. } \details{ The method begins by applying an initial resampling technique specified by the user, to create \strong{multiple subsamples} from the original dataset. @@ -84,7 +85,7 @@ Results are stored in an \link{EnsembleFSResult}. } \examples{ \donttest{ - ensemble_fselect( + efsr = ensemble_fselect( fselector = fs("random_search"), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), @@ -93,5 +94,6 @@ Results are stored in an \link{EnsembleFSResult}. measure = msr("classif.ce"), terminator = trm("evals", n_evals = 10) ) + efsr } }