From 0e2f93f7e2ad505a326bbd4490d51d00531b2f36 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 18 Nov 2024 17:57:33 +0100 Subject: [PATCH] update tests --- tests/testthat/test_ensemble_fselect.R | 141 ++++++++++++------------- 1 file changed, 70 insertions(+), 71 deletions(-) diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index d2806e7f..90e7fbef 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -1,4 +1,4 @@ -test_that("ensemble feature selection works", { +test_that("efs works", { task = tsk("sonar") with_seed(42, { efsr = ensemble_fselect( @@ -7,6 +7,7 @@ test_that("ensemble feature selection works", { learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), + inner_measure = msr("classif.ce"), measure = msr("classif.ce"), terminator = trm("evals", n_evals = 5) ) @@ -17,8 +18,10 @@ test_that("ensemble feature selection works", { expect_list(efsr$result$features, any.missing = FALSE, len = 4) expect_vector(efsr$result$n_features, size = 4) expect_vector(efsr$result$classif.ce, size = 4) + expect_vector(efsr$result$classif.ce_inner, size = 4) expect_benchmark_result(efsr$benchmark_result) expect_equal(efsr$measure, "classif.ce") + expect_true(efsr$minimize) # classification error expect_equal(efsr$n_learners, 2) expect_equal(efsr$n_resamples, 2) @@ -27,7 +30,7 @@ test_that("ensemble feature selection works", { expect_error(efsr$stability(stability_args = list(20)), "have names") stability = efsr$stability(stability_measure = "jaccard", global = FALSE) expect_numeric(stability, len = 2) - expect_equal(names(stability), c("classif.rpart", "classif.featureless")) + expect_equal(names(stability), c("classif.rpart.fselector", "classif.featureless.fselector")) # default feature ranking feature_ranking = efsr$feature_ranking() @@ -36,7 +39,7 @@ test_that("ensemble feature selection works", { # pareto_front pf = efsr$pareto_front() - expect_data_table(pf, nrows = 3) + expect_data_table(pf, nrows = 2) expect_equal(names(pf), c("n_features", "classif.ce")) pf_pred = efsr$pareto_front(type = "estimated") expect_data_table(pf_pred, nrows = max(efsr$result$n_features)) @@ -52,70 +55,33 @@ test_that("ensemble feature selection works", { # data.table conversion tab = as.data.table(efsr) - expect_equal(names(tab), c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "task", "learner", "resampling")) + expect_equal(names(tab), c("learner_id", "resampling_iteration", "classif.ce", + "features", "n_features", "classif.ce_inner", + "task", "learner", "resampling")) + # scores on train and test sets are different (even though same measure used) + assert_true(all(tab$classif.ce != tab$classif.ce_inner)) + + # change to use inner measure + expect_error(efsr$set_active_measure(measure_id = "XYZ"), regexp = "Must be element") + expect_error(efsr$set_active_measure(measure_id = "classif.ce_inner"), regexp = "is missing") + efsr$set_active_measure(measure_id = "classif.ce_inner", minimize = TRUE) + expect_equal(efsr$measure, "classif.ce_inner") + expect_true(efsr$minimize) # classification error + pf2 = efsr$pareto_front() + expect_data_table(pf2, nrows = 3) # pareto front has changed }) -test_that("ensemble feature selection works without benchmark result", { +test_that("efs works with rfe", { task = tsk("sonar") with_seed(42, { efsr = ensemble_fselect( - fselector = fs("random_search"), + fselector = fs("rfe", subset_sizes = c(60, 20, 10, 5)), task = task, learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 3), - store_benchmark_result = FALSE - ) - }) - - expect_character(efsr$man) - expect_data_table(efsr$result, nrows = 4) - expect_list(efsr$result$features, any.missing = FALSE, len = 4) - expect_vector(efsr$result$n_features, size = 4) - expect_vector(efsr$result$classif.ce, size = 4) - expect_null(efsr$benchmark_result) - expect_equal(efsr$measure, "classif.ce") - expect_equal(efsr$n_learners, 2) - expect_equal(efsr$n_resamples, 2) - - # stability - expect_number(efsr$stability(stability_measure = "jaccard")) - stability = efsr$stability(stability_measure = "jaccard", global = FALSE) - expect_numeric(stability, len = 2) - expect_equal(names(stability), c("classif.rpart", "classif.featureless")) - - # default feature ranking - feature_ranking = efsr$feature_ranking() - expect_data_table(feature_ranking, nrows = length(task$feature_names)) - expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score")) - - # pareto_front - pf = efsr$pareto_front() - expect_data_table(pf, nrows = 3) - expect_equal(names(pf), c("n_features", "classif.ce")) - - # knee_points - kps = efsr$knee_points() - expect_data_table(kps, nrows = 1) - expect_equal(names(kps), c("n_features", "classif.ce")) - - # data.table conversion - tab = as.data.table(efsr) - expect_equal(names(tab), c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce")) -}) - -test_that("ensemble feature selection works with rfe", { - task = tsk("sonar") - with_seed(42, { - efsr = ensemble_fselect( - fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), - task = task, - learners = lrns(c("classif.rpart", "classif.featureless")), - init_resampling = rsmp("subsampling", repeats = 2), - inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), + inner_measure = msr("classif.ce"), + measure = msr("classif.acc"), terminator = trm("none") ) }) @@ -124,18 +90,25 @@ test_that("ensemble feature selection works with rfe", { expect_data_table(efsr$result, nrows = 4) expect_list(efsr$result$features, any.missing = FALSE, len = 4) expect_vector(efsr$result$n_features, size = 4) - expect_vector(efsr$result$classif.ce, size = 4) + expect_vector(efsr$result$classif.ce_inner, size = 4) + expect_vector(efsr$result$classif.acc, size = 4) expect_list(efsr$result$importance, any.missing = FALSE, len = 4) expect_benchmark_result(efsr$benchmark_result) - expect_equal(efsr$measure, "classif.ce") + expect_equal(efsr$measure, "classif.acc") + expect_false(efsr$minimize) # accuracy expect_equal(efsr$n_learners, 2) expect_equal(efsr$n_resamples, 2) + # change active measure + efsr$set_active_measure(measure_id = "classif.ce_inner", minimize = TRUE) + expect_equal(efsr$measure, "classif.ce_inner") + expect_true(efsr$minimize) # classification error + # stability expect_number(efsr$stability(stability_measure = "jaccard")) stability = efsr$stability(stability_measure = "jaccard", global = FALSE) expect_numeric(stability, len = 2) - expect_equal(names(stability), c("classif.rpart", "classif.featureless")) + expect_equal(names(stability), c("classif.rpart.fselector", "classif.featureless.fselector")) # default feature ranking feature_ranking = efsr$feature_ranking() @@ -145,25 +118,28 @@ test_that("ensemble feature selection works with rfe", { # pareto_front pf = efsr$pareto_front() expect_data_table(pf, nrows = 4) - expect_equal(names(pf), c("n_features", "classif.ce")) + expect_equal(names(pf), c("n_features", "classif.ce_inner")) pf_pred = efsr$pareto_front(type = "estimated") expect_data_table(pf_pred, nrows = max(efsr$result$n_features)) - expect_equal(names(pf_pred), c("n_features", "classif.ce")) + expect_equal(names(pf_pred), c("n_features", "classif.ce_inner")) # knee_points kps = efsr$knee_points(type = "estimated") expect_data_table(kps, nrows = 1) - expect_equal(names(kps), c("n_features", "classif.ce")) + expect_equal(names(kps), c("n_features", "classif.ce_inner")) # data.table conversion tab = as.data.table(efsr) - expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "importance", "task", "learner", "resampling")) + expect_equal(names(tab), c("learner_id", "resampling_iteration", "classif.acc", + "features", "n_features", "classif.ce_inner", + "importance", "task", "learner", "resampling")) }) test_that("EnsembleFSResult initialization", { result = data.table(a = 1, b = 3) expect_error(EnsembleFSResult$new(result = result, features = LETTERS, measure_id = "a"), "missing elements") + errors = c(0.13, 0.24, 0.16, 0.11, 0.25, 0.18, 0.15, 0.1, 0.16) result = data.table( resampling_iteration = c(1, 1, 1, 2, 2, 2, 3, 3, 3), learner_id = rep(c("classif.xgboost", "classif.rpart", "classif.ranger"), 3), @@ -178,7 +154,8 @@ test_that("EnsembleFSResult initialization", { c("V2"), c("V4", "V12"), c("V6", "V15", "V19", "V7")), - classif.ce = c(0.13, 0.24, 0.16, 0.11, 0.25, 0.18, 0.15, 0.1, 0.16) + classif.ce = errors, + classif.acc = 1 - errors ) # a feature set includes "V20" which is not included in input "features" @@ -189,27 +166,49 @@ test_that("EnsembleFSResult initialization", { expect_class(efsr, "EnsembleFSResult") expect_equal(efsr$n_learners, 3) expect_equal(efsr$n_resamples, 3) + expect_equal(efsr$measure, "classif.ce") + expect_true(efsr$minimize) tab = as.data.table(efsr) expect_data_table(tab) expect_equal(names(tab), c("resampling_iteration", "learner_id", "n_features", - "features", "classif.ce")) + "features", "classif.ce", "classif.acc")) + + # non-existent `inner_measure_id` + expect_error(EnsembleFSResult$new( + result = result, features = paste0("V", 1:20), measure_id = "classif.ce", inner_measure_id = "a" + ), "missing") + # `use_inner_measure = TRUE`, no `inner_measure_id` though + expect_error(EnsembleFSResult$new( + result = result, features = paste0("V", 1:20), use_inner_measure = TRUE, measure_id = "classif.ce" + ), "Must be of type") + # `use_inner_measure = TRUE`, and `inner_measure_id` is + efsr2 = EnsembleFSResult$new( + result = result, features = paste0("V", 1:20), use_inner_measure = TRUE, + measure_id = "classif.ce", inner_measure_id = "classif.acc", + minimize = FALSE # now this applies to the inner measure (accuracy) + ) + expect_class(efsr2, "EnsembleFSResult") + expect_equal(efsr2$n_learners, 3) + expect_equal(efsr2$n_resamples, 3) + expect_equal(efsr2$measure, "classif.acc") + expect_false(efsr2$minimize) }) test_that("different callbacks can be set", { callback_test = callback_batch_fselect("mlr3fselect.test", on_eval_before_archive = function(callback, context) { - context$aggregated_performance[, callback_active := context$instance$objective$learner$id == "classif.rpart"] + context$aggregated_performance[, callback_active := context$instance$objective$learner$id == "classif.rpart.fselector"] } ) efsr = ensemble_fselect( - # 4-5 evaluations on sonar - fselector = fs("rfe", n_features = 25, feature_fraction = 0.8), + fselector = fs("rfe", subset_sizes = c(60, 20, 10, 5)), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), + inner_measure = msr("classif.ce"), + measure = msr("classif.acc"), terminator = trm("none"), callbacks = list(list(callback_test), list()) )