starting debug

aclai-lab · Mar 4, 2025 · 92e47cf · 92e47cf
1 parent 256a803
commit 92e47cf
Show file tree

Hide file tree

Showing 5 changed files with 34 additions and 25 deletions.
diff --git a/src/dataset/prepare_dataset.jl b/src/dataset/prepare_dataset.jl
@@ -414,8 +414,6 @@ function feature_selection_preprocess(
         isnothing(relative_overlap) ? base_params : merge(base_params, (relative_overlap = relative_overlap,))
     end
 
-    @show winparams
-
     total_features = length(features) * length(vnames) * nwindows
     Xinfo = Vector{InfoFeat}(undef, total_features)
     idx = 1

diff --git a/src/filters/univariate/identityfilter.jl b/src/filters/univariate/identityfilter.jl
@@ -21,9 +21,24 @@ function score(
     return score(X, selector)
 end
 
+function score(
+    X::AbstractMatrix,
+    y::AbstractVector{<:Union{String, Symbol}},
+    selector::IdentityFilter
+)
+    return score(X, selector)
+end
+
 function score(
     X::AbstractDataFrame,
     selector::IdentityFilter
 )
     return fill(1.0, ncol(X))
+end
+
+function score(
+    X::AbstractMatrix,
+    selector::IdentityFilter
+)
+    return fill(1.0, size(X, 2))
 end
diff --git a/test/benchmarks/01_FS_Base.jl b/test/benchmarks/01_FS_Base.jl
@@ -228,7 +228,7 @@ function _fsgroup(
     group_before_score::Union{Val{true},Val{false}} = Val(true),
 )::Tuple{Vector{Int},Vector{Vector{Int}},Vector{<:Real},Vector{Vector{<:Real}}}
     g_indices = group_indices_by_column_names(X, aggrby; groups_separator = groups_separator)
-@show g_indices
+
     scores = []
     groups_score = Vector(undef, length(g_indices))
     if group_before_score isa Val{true}
@@ -553,9 +553,6 @@ function feature_selection(
                 # find indices to re-sort the scores of all variables to their
                 #    original position in dataset columns
                 old_sort = sortperm(vcat(g_indices...))
-                @show vcat(vcat(grouped_variable_scores...)[old_sort]...)
-                @show vcat(g_indices[sel_g_indices]...)
-                @show g_indices
                 vcat(vcat(grouped_variable_scores...)[old_sort]...), vcat(g_indices[sel_g_indices]...), g_indices
             end
 

diff --git a/test/benchmarks/02_FS_noPython.jl b/test/benchmarks/02_FS_noPython.jl
@@ -130,7 +130,6 @@ function group_names(
 
     # get unique group names
     ixs = sort([aggrby...])
-    @show unique([sn[ixs] for sn in splitted_names])
     return unique([sn[ixs] for sn in splitted_names])
 end
 function group_names(X::AbstractDataFrame, args...; kwargs...)
@@ -172,7 +171,6 @@ function group_indices_by_column_names(
     groups_separator::AbstractString = _SEPARATOR
 )::Vector{Vector{Int}}
     g_names = group_names(Xnames, aggrby; groups_separator = groups_separator)
-@show g_names
     ixs = sort([aggrby...])
     res = [findall(Xname -> _is_part_of_the_group(cur_g_name, Xname, ixs; groups_separator = groups_separator), Xnames)
             for cur_g_name in g_names]
@@ -225,13 +223,11 @@ function _fsgroup(
     group_before_score::Union{Val{true},Val{false}} = Val(true),
 )::Tuple{Vector{Int},Vector{Vector{Int}},Vector{<:Real},Vector{Vector{<:Real}}}
     g_indices = group_indices_by_column_names(X, aggrby; groups_separator = groups_separator)
-@show g_indices
     scores = []
     groups_score = Vector(undef, length(g_indices))
     if group_before_score isa Val{true}
         # === group and then evaluate score internally to each group ===
         for (i, cur_g_indices) in enumerate(g_indices)
-            @show cur_g_indices
             s = isnothing(y) || SoleFeatures.is_unsupervised(selector) ?
                 SoleFeatures.score(X[:,cur_g_indices], selector) :
                 SoleFeatures.score(X[:,cur_g_indices], y, selector)
@@ -555,7 +551,7 @@ function feature_selection(
                 old_sort = sortperm(vcat(g_indices...))
                 vcat(vcat(grouped_variable_scores...)[old_sort]...), vcat(g_indices[sel_g_indices]...), g_indices
             end
-@show idxes
+
         sort!(idxes)
 
         push!(fs_mid_results, (

diff --git a/test/benchmarks/03_FS_newStruct.jl b/test/benchmarks/03_FS_newStruct.jl
@@ -654,7 +654,7 @@ function feature_selection(
                 # find indices to re-sort the scores of all variables to their
                 # original position in dataset columns
                 old_sort = sortperm(vcat(g_indices...))
-                
+
                 vcat(g_indices[sel_g_indices]...), vcat(vcat(grouped_variable_scores...)[old_sort]...), g_indices
             end
 
@@ -672,18 +672,16 @@ function feature_selection(
 
     dataset_col_slice = 1:size(X, 2)
     for i in 1:length(fs_mid_results)
-        # @show fs_mid_results[i].indices
         dataset_col_slice = dataset_col_slice[fs_mid_results[i].indices]
     end
 
-    # if isa(return_mid_results, Val{true})
+    if isa(return_mid_results, Val{true})
 
-    #     return X[:,dataset_col_slice], (extraction_column_names = extraction_column_names, fs_mid_results = fs_mid_results)
+        return X[:,dataset_col_slice], (extraction_column_names = Xinfo[dataset_col_slice], fs_mid_results = fs_mid_results)
 
-    # else
-        # return X[:,dataset_col_slice]
-    # end
-    return X
+    else
+        return X[:,dataset_col_slice]
+    end
 end
 
 """
@@ -937,16 +935,21 @@ fs_methods = [
 		selector = SoleFeatures.MutualInformationClassif(SoleFeatures.IdentityLimiter()),
 		limiter = SoleFeatures.PercentageLimiter(0.01),
 	),
-	# ( # STEP 3: group results by variable
-	# 	selector = SoleFeatures.IdentityFilter(),
-	# 	limiter = SoleFeatures.IdentityLimiter(),
-	# ),
+	( # STEP 3: group results by variable
+		selector = SoleFeatures.IdentityFilter(),
+		limiter = SoleFeatures.IdentityLimiter(),
+	),
 ]
 
 # prepare dataset for feature selection
 Xdf, Xinfo = @test_nowarn SoleFeatures.feature_selection_preprocess(df; features=ms, type=SoleFeatures.adaptivewindow, nwindows=6, relative_overlap=0.05)
 
 @info "FEATURE SELECTION"
 
-Xm = Matrix(Xdf)
-a=feature_selection(Xm, y, Xinfo, fs_methods = fs_methods, norm = false)
+using BenchmarkTools
+@btime begin
+    Xm = Matrix(Xdf)
+    feature_selection(Xm, y, Xinfo, fs_methods = fs_methods, norm = false)
+end
+
+# 3.212 ms (52923 allocations: 4.37 MiB)