Skip to content

Commit

Permalink
starting debug
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Mar 4, 2025
1 parent 256a803 commit 92e47cf
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 25 deletions.
2 changes: 0 additions & 2 deletions src/dataset/prepare_dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,6 @@ function feature_selection_preprocess(
isnothing(relative_overlap) ? base_params : merge(base_params, (relative_overlap = relative_overlap,))
end

@show winparams

total_features = length(features) * length(vnames) * nwindows
Xinfo = Vector{InfoFeat}(undef, total_features)
idx = 1
Expand Down
15 changes: 15 additions & 0 deletions src/filters/univariate/identityfilter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,24 @@ function score(
return score(X, selector)
end

"""
    score(X::AbstractMatrix, y::AbstractVector{<:Union{String, Symbol}}, selector::IdentityFilter)

Supervised entry point for `IdentityFilter` on a matrix with `String`/`Symbol` labels.

The labels `y` are accepted for interface compatibility but are not used: the call is
forwarded unchanged to the two-argument `score(X, selector)`.
"""
function score(
    X::AbstractMatrix, y::AbstractVector{T}, selector::IdentityFilter
) where {T<:Union{String, Symbol}}
    # `y` is intentionally dropped; only `X` and `selector` are forwarded.
    unsupervised_scores = score(X, selector)
    return unsupervised_scores
end

"""
    score(X::AbstractDataFrame, selector::IdentityFilter)

Return a `Vector{Float64}` holding the constant score `1.0` once per column of `X`.
"""
function score(X::AbstractDataFrame, selector::IdentityFilter)
    # One entry per data-frame column, all equal to 1.0.
    n_columns = ncol(X)
    return ones(Float64, n_columns)
end

"""
    score(X::AbstractMatrix, selector::IdentityFilter)

Return a `Vector{Float64}` holding the constant score `1.0` once per column of `X`.
"""
function score(X::AbstractMatrix, selector::IdentityFilter)
    # The second dimension of the matrix gives the number of scores to emit.
    n_columns = size(X, 2)
    return ones(Float64, n_columns)
end
5 changes: 1 addition & 4 deletions test/benchmarks/01_FS_Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ function _fsgroup(
group_before_score::Union{Val{true},Val{false}} = Val(true),
)::Tuple{Vector{Int},Vector{Vector{Int}},Vector{<:Real},Vector{Vector{<:Real}}}
g_indices = group_indices_by_column_names(X, aggrby; groups_separator = groups_separator)
@show g_indices

scores = []
groups_score = Vector(undef, length(g_indices))
if group_before_score isa Val{true}
Expand Down Expand Up @@ -553,9 +553,6 @@ function feature_selection(
# find indices to re-sort the scores of all variables to their
# original position in dataset columns
old_sort = sortperm(vcat(g_indices...))
@show vcat(vcat(grouped_variable_scores...)[old_sort]...)
@show vcat(g_indices[sel_g_indices]...)
@show g_indices
vcat(vcat(grouped_variable_scores...)[old_sort]...), vcat(g_indices[sel_g_indices]...), g_indices
end

Expand Down
6 changes: 1 addition & 5 deletions test/benchmarks/02_FS_noPython.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ function group_names(

# get unique group names
ixs = sort([aggrby...])
@show unique([sn[ixs] for sn in splitted_names])
return unique([sn[ixs] for sn in splitted_names])
end
function group_names(X::AbstractDataFrame, args...; kwargs...)
Expand Down Expand Up @@ -172,7 +171,6 @@ function group_indices_by_column_names(
groups_separator::AbstractString = _SEPARATOR
)::Vector{Vector{Int}}
g_names = group_names(Xnames, aggrby; groups_separator = groups_separator)
@show g_names
ixs = sort([aggrby...])
res = [findall(Xname -> _is_part_of_the_group(cur_g_name, Xname, ixs; groups_separator = groups_separator), Xnames)
for cur_g_name in g_names]
Expand Down Expand Up @@ -225,13 +223,11 @@ function _fsgroup(
group_before_score::Union{Val{true},Val{false}} = Val(true),
)::Tuple{Vector{Int},Vector{Vector{Int}},Vector{<:Real},Vector{Vector{<:Real}}}
g_indices = group_indices_by_column_names(X, aggrby; groups_separator = groups_separator)
@show g_indices
scores = []
groups_score = Vector(undef, length(g_indices))
if group_before_score isa Val{true}
# === group and then evaluate score internally to each group ===
for (i, cur_g_indices) in enumerate(g_indices)
@show cur_g_indices
s = isnothing(y) || SoleFeatures.is_unsupervised(selector) ?
SoleFeatures.score(X[:,cur_g_indices], selector) :
SoleFeatures.score(X[:,cur_g_indices], y, selector)
Expand Down Expand Up @@ -555,7 +551,7 @@ function feature_selection(
old_sort = sortperm(vcat(g_indices...))
vcat(vcat(grouped_variable_scores...)[old_sort]...), vcat(g_indices[sel_g_indices]...), g_indices
end
@show idxes

sort!(idxes)

push!(fs_mid_results, (
Expand Down
31 changes: 17 additions & 14 deletions test/benchmarks/03_FS_newStruct.jl
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ function feature_selection(
# find indices to re-sort the scores of all variables to their
# original position in dataset columns
old_sort = sortperm(vcat(g_indices...))

vcat(g_indices[sel_g_indices]...), vcat(vcat(grouped_variable_scores...)[old_sort]...), g_indices
end

Expand All @@ -672,18 +672,16 @@ function feature_selection(

dataset_col_slice = 1:size(X, 2)
for i in 1:length(fs_mid_results)
# @show fs_mid_results[i].indices
dataset_col_slice = dataset_col_slice[fs_mid_results[i].indices]
end

# if isa(return_mid_results, Val{true})
if isa(return_mid_results, Val{true})

# return X[:,dataset_col_slice], (extraction_column_names = extraction_column_names, fs_mid_results = fs_mid_results)
return X[:,dataset_col_slice], (extraction_column_names = Xinfo[dataset_col_slice], fs_mid_results = fs_mid_results)

# else
# return X[:,dataset_col_slice]
# end
return X
else
return X[:,dataset_col_slice]
end
end

"""
Expand Down Expand Up @@ -937,16 +935,21 @@ fs_methods = [
selector = SoleFeatures.MutualInformationClassif(SoleFeatures.IdentityLimiter()),
limiter = SoleFeatures.PercentageLimiter(0.01),
),
# ( # STEP 3: group results by variable
# selector = SoleFeatures.IdentityFilter(),
# limiter = SoleFeatures.IdentityLimiter(),
# ),
( # STEP 3: group results by variable
selector = SoleFeatures.IdentityFilter(),
limiter = SoleFeatures.IdentityLimiter(),
),
]

# prepare dataset for feature selection
# `@test_nowarn` additionally checks that the preprocessing call writes nothing to
# stderr; its result is destructured into `Xdf` and `Xinfo`.
Xdf, Xinfo = @test_nowarn SoleFeatures.feature_selection_preprocess(df; features=ms, type=SoleFeatures.adaptivewindow, nwindows=6, relative_overlap=0.05)

@info "FEATURE SELECTION"

# Convert the table to a `Matrix` and run feature selection once, keeping the result in `a`.
Xm = Matrix(Xdf)
a=feature_selection(Xm, y, Xinfo, fs_methods = fs_methods, norm = false)
using BenchmarkTools
# Time the matrix conversion and the feature selection together.
# NOTE(review): `Xdf`, `y`, `Xinfo` and `fs_methods` look like non-interpolated globals
# here; BenchmarkTools recommends `$`-interpolation for such values — confirm whether
# that matters for this measurement.
@btime begin
Xm = Matrix(Xdf)
feature_selection(Xm, y, Xinfo, fs_methods = fs_methods, norm = false)
end

# Presumably the `@btime` output recorded for the block above — TODO confirm.
# 3.212 ms (52923 allocations: 4.37 MiB)

0 comments on commit 92e47cf

Please sign in to comment.