Skip to content

Commit

Permalink
selection_working
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Feb 25, 2025
1 parent 8c357f7 commit 239459d
Show file tree
Hide file tree
Showing 16 changed files with 434 additions and 259 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
MLBase = "f0e99cf1-93fa-52ec-9ecc-5026115318e0"
MultiData = "8cc5100c-b3d1-4f82-90cb-0ea93d317aba"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SoleBase = "4475fa32-7023-44a0-aa70-4813b230e492"
Expand All @@ -21,6 +22,7 @@ CategoricalArrays = "0.10"
DataFrames = "1"
HypothesisTests = "0.11"
IterTools = "1"
MultiData = "0.1.4"
OrderedCollections = "1"
Random = "1"
SoleBase = "0.13"
Expand Down
14 changes: 14 additions & 0 deletions TODO
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Da chiedere a Fede:
1- AbstractDataFrame o DataFrame?
2- Pull request insieme
3- windowing multidimensionale da implementare più avanti: potrebbe essere problematico mettere troppa carne al fuoco

- Verifica quali parametri servono a VarianceFilter

### Importante

Ricostruisci il dataset come aggregate, ma gli aggiungi un vettore con i dati delle colonne.

rifai l'export catch22 con function e verifica con string(funzione)

lascia dataframe e lascia in TODO la conversione a matrice
37 changes: 24 additions & 13 deletions src/SoleFeatures.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,39 @@
module SoleFeatures

using SoleBase
using MultiData
using StatsBase, Catch22
using CategoricalArrays, DataFrames
using Random

include("interface.jl")
export AbstractFilterBased
include("utils/utils.jl")

# filters
include("filters/limiter.jl")
export AbstractLimiter
export PercentageLimiter
include("filters/interface.jl")

include("filters/univariate/identityfilter.jl")
include("filters/univariate/mutualinformationclassif.jl")
export MutualInformationClassifRanking
include("filters/univariate/variancefilter.jl")
export VarianceRanking, VarianceThreshold

include("utils/features_set.jl")
export mode_5, mode_10, embedding_dist, acf_timescale, acf_first_min, ami2, trev, outlier_timing_pos
export outlier_timing_neg, whiten_timescale, forecast_error, ami_timescale, high_fluctuation, stretch_decreasing
export stretch_high, entropy_pairs, rs_range, dfa, low_freq_power, centroid_freq, transition_variance, periodicity
export base_set, catch9, catch22_set, complete_set

include("dataset/dataset_structs.jl")
include("dataset/interface.jl")
export Feature

include("dataset/prepare_dataset.jl")
export feature_selection_preprocess

# using MultiData
# using SoleData
# using Reexport
# using LinearAlgebra
Expand All @@ -30,20 +46,17 @@ export feature_selection_preprocess

# # abstracts
# export AbstractFeaturesSelector
# export AbstractFilterBased
# export AbstractWrapperBased
# export AbstractEmbeddedBased
# export AbstractLimiter

# # structs
# export VarianceThreshold
# export VarianceRanking
# export RandomRanking
# export StatisticalAtLeastOnce
# export StatisticalMajority
# export PearsonCorRanking
# export Chi2Ranking
# export Chi2Threshold
# export MutualInformationClassifRanking

# export CompoundStatisticalAtLeastOnce
# export CompoundStatisticalMajority
# export CorrelationFilter
Expand Down Expand Up @@ -87,19 +100,17 @@ export feature_selection_preprocess
# ))
# end

# include("interface.jl")
# include("core.jl")
# # Utils
# include("utils/utils.jl")

# # Filters
# include("filters/limiter.jl")
# include("filters/interface.jl")

# include("filters/univariate/randomfilter.jl")
# include("filters/univariate/statisticalfilter.jl")
# include("filters/univariate/variancefilter.jl")

# include("filters/univariate/chi2filter.jl")
# include("filters/univariate/pearsoncorfilter.jl")
# include("filters/univariate/mutualinformationclassif.jl")

# include("filters/univariate/suplapscorefiler.jl")
# include("filters/univariate/fisherscorefilter.jl")
# include("filters/univariate/utils.jl")
Expand Down
57 changes: 36 additions & 21 deletions src/dataset/dataset_structs.jl → src/dataset/interface.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,33 @@
# ---------------------------------------------------------------------------- #
# abstract types #
# ---------------------------------------------------------------------------- #
# """
# Abstract type for dataset configuration outputs
# """
# abstract type AbstractDatasetConfig end

# """
# Abstract type for dataset outputs
# """
# abstract type AbstractDataset end

# """
# Abstract type for dataset train, test and validation indexing
# """
# abstract type AbstractIndexCollection end

"""
Abstract type for feature struct
"""
abstract type AbstractFeature end

# ---------------------------------------------------------------------------- #
# types #
# ---------------------------------------------------------------------------- #
# Name of a single variable, accepted either as a `Symbol` or as a `String`.
const VarName = Union{Symbol, String}
# Optional collection of variable names: a vector of `String`s, a vector of
# `Symbol`s, or `nothing` (the alias itself admits `Nothing`).
const VarNames = Union{Vector{String}, Vector{Symbol}, Nothing}
# Optional collection of feature callables: a vector whose element type is a
# subtype of `Base.Callable`, or `nothing`.
const FeatNames = Union{Vector{<:Base.Callable}, Nothing}

# ---------------------------------------------------------------------------- #
# dataset #
# ---------------------------------------------------------------------------- #
Expand Down Expand Up @@ -29,25 +59,7 @@ const WIN_PARAMS = Dict(
adaptivewindow => (nwindows = 20, relative_overlap = 0.5)
)

# """
# Abstract type for dataset configuration outputs
# """
# abstract type AbstractDatasetConfig end

# """
# Abstract type for dataset outputs
# """
# abstract type AbstractDataset end

# """
# Abstract type for dataset train, test and validation indexing
# """
# abstract type AbstractIndexCollection end

"""
Abstract type for feature struct
"""
abstract type AbstractFeature end

# """
# DatasetInfo{F<:Base.Callable, R<:Real, I<:Integer, RNG<:AbstractRNG} <: AbstractDatasetConfig
Expand Down Expand Up @@ -262,7 +274,7 @@ A parametric struct that represents a feature extracted from time series data.
```julia
Feature(value::Number, var::Union{Symbol,String}, feats::Symbol, nwin::Integer)
```
"""
struct Feature{V<:Number, T<:Union{Symbol, String}} <: AbstractFeature
struct Feature{V<:Number, T<:VarName} <: AbstractFeature
value :: V
var :: T
feats :: Symbol
Expand All @@ -275,8 +287,7 @@ struct Feature{V<:Number, T<:Union{Symbol, String}} <: AbstractFeature
end

# Pretty printing
# Verbose display: prints the value, variable name, feature name and window
# number of the `Feature`.
Base.show(io::IO, f::Feature) = print(io,
"Feature($(f.value), $(f.var), $(f.feats), window=$(f.nwin))")
# Compact display: prints only the value, rounded to 4 digits.
# NOTE(review): this method has the same signature as the one above, so if both
# are loaded this later definition replaces the verbose one. Presumably these
# are the removed/added sides of the same change — confirm which one is meant
# to remain.
Base.show(io::IO, f::Feature) = print(io, round(f.value, digits=4))

# Value access methods
Base.getproperty(f::Feature, s::Symbol) = getfield(f, s)
Expand Down Expand Up @@ -310,3 +321,7 @@ variable_name(f::Feature) = f.var
feature_type(f::Feature) = f.feats
# Get window number
window_number(f::Feature) = f.nwin

# ---------------------------------------------------------------------------- #
# functions definitions #
# ---------------------------------------------------------------------------- #
8 changes: 4 additions & 4 deletions src/dataset/prepare_dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ parameters defined in `winparams`.
"""
function _treatment(
X::DataFrame,
vnames::AbstractVector{String},
vnames::VarNames,
treatment::Symbol,
features::AbstractVector{<:Base.Callable},
features::FeatNames,
winparams::NamedTuple
)
# check parameters
Expand Down Expand Up @@ -409,8 +409,8 @@ result = feature_selection_preprocess(df,
"""
function feature_selection_preprocess(
X::DataFrame;
vnames::Union{Vector{String}, Vector{Symbol}, Nothing}=nothing,
features::Union{Vector{<:Base.Callable}, Nothing}=nothing,
vnames::VarNames=nothing,
features::FeatNames=nothing,
nwindows::Union{Int, Nothing}=nothing
)
# check parameters
Expand Down
55 changes: 29 additions & 26 deletions src/filters/interface.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,34 @@
# =========================================================================================
# Univariate filters
# ---------------------------------------------------------------------------- #
# univariate filters #
# ---------------------------------------------------------------------------- #
abstract type AbstractUnivariateFilterBased{T<:AbstractLimiter} <: AbstractFilterBased end

abstract type UnivariateFilterBased{T<:AbstractLimiter} <: AbstractFilterBased end
abstract type AbstractVarianceFilter{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractRandomFilter{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractStatisticalFilter{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractChi2Filter{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractPearsonCorFilter{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractMutualInformationClassif{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractSupLaplacianScore{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractFisherScore{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end
abstract type AbstractIdentityFilter{T<:AbstractLimiter} <: AbstractUnivariateFilterBased{T} end

abstract type AbstractVarianceFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractRandomFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractStatisticalFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractChi2Filter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractPearsonCorFilter{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractMutualInformationClassif{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractSupLaplacianScore{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
abstract type AbstractFisherScore{T<:AbstractLimiter} <: UnivariateFilterBased{T} end
is_univariate(::AbstractUnivariateFilterBased) = true

is_univariate(::UnivariateFilterBased) = true
# ---------------------------------------------------------------------------- #
# multivariate filters #
# ---------------------------------------------------------------------------- #
# Root abstract type for multivariate filter-based selectors; a direct subtype
# of `AbstractFilterBased`.
abstract type AbstractMultivariateFilterBased <: AbstractFilterBased end
# Abstract type for correlation filters, declared as a multivariate filter.
abstract type AbstractCorrelationFilter <: AbstractMultivariateFilterBased end

# Trait-style predicate: returns `true` for every multivariate filter-based selector.
is_multivariate(::AbstractMultivariateFilterBased) = true

# ---------------------------------------------------------------------------- #
# functions definitions #
# ---------------------------------------------------------------------------- #
function score(
X::AbstractDataFrame,
selector::UnivariateFilterBased{<:AbstractLimiter}
selector::AbstractUnivariateFilterBased{<:AbstractLimiter}
)
return error("`score` for unsupervised selectors not implemented " *
"for type: $(typeof(selector))")
Expand All @@ -25,38 +37,29 @@ end
# Fallback `score` method for supervised univariate selectors (data `X` plus
# labels `y`). It never computes a score: it always raises an error whose
# message names the concrete type of `selector`.
# NOTE(review): presumably concrete selector types are expected to add their own
# more specific `score(X, y, selector)` method — confirm.
# NOTE(review): the two `selector::...` lines below look like the removed/added
# sides of the rename `UnivariateFilterBased` -> `AbstractUnivariateFilterBased`;
# only one of them can remain in a valid signature — confirm.
function score(
X::AbstractDataFrame,
y::AbstractVector{<:Class},
selector::UnivariateFilterBased{<:AbstractLimiter}
selector::AbstractUnivariateFilterBased{<:AbstractLimiter}
)
# `error` throws, so the `return` is never actually reached with a value.
return error("`score` for supervised selectors not implemented " *
"for type: $(typeof(selector))")
end

# Return the `limiter` property of a univariate selector.
# Throws an `ErrorException` when `selector` has no `limiter` property.
# NOTE(review): the two header lines below look like the removed/added sides of
# the rename `UnivariateFilterBased` -> `AbstractUnivariateFilterBased`; only
# one of them should remain — confirm.
function limiter(selector::UnivariateFilterBased)
function limiter(selector::AbstractUnivariateFilterBased)
# Guard: fail early with an explicit message instead of a generic field-access error.
!hasproperty(selector, :limiter) &&
throw(ErrorException("`selector` struct not contain `limiter` field"))
return selector.limiter
end

# Apply an unsupervised univariate selector to `X`: compute `score(X, selector)`
# and pass the result, together with the selector's limiter, to `limit`.
# Returns whatever `limit` returns (`limit` is defined outside this view).
# NOTE(review): the two `selector::...` lines below look like the removed/added
# sides of the rename `UnivariateFilterBased` -> `AbstractUnivariateFilterBased`;
# only one of them should remain — confirm.
function apply(
X::AbstractDataFrame,
selector::UnivariateFilterBased
selector::AbstractUnivariateFilterBased
)
return limit(score(X, selector), limiter(selector))
end

# Apply a supervised univariate selector to `X` with labels `y`: compute
# `score(X, y, selector)` and pass the result, together with the selector's
# limiter, to `limit`. Returns whatever `limit` returns (`limit` is defined
# outside this view).
# NOTE(review): the two `selector::...` lines below look like the removed/added
# sides of the rename `UnivariateFilterBased` -> `AbstractUnivariateFilterBased`;
# only one of them should remain — confirm.
function apply(
X::AbstractDataFrame,
y::AbstractVector{<:Class},
selector::UnivariateFilterBased
selector::AbstractUnivariateFilterBased
)
return limit(score(X, y, selector), limiter(selector))
end

# =========================================================================================
# Multivariate filters

abstract type MultivariateFilterBased <: AbstractFilterBased end

abstract type AbstractCorrelationFilter <: MultivariateFilterBased end

is_multivariate(::MultivariateFilterBased) = true
Loading

0 comments on commit 239459d

Please sign in to comment.