Skip to content

Commit

Permalink
new dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Feb 25, 2025
1 parent 239459d commit c81b756
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 83 deletions.
54 changes: 13 additions & 41 deletions src/dataset/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -256,71 +256,43 @@ const WIN_PARAMS = Dict(
# end

"""
Feature{V<:Number, T<:Union{Symbol, String}} <: AbstractFeature
InfoFeat{T<:Union{Symbol, String}} <: AbstractFeature
A parametric struct that represents a feature extracted from time series data.
Holds information about a dataset column; used in feature selection.
# Type Parameters
- `V`: Type of the feature value (must be a subtype of `Number`)
- `T`: Type of the variable name (must be either `Symbol` or `String`)
# Fields
- `value::V`: The numerical value of the feature
- `var::T`: The variable name/identifier
- `feats::Symbol`: The feature extraction function name
- `var::T`: The variable name/identifier
- `nwin::Int`: The window number (must be positive)
# Constructors
```julia
Feature(value::Number, var::Union{Symbol,String}, feats::Symbol, nwin::Integer)
InfoFeat(feats::Symbol, var::Union{Symbol,String}, nwin::Int)
```
"""
struct Feature{V<:Number, T<:VarName} <: AbstractFeature
value :: V
var :: T
struct InfoFeat{T<:VarName} <: AbstractFeature
feats :: Symbol
var :: T
nwin :: Int

function Feature(value::Number, var::Union{Symbol,String}, feats::Symbol, nwin::Integer)
function InfoFeat(feats::Symbol, var::VarName, nwin::Int)
nwin > 0 || throw(ArgumentError("Window number must be positive"))
new{typeof(value), typeof(var)}(value, var, feats, nwin)
new{typeof(var)}(feats, var, nwin)
end
end

# Pretty printing
Base.show(io::IO, f::Feature) = print(io, round(f.value, digits=4))

# Value access methods
Base.getproperty(f::Feature, s::Symbol) = getfield(f, s)
Base.propertynames(::Feature) = (:value, :var, :feats, :nwin)

# Conversion methods for NaN handling
Base.convert(::Type{Feature}, x::Missing) = Feature(NaN, :missing, :none, 1)
Base.convert(::Type{Feature}, x::Nothing) = Feature(NaN, :nothing, :none, 1)

# Test if value is NaN
Base.isnan(f::Feature) = isnan(f.value)

# Numeric comparisons
Base.isless(f::Feature, x::Number) = isless(f.value, x)
Base.isless(x::Number, f::Feature) = isless(x, f.value)
Base.isless(f1::Feature, f2::Feature) = isless(f1.value, f2.value)

# Convert to number for arithmetic operations
Base.convert(::Type{Number}, f::Feature) = f.value
Base.convert(::Type{Float64}, f::Feature) = convert(Float64, f.value)

# Forward numeric operations to the value field
for op in (:+, :-, :*, :/, :^)
@eval Base.$op(f::Feature, x::Number) = $op(f.value, x)
@eval Base.$op(x::Number, f::Feature) = $op(x, f.value)
end
# Property access for InfoFeat: every lookup is forwarded straight to
# `getfield`, and the three stored fields are advertised as its properties.
Base.getproperty(f::InfoFeat, s::Symbol) = getfield(f, s)
Base.propertynames(::InfoFeat) = (:feats, :var, :nwin)

# Get variable name
variable_name(f::Feature) = f.var
variable_name(f::InfoFeat) = f.var
# Get feature type
feature_type(f::Feature) = f.feats
feature_type(f::InfoFeat) = f.feats
# Get window number
window_number(f::Feature) = f.nwin
window_number(f::InfoFeat) = f.nwin

# ---------------------------------------------------------------------------- #
# functions definitions #
Expand Down
73 changes: 31 additions & 42 deletions src/dataset/prepare_dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,38 +83,23 @@ function _treatment(
n_intervals = winparams.type(max_interval; _wparams...)

# Initialize DataFrame
if treatment == :aggregate # propositional
if n_intervals == 1
valid_X = DataFrame([v => Float64[]
for v in [string(f, "(", v, ")")
for f in features for v in vnames]]
)
else
valid_X = DataFrame([v => Float64[]
for v in [string(f, "(", v, ")w", i)
for f in features for v in vnames
for i in 1:length(n_intervals)]]
)
end

elseif treatment == :reducesize # modal
# valid_X = DataFrame([name => Vector{Float64}[] for name in vnames])
valid_X = DataFrame([name => Vector{Float64}[] for name in vnames])

elseif treatment == :feature_selection
if n_intervals == 1
# valid_X = DataFrame([v => Float64[]
valid_X = DataFrame([v => Feature[]
for v in [string(f, "(", v, ")")
for f in features for v in vnames]]
)
else
# valid_X = DataFrame([v => Float64[]
valid_X = DataFrame([v => Feature[]
for v in [string(f, "(", v, ")w", i)
for f in features for v in vnames
for i in 1:length(n_intervals)]]
)
valid_X = begin
if treatment == :aggregate # propositional
if n_intervals == 1
DataFrame([v => Float64[]
for v in [string(f, "(", v, ")")
for f in features for v in vnames]]
)
else
DataFrame([v => Float64[]
for v in [string(f, "(", v, ")w", i)
for f in features for v in vnames
for i in 1:length(n_intervals)]]
)
end

elseif treatment == :reducesize # modal
DataFrame([name => Vector{Float64}[] for name in vnames])
end
end

Expand All @@ -139,14 +124,6 @@ function _treatment(
fill(NaN, interval_diff)) for col in row
]
)
elseif treatment == :feature_selection
push!(valid_X, vcat([
vcat([
Feature(f(col[r]), vnames[i], Symbol(f), w) for (w, r) in enumerate(row_intervals)],
# if interval_diff is positive, fill the rest with NaN
fill(NaN, interval_diff)) for (i, col) in enumerate(row), f in features
]...)
)
end
end

Expand Down Expand Up @@ -416,12 +393,24 @@ function feature_selection_preprocess(
# check parameters
isnothing(vnames) && (vnames = names(X))
isnothing(features) && (features = DEFAULT_FE.features)
treatment = :feature_selection
treatment = :aggregate
_ = _check_dimensions(X)
if !isnothing(nwindows)
nwindows > 0 || throw(ArgumentError("Number of windows must be positive."))
end
winparams = isnothing(nwindows) ? DEFAULT_FE_WINPARAMS : merge(DEFAULT_FE_WINPARAMS, (nwindows = nwindows,))

_treatment(X, vnames, treatment, features, winparams)
# Xinfo = [v => InfoFeat[f, Symbol(v), i]
# for f in features for v in vnames
# for i in 1:length(nwindows)]

# # Replace the Xinfo creation with:
Xinfo = [
(f, v, i) => InfoFeat(Symbol(f), v, i)
for f in features
for v in vnames
for i in 1:nwindows
]

_treatment(X, vnames, treatment, features, winparams), Xinfo
end
1 change: 1 addition & 0 deletions src/filters/univariate/mutualinformationclassif.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ struct MutualInformationClassif{T <: AbstractLimiter} <: AbstractMutualInformati
end

# Trait flags: any AbstractMutualInformationClassif filter reports itself as
# supervised and not unsupervised.
is_supervised(::AbstractMutualInformationClassif) = true
is_unsupervised(::AbstractMutualInformationClassif) = false

function score(
X::AbstractDataFrame,
Expand Down

0 comments on commit c81b756

Please sign in to comment.