Skip to content

Commit

Permalink
filters
Browse files Browse the repository at this point in the history
  • Loading branch information
PasoStudio73 committed Mar 4, 2025
1 parent f09d9ee commit 6838d29
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 109 deletions.
34 changes: 12 additions & 22 deletions src/filters/multivariate/correlationfilter.jl
Original file line number Diff line number Diff line change
@@ -1,41 +1,31 @@
# ---------------------------------------------------------------------------- #
# correlation filter #
# ---------------------------------------------------------------------------- #
"""
    CorrelationFilter(corf::Function, threshold::Real)

Multivariate filter holding a correlation function `corf` and a `threshold`.
`apply` calls `corf` on the data matrix and forwards the result, together with
`threshold`, to `findcorrelation`.

# Arguments
- `corf`: function called on the data matrix to obtain a correlation matrix.
- `threshold`: cut-off forwarded to `findcorrelation`; must lie in `[0, 1]`.

# Throws
- `DomainError` if `threshold` is outside `[0, 1]`.
"""
struct CorrelationFilter <: AbstractCorrelationFilter
    corf::Function
    threshold::Real

    function CorrelationFilter(corf::Function, threshold::Real)
        # Validate exactly once, before the instance is built.
        (0 <= threshold <= 1) ||
            throw(DomainError(threshold, "Threshold must be within 0 and 1"))
        return new(corf, threshold)
    end
end

# ========================================================================================
# ACCESSORS

# Accessor: correlation function stored in the filter.
function corf(selector::CorrelationFilter)
    return selector.corf
end

# Accessor: threshold stored in the filter.
function threshold(selector::CorrelationFilter)
    return selector.threshold
end

# ========================================================================================
# TRAITS

# Trait flags for correlation filters: declared unsupervised.
# (`flase` was a typo for `false` and raised UndefVarError when called.)
is_supervised(::AbstractCorrelationFilter) = false
is_unsupervised(::AbstractCorrelationFilter) = true

# ========================================================================================
# APPLY

"""
    apply(X::AbstractMatrix, selector::CorrelationFilter) -> Vector{Int}
    apply(Xdf::AbstractDataFrame, selector::CorrelationFilter) -> Vector{Int}

Call the selector's correlation function on `X` and pass the resulting matrix,
together with the selector's threshold, to `findcorrelation`.

The data-frame method converts its input with `Matrix(Xdf)` and forwards to the
matrix method.
"""
function apply(X::AbstractMatrix, selector::CorrelationFilter)::Vector{Int}
    cormtrx = corf(selector)(X)
    return findcorrelation(cormtrx; threshold=threshold(selector))
end

function apply(Xdf::AbstractDataFrame, selector::CorrelationFilter)::Vector{Int}
    return apply(Matrix(Xdf), selector)
end

# ========================================================================================
# UTILS

# ---------------------------------------------------------------------------- #
# utils #
# ---------------------------------------------------------------------------- #
"""
findcorrelation(cormtrx; threshold)
"""
Expand Down
18 changes: 10 additions & 8 deletions src/filters/univariate/chi2filter.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
# ---------------------------------------------------------------------------- #
# chi2 filter #
# ---------------------------------------------------------------------------- #
"""
    Chi2Filter{T<:AbstractLimiter}

Chi2 filter parameterized on its limiter type `T`; it stores only that limiter.
"""
struct Chi2Filter{T <: AbstractLimiter} <: AbstractChi2Filter{T}
    limiter::T
    # parameters
end

# ========================================================================================
# TRAITS

# Trait flags for chi2 filters: declared supervised.
function is_supervised(::AbstractChi2Filter)
    return true
end

function is_unsupervised(::AbstractChi2Filter)
    return false
end

# ========================================================================================
# SCORE

function score(
X::AbstractDataFrame,
X::AbstractMatrix,
y::AbstractVector{<:Class},
selector::Chi2Filter
)::Vector{Float64}
Expand All @@ -29,9 +30,10 @@ function score(
end
return scores
end
# Data-frame entry point: convert to a matrix and defer to the matrix method.
function score(
    Xdf::AbstractDataFrame,
    y::AbstractVector{<:Class},
    selector::Chi2Filter
)
    X = Matrix(Xdf)
    return score(X, y, selector)
end

# ========================================================================================
# CUSTOM CONSTRUCTORS

# ---------------------------------------------------------------------------- #
#                             custom constructors                              #
# ---------------------------------------------------------------------------- #
# Chi2Filter limited by `ThresholdLimiter(alpha, <=)`.
# The comparison operator is required: without it the call degrades to the
# one-argument form `ThresholdLimiter(alpha, )`.
Chi2Threshold(; alpha=0.05) = Chi2Filter(ThresholdLimiter(alpha, <=))
# Chi2Filter limited by `RankingLimiter(nbest, false)`.
Chi2Ranking(nbest) = Chi2Filter(RankingLimiter(nbest, false))
15 changes: 7 additions & 8 deletions src/filters/univariate/fisherscorefilter.jl
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
# ---------------------------------------------------------------------------- #
# fisher filter #
# ---------------------------------------------------------------------------- #
"""
    FisherScoreFilter{T<:AbstractLimiter}

Fisher-score filter parameterized on its limiter type `T`; it stores only that
limiter.
"""
struct FisherScoreFilter{T<:AbstractLimiter} <: AbstractFisherScore{T}
    limiter::T
    # parameters
end

# ========================================================================================
# TRAITS

# Trait flags for Fisher-score filters: declared supervised.
function is_supervised(::AbstractFisherScore)
    return true
end

function is_unsupervised(::AbstractFisherScore)
    return false
end

"""
    score(X::AbstractMatrix, y, selector::FisherScoreFilter) -> Vector{Float64}
    score(Xdf::AbstractDataFrame, y, selector::FisherScoreFilter) -> Vector{Float64}

Encode the labels `y` with `labelmap`/`labelencode` and return the result of
`fisher_score.fisher_score(X, ey)`, converted to `Vector{Float64}`.

The data-frame method converts its input with `Matrix(Xdf)` and forwards to the
matrix method.
"""
function score(
    X::AbstractMatrix,
    y::AbstractVector{<:Class},
    selector::FisherScoreFilter
)::Vector{Float64}
    lmy = labelmap(y)
    ey = labelencode(lmy, y)
    return fisher_score.fisher_score(X, ey)
end

function score(
    Xdf::AbstractDataFrame,
    y::AbstractVector{<:Class},
    selector::FisherScoreFilter
)::Vector{Float64}
    return score(Matrix(Xdf), y, selector)
end
23 changes: 11 additions & 12 deletions src/filters/univariate/pearsoncorfilter.jl
Original file line number Diff line number Diff line change
@@ -1,32 +1,31 @@
# ---------------------------------------------------------------------------- #
# pearson filter #
# ---------------------------------------------------------------------------- #
"""
    PearsonCorFilter{T<:AbstractLimiter}

Pearson-correlation filter parameterized on its limiter type `T`; it stores
only that limiter.
"""
struct PearsonCorFilter{T<:AbstractLimiter} <: AbstractPearsonCorFilter{T}
    limiter::T
    # TODO parameters
end

# ========================================================================================
# TRAITS

# Trait flags for Pearson-correlation filters: declared supervised.
function is_supervised(::AbstractPearsonCorFilter)
    return true
end

function is_unsupervised(::AbstractPearsonCorFilter)
    return false
end

"""
    score(X::AbstractMatrix, y, selector::PearsonCorFilter) -> Vector{Float64}
    score(Xdf::AbstractDataFrame, y, selector::PearsonCorFilter) -> Vector{Float64}

Return `cor(column, y)` for every column of `X`.

The data-frame method converts its input with `Matrix(Xdf)` and forwards to the
matrix method.

# Throws
- `DomainError` if the element type of any column is not a subtype of `Real`;
  the offending column indices are reported.
"""
function score(
    X::AbstractMatrix,
    y::AbstractVector{<:Class},
    selector::PearsonCorFilter
)::Vector{Float64}
    # Indices of the columns whose element type is not numeric.
    uncalcidxes = findall(T -> !(T <: Real), eltype.(eachcol(X)))
    if !isempty(uncalcidxes)
        # Message is assembled from single-line literals so that source
        # indentation cannot leak into the text shown to the user.
        msg = "Columns must be subtype of Real.\n\n" *
              "The following column indices are not handable: $(uncalcidxes)"
        throw(DomainError(uncalcidxes, msg))
    end
    # `Ref(y)` keeps `y` whole while broadcasting over the columns.
    return cor.(eachcol(X), Ref(y))
end

function score(
    Xdf::AbstractDataFrame,
    y::AbstractVector{<:Class},
    selector::PearsonCorFilter
)::Vector{Float64}
    return score(Matrix(Xdf), y, selector)
end

# ========================================================================================
# CUSTOM CONSTRUCTORS

# ---------------------------------------------------------------------------- #
# custom constructors #
# ---------------------------------------------------------------------------- #
# PearsonCorFilter limited by `RankingLimiter(nbest, false)`.
function PearsonCorRanking(nbest)
    limiter = RankingLimiter(nbest, false)
    return PearsonCorFilter(limiter)
end
27 changes: 10 additions & 17 deletions src/filters/univariate/randomfilter.jl
Original file line number Diff line number Diff line change
@@ -1,33 +1,26 @@
# ---------------------------------------------------------------------------- #
# random filter #
# ---------------------------------------------------------------------------- #
"""
    RandomFilter{T<:AbstractLimiter}

Random filter parameterized on its limiter type `T`.

# Fields
- `limiter::T`: the limiter.
- `seed::Union{Int,Nothing}`: seed for the `MersenneTwister` built in `score`;
  `nothing` means the generator is created without an explicit seed.
"""
struct RandomFilter{T <: AbstractLimiter} <: AbstractRandomFilter{T}
    limiter::T
    # parameters
    seed::Union{Int, Nothing}
end

# ========================================================================================
# ACCESSORS

# Accessor: seed stored in the filter (`nothing` when unset).
function seed(selector::RandomFilter)
    return selector.seed
end

# ========================================================================================
# TRAITS

# Trait flags for random filters: declared unsupervised.
function is_supervised(::AbstractRandomFilter)
    return false
end

function is_unsupervised(::AbstractRandomFilter)
    return true
end

# ========================================================================================
# SCORE

"""
    score(X::AbstractMatrix, selector::RandomFilter) -> Vector{Float64}
    score(Xdf::AbstractDataFrame, selector::RandomFilter) -> Vector{Float64}

Return one random number per column of `X`, drawn from a `MersenneTwister`
seeded with the selector's seed (or built without a seed when it is `nothing`).

The data-frame method converts its input with `Matrix(Xdf)` and forwards to the
matrix method.
"""
function score(X::AbstractMatrix, selector::RandomFilter)::Vector{Float64}
    s = seed(selector)
    rng = isnothing(s) ? MersenneTwister() : MersenneTwister(s)
    return rand(rng, size(X, 2))
end

function score(Xdf::AbstractDataFrame, selector::RandomFilter)::Vector{Float64}
    return score(Matrix(Xdf), selector)
end

# ========================================================================================
# CUSTOM CONSTRUCTORS

# ---------------------------------------------------------------------------- #
# custom constructors #
# ---------------------------------------------------------------------------- #
# RandomFilter limited by `RankingLimiter(nbest)`, with an explicit seed.
# The seed is converted with `Int(seed)` because the field is typed
# `Union{Int,Nothing}` and the default outer constructor of a parametric struct
# does not convert its arguments (an `Int32` seed would otherwise be rejected).
function RandomRanking(nbest::Integer, seed::Integer)
    return RandomFilter(RankingLimiter(nbest), Int(seed))
end

# RandomFilter limited by `RankingLimiter(nbest)`, without a seed.
function RandomRanking(nbest::Integer)
    return RandomFilter(RankingLimiter(nbest), nothing)
end
60 changes: 27 additions & 33 deletions src/filters/univariate/statisticalfilter.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# ---------------------------------------------------------------------------- #
# statistical filter #
# ---------------------------------------------------------------------------- #
"""
Perform provided hypothesis test `htest` (from: https://github.com/JuliaStats/HypothesisTests.jl)
on each variable.
Expand All @@ -11,22 +14,14 @@ struct StatisticalFilter{T<:AbstractLimiter} <: AbstractStatisticalFilter{T}
versus::Symbol
end

# ========================================================================================
# ACCESSORS

# Accessor: hypothesis test stored in the filter.
function htest(selector::StatisticalFilter)
    return selector.htest
end

# Accessor: comparison mode symbol stored in the filter.
function versus(selector::StatisticalFilter)
    return selector.versus
end

# ========================================================================================
# TRAITS

# Trait flags for statistical filters: declared supervised.
function is_supervised(::AbstractStatisticalFilter)
    return true
end

function is_unsupervised(::AbstractStatisticalFilter)
    return false
end

function score(
X::AbstractDataFrame,
X::AbstractMatrix,
y::AbstractVector{<:Class},
selector::StatisticalFilter
)::DataFrame
Expand All @@ -45,10 +40,10 @@ function score(
# the second the index, or indices, of the classes with which the first item is compared
itr = Vector(vrs == :ovo ?
collect(subsets(ic, 2)) :
[ [first(setdiff(ic, x)), x] for x in subsets(ic, nclass - 1) ]
[[first(setdiff(ic, x)), x] for x in subsets(ic, nclass - 1)]
)

colnames = join.([ [classes[c], classes[vs]] for (c, vs) in itr ], "-vs-")
colnames = join.([[classes[c], classes[vs]] for (c, vs) in itr ], "-vs-")
scores = DataFrame(colnames .=> [Float64[]])
for cidx in 1:numcol
pvals = []
Expand All @@ -64,43 +59,42 @@ function score(
end
return scores
end
# Data-frame entry point: convert to a matrix and defer to the matrix method.
function score(
    Xdf::AbstractDataFrame,
    y::AbstractVector{<:Class},
    selector::StatisticalFilter
)::DataFrame
    X = Matrix(Xdf)
    return score(X, y, selector)
end

# ========================================================================================
# CUSTOM LIMITER

# ---------------------------------------------------------------------------- #
# custom limiter #
# ---------------------------------------------------------------------------- #
"""
    StatisticalLimiter{T<:AbstractLimiter}

Limiter that wraps another limiter of type `T`. Its `limit` method collects
the rows of the score table and forwards them to the wrapped limiter.
"""
struct StatisticalLimiter{T <: AbstractLimiter} <: AbstractLimiter
    limiter::T
end

"""
    limit(scores::AbstractMatrix, sl::StatisticalLimiter)
    limit(scores::DataFrame, sl::StatisticalLimiter)

Collect every row of `scores` into its own vector and forward the resulting
vector of vectors to `limit` on the wrapped limiter `sl.limiter`.

Both the matrix and the data-frame signatures are provided so that either
representation of the score table is accepted.
"""
function limit(scores::AbstractMatrix, sl::StatisticalLimiter)
    return limit(collect.(collect(eachrow(scores))), sl.limiter)
end

function limit(scores::DataFrame, sl::StatisticalLimiter)
    return limit(collect.(collect(eachrow(scores))), sl.limiter)
end

# ========================================================================================
# CUSTOM CONSTRUCTORS

# ---------------------------------------------------------------------------- #
#                             custom constructors                              #
# ---------------------------------------------------------------------------- #
"""
    statistical_majority(htest; versus=:ova, significance=0.05, rejectnullhp=true)

Build a `StatisticalFilter` for `htest` whose limiter is a `StatisticalLimiter`
wrapping `MajorityLimiter(ThresholdLimiter(significance, op))`.

# Keywords
- `versus::Symbol`: passed through to the resulting filter (default `:ova`).
- `significance::Real`: threshold given to `ThresholdLimiter`; must lie in `[0, 1]`.
- `rejectnullhp::Bool`: selects the comparison operator `op`, `<=` when `true`
  and `>` when `false`.

# Throws
- `DomainError` if `significance` is outside `[0, 1]`.
"""
function statistical_majority(
    htest::Any;
    versus::Symbol=:ova,
    significance::Real=0.05,
    rejectnullhp::Bool=true
)
    (0 <= significance <= 1) ||
        throw(DomainError(significance, "significance must be within 0 and 1"))
    rejectnull = rejectnullhp ? (<=) : (>)
    sl = StatisticalLimiter(MajorityLimiter(ThresholdLimiter(significance, rejectnull)))
    return StatisticalFilter(sl, htest, versus)
end

# Previous CamelCase name, kept as a forwarding alias for existing callers.
StatisticalMajority(htest::Any; kwargs...) = statistical_majority(htest; kwargs...)

"""
    statistical_atleastonce(htest; versus=:ova, significance=0.05, rejectnullhp=true)

Build a `StatisticalFilter` for `htest` whose limiter is a `StatisticalLimiter`
wrapping `AtLeastLimiter(ThresholdLimiter(significance, op), 1)`.

# Keywords
- `versus::Symbol`: passed through to the resulting filter (default `:ova`).
- `significance::Real`: threshold given to `ThresholdLimiter`; must lie in `[0, 1]`.
- `rejectnullhp::Bool`: selects the comparison operator `op`, `<=` when `true`
  and `>` when `false`.

# Throws
- `DomainError` if `significance` is outside `[0, 1]`.
"""
function statistical_atleastonce(
    htest::Any;
    versus::Symbol=:ova,
    significance::Real=0.05,
    rejectnullhp::Bool=true
)
    (0 <= significance <= 1) ||
        throw(DomainError(significance, "significance must be within 0 and 1"))
    rejectnull = rejectnullhp ? (<=) : (>)
    sl = StatisticalLimiter(AtLeastLimiter(ThresholdLimiter(significance, rejectnull), 1))
    return StatisticalFilter(sl, htest, versus)
end

# Previous CamelCase name, kept as a forwarding alias for existing callers.
StatisticalAtLeastOnce(htest::Any; kwargs...) = statistical_atleastonce(htest; kwargs...)
12 changes: 8 additions & 4 deletions src/filters/univariate/suplapscorefiler.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# ---------------------------------------------------------------------------- #
# laplacian filter #
# ---------------------------------------------------------------------------- #
struct SupLaplacianScoreFilter{T<:AbstractLimiter} <: AbstractSupLaplacianScore{T}
limiter::T
metric::Symbol
Expand All @@ -19,27 +22,28 @@ struct SupLaplacianScoreFilter{T<:AbstractLimiter} <: AbstractSupLaplacianScore{
end

# Trait flags for supervised Laplacian-score filters: declared supervised.
function is_supervised(::AbstractSupLaplacianScore)
    return true
end

function is_unsupervised(::AbstractSupLaplacianScore)
    return false
end

# Accessor: `metric` field of the filter.
function metric(s::SupLaplacianScoreFilter)
    return s.metric
end

# Accessor: `weightmode` field of the filter.
function weightmode(s::SupLaplacianScoreFilter)
    return s.weightmode
end

# Accessor: `nneighbors` field of the filter.
function nneighbors(s::SupLaplacianScoreFilter)
    return s.nneighbors
end

"""
    score(X::AbstractMatrix, y, selector::SupLaplacianScoreFilter) -> Vector{Float64}
    score(Xdf::AbstractDataFrame, y, selector::SupLaplacianScoreFilter) -> Vector{Float64}

Encode the labels `y` with `labelmap`/`labelencode`, build the matrix `W` with
`construct_w.construct_W` in `"supervised"` neighbor mode using the selector's
metric, weight mode and neighbor count, then return
`lap_score.lap_score(X; W=W)` with every `NaN` replaced by `Inf`.

The data-frame method converts its input with `Matrix(Xdf)` and forwards to the
matrix method.
"""
function score(
    X::AbstractMatrix,
    y::AbstractVector{<:Class},
    selector::SupLaplacianScoreFilter
)::Vector{Float64}
    lmy = labelmap(y)
    ey = labelencode(lmy, y)
    w = construct_w.construct_W(
        X;
        y=ey,
        metric=string(metric(selector)),
        neighbor_mode="supervised",
        weight_mode=string(weightmode(selector)),
        k=nneighbors(selector)
    )
    # Named `scores` so the local does not shadow the `score` function itself.
    scores = lap_score.lap_score(X; W=w)
    return replace!(scores, NaN => Inf)
end

function score(
    Xdf::AbstractDataFrame,
    y::AbstractVector{<:Class},
    selector::SupLaplacianScoreFilter
)::Vector{Float64}
    return score(Matrix(Xdf), y, selector)
end
11 changes: 6 additions & 5 deletions src/filters/univariate/variancefilter.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ---------------------------------------------------------------------------- #
# variance filters #
# variance filter #
# ---------------------------------------------------------------------------- #
struct VarianceFilter{T<:AbstractLimiter} <: AbstractVarianceFilter{T}
limiter::T
Expand All @@ -9,14 +9,15 @@ end
# Trait flags for variance filters: declared unsupervised.
function is_supervised(::AbstractVarianceFilter)
    return false
end

function is_unsupervised(::AbstractVarianceFilter)
    return true
end

"""
    score(X::AbstractMatrix, selector::VarianceFilter) -> Vector{Float64}
    score(Xdf::AbstractDataFrame, selector::VarianceFilter) -> Vector{Float64}

Return the variance of every column of `X`.

The data-frame method converts its input with `Matrix(Xdf)` and forwards to the
matrix method.
"""
function score(X::AbstractMatrix, selector::VarianceFilter)::Vector{Float64}
    # `var` is called with its defaults (`corrected=true`): the sum is scaled
    # with n-1.
    return var.(eachcol(X))
end

function score(Xdf::AbstractDataFrame, selector::VarianceFilter)::Vector{Float64}
    return score(Matrix(Xdf), selector)
end

# Ranking
# ---------------------------------------------------------------------------- #
# custom constructors #
# ---------------------------------------------------------------------------- #
# VarianceFilter limited by `RankingLimiter(nbest, true)`.
VarianceRanking(nbest) = VarianceFilter(RankingLimiter(nbest, true))
# VarianceFilter limited by `ThresholdLimiter(threshold, >=)`.
# NOTE(review): the comparison operator was missing from this call; `>=` is
# assumed by analogy with the chi2 constructors (ranking flag `false` pairs with
# `<=` there, ranking flag `true` here) -- confirm against the intended semantics.
VarianceThreshold(threshold) = VarianceFilter(ThresholdLimiter(threshold, >=))

0 comments on commit 6838d29

Please sign in to comment.