diff --git a/Project.toml b/Project.toml index 96cdeab..22225b6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,39 +1,29 @@ name = "SoleFeatures" uuid = "3ceb4e54-d968-4e97-8c18-2deeb0d429fb" authors = ["Patrik Cavina", "Federico Manzella", "Giovanni Pagliarini"] -version = "0.2.0" +version = "0.3.0" [deps] Catch22 = "acdeb78f-3d39-4310-8fdf-6d75c17c6d5a" +CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLBase = "f0e99cf1-93fa-52ec-9ecc-5026115318e0" -MultiData = "8cc5100c-b3d1-4f82-90cb-0ea93d317aba" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" SoleBase = "4475fa32-7023-44a0-aa70-4813b230e492" -SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c" -SoleLogics = "b002da8f-3cb3-4d91-bbe3-2953433912b5" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] Catch22 = "0.7" +CategoricalArrays = "0.10" DataFrames = "1" -HypothesisTests = "0.10 - 0.11" +HypothesisTests = "0.11" IterTools = "1" -MultiData = "0 - 0.1" OrderedCollections = "1" -PyCall = "1" Random = "1" -Reexport = "1" -Revise = "3" -SoleData = "0.16" +SoleBase = "0.13" StatsBase = "0.30 - 0.34" julia = "1" diff --git a/src/SoleFeatures.jl b/src/SoleFeatures.jl index 2028298..7f7a14c 100644 --- a/src/SoleFeatures.jl +++ b/src/SoleFeatures.jl @@ -1,96 +1,111 @@ -__precompile__() +# __precompile__() module SoleFeatures -using StatsBase -using MultiData -using SoleData -using Reexport +using SoleBase +using StatsBase, Catch22 +using CategoricalArrays, DataFrames using Random -using LinearAlgebra -using HypothesisTests -using IterTools -using PyCall -using MLBase -# using Pkg -# abstracts -export AbstractFeaturesSelector -export AbstractFilterBased -export AbstractWrapperBased -export AbstractEmbeddedBased -export AbstractLimiter -# structs -export VarianceThreshold -export VarianceRanking -export RandomRanking -export StatisticalAtLeastOnce -export StatisticalMajority -export PearsonCorRanking -export Chi2Ranking -export Chi2Threshold -export MutualInformationClassifRanking -export CompoundStatisticalAtLeastOnce -export CompoundStatisticalMajority -export CorrelationFilter -# main functions -export apply, buildbitmask, transform, transform! 
-# utils -export bm2var +include("utils/features_set.jl") +export mode_5, mode_10, embedding_dist, acf_timescale, acf_first_min, ami2, trev, outlier_timing_pos +export outlier_timing_neg, whiten_timescale, forecast_error, ami_timescale, high_fluctuation, stretch_decreasing +export stretch_high, entropy_pairs, rs_range, dfa, low_freq_power, centroid_freq, transition_variance, periodicity +export base_set, catch9, catch22_set, complete_set -@reexport using DataFrames +include("dataset/dataset_structs.jl") +export Feature -const req_py_pkgs = ["scipy", "scikit-learn", "skfeature"] -const fs = PyNULL() -const construct_w = PyNULL() -const lap_score = PyNULL() -const fisher_score = PyNULL() -function __init__() +include("dataset/prepare_dataset.jl") +export feature_selection_preprocess - pypkgs = getindex.(PyCall.Conda.parseconda(`list`, PyCall.Conda.ROOTENV), "name") - needinstall = !all(p -> in(p, pypkgs), req_py_pkgs) +# using MultiData +# using SoleData +# using Reexport +# using LinearAlgebra +# using HypothesisTests +# using IterTools +# using PyCall +# using MLBase +# # using Pkg - if (needinstall) - PyCall.Conda.pip_interop(true, PyCall.Conda.ROOTENV) - PyCall.Conda.add("scipy") - PyCall.Conda.add("scikit-learn") - PyCall.Conda.pip( - "install", - "git+https://github.com/jundongl/scikit-feature.git#egg=skfeature", - PyCall.Conda.ROOTENV - ) - end +# # abstracts +# export AbstractFeaturesSelector +# export AbstractFilterBased +# export AbstractWrapperBased +# export AbstractEmbeddedBased +# export AbstractLimiter +# # structs +# export VarianceThreshold +# export VarianceRanking +# export RandomRanking +# export StatisticalAtLeastOnce +# export StatisticalMajority +# export PearsonCorRanking +# export Chi2Ranking +# export Chi2Threshold +# export MutualInformationClassifRanking +# export CompoundStatisticalAtLeastOnce +# export CompoundStatisticalMajority +# export CorrelationFilter +# # main functions +# export apply, buildbitmask, transform, transform! 
+# # utils +# export bm2var - copy!(fs, pyimport_conda("sklearn.feature_selection", "scikit-learn")) - copy!(construct_w, pyimport_conda("skfeature.utility.construct_W", "skfeature")) - copy!(lap_score, pyimport_conda( - "skfeature.function.similarity_based.lap_score", - "skfeature" - )) - copy!(fisher_score, pyimport_conda( - "skfeature.function.similarity_based.fisher_score", - "skfeature" - )) -end +# @reexport using DataFrames -include("interface.jl") -include("core.jl") -# Utils -include("utils/utils.jl") -# Filters -include("filters/limiter.jl") -include("filters/interface.jl") -include("filters/univariate/randomfilter.jl") -include("filters/univariate/statisticalfilter.jl") -include("filters/univariate/variancefilter.jl") -include("filters/univariate/chi2filter.jl") -include("filters/univariate/pearsoncorfilter.jl") -include("filters/univariate/mutualinformationclassif.jl") -include("filters/univariate/suplapscorefiler.jl") -include("filters/univariate/fisherscorefilter.jl") -include("filters/univariate/utils.jl") -include("filters/multivariate/correlationfilter.jl") -# Experimental -include("experimental/Experimental.jl") -import .Experimental +# const req_py_pkgs = ["scipy", "scikit-learn", "skfeature"] +# const fs = PyNULL() +# const construct_w = PyNULL() +# const lap_score = PyNULL() +# const fisher_score = PyNULL() +# function __init__() + +# pypkgs = getindex.(PyCall.Conda.parseconda(`list`, PyCall.Conda.ROOTENV), "name") +# needinstall = !all(p -> in(p, pypkgs), req_py_pkgs) + +# if (needinstall) +# PyCall.Conda.pip_interop(true, PyCall.Conda.ROOTENV) +# PyCall.Conda.add("scipy") +# PyCall.Conda.add("scikit-learn") +# PyCall.Conda.pip( +# "install", +# "git+https://github.com/jundongl/scikit-feature.git#egg=skfeature", +# PyCall.Conda.ROOTENV +# ) +# end + +# copy!(fs, pyimport_conda("sklearn.feature_selection", "scikit-learn")) +# copy!(construct_w, pyimport_conda("skfeature.utility.construct_W", "skfeature")) +# copy!(lap_score, pyimport_conda( +# "skfeature.function.similarity_based.lap_score", +# "skfeature" +# )) +# copy!(fisher_score, pyimport_conda( +# "skfeature.function.similarity_based.fisher_score", +# "skfeature" +# )) +# end + +# include("interface.jl") +# include("core.jl") +# # Utils +# include("utils/utils.jl") +# # Filters +# include("filters/limiter.jl") +# include("filters/interface.jl") +# include("filters/univariate/randomfilter.jl") +# include("filters/univariate/statisticalfilter.jl") +# include("filters/univariate/variancefilter.jl") +# include("filters/univariate/chi2filter.jl") +# include("filters/univariate/pearsoncorfilter.jl") +# include("filters/univariate/mutualinformationclassif.jl") +# include("filters/univariate/suplapscorefiler.jl") +# include("filters/univariate/fisherscorefilter.jl") +# include("filters/univariate/utils.jl") +# include("filters/multivariate/correlationfilter.jl") +# # Experimental +# include("experimental/Experimental.jl") +# import .Experimental end # module diff --git a/src/dataset/dataset_structs.jl b/src/dataset/dataset_structs.jl new file mode 100644 index 0000000..bff3c08 --- /dev/null +++ b/src/dataset/dataset_structs.jl @@ -0,0 +1,312 @@ +# ---------------------------------------------------------------------------- # +# dataset # +# ---------------------------------------------------------------------------- # +# const DEFAULT_PREPROC = ( +# train_ratio = 0.8, +# valid_ratio = 1.0, +# shuffle = true, +# stratified = false, +# nfolds = 6, +# rng = TaskLocalRNG() +# ) + +const DEFAULT_FE = ( + features = catch9, +) 
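Reviewer note (not part of the diff): `DEFAULT_FE` wires the default feature set to `catch9`, defined in `src/utils/features_set.jl` below. A minimal sketch of the contract these entries satisfy — each is a plain callable reducing a numeric window to a scalar; the `Statistics` names and toy data here are stand-ins for the full set:

```julia
using Statistics

x   = randn(100)   # toy univariate time series
win = x[1:20]      # one window of it
# every feature function maps a window to a single scalar value
vals = [f(win) for f in (maximum, minimum, mean, std)]
```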
+const DEFAULT_FE_WINPARAMS = (
+    type = adaptivewindow,
+    nwindows = 10,
+    relative_overlap = 0.2
+)
+
+# const AVAIL_WINS = (movingwindow, wholewindow, splitwindow, adaptivewindow)
+# const AVAIL_TREATMENTS = (:aggregate, :reducesize)
+
+const WIN_PARAMS = Dict(
+    movingwindow   => (window_size = 1024, window_step = 512),
+    wholewindow    => NamedTuple(),
+    # NB: the trailing comma is required; without it `(nwindows = 20)` is a
+    # plain assignment evaluating to `20`, not a one-field NamedTuple
+    splitwindow    => (nwindows = 20,),
+    adaptivewindow => (nwindows = 20, relative_overlap = 0.5)
+)
+
+# """
+# Abstract type for dataset configuration outputs
+# """
+# abstract type AbstractDatasetConfig end
+
+# """
+# Abstract type for dataset outputs
+# """
+# abstract type AbstractDataset end
+
+# """
+# Abstract type for dataset train, test and validation indexing
+# """
+# abstract type AbstractIndexCollection end
+
+"""
+Abstract type for feature structs
+"""
+abstract type AbstractFeature end
+
+# """
+#     DatasetInfo{F<:Base.Callable, R<:Real, I<:Integer, RNG<:AbstractRNG} <: AbstractDatasetConfig
+
+# An immutable struct containing dataset configuration and metadata.
+# It is included in ModelConfig and Dataset structs.
+# In a ModelConfig object, it is reachable through the `ds.info` field.
+
+# # Fields
+# - `algo::Symbol`:
+#     Algorithm type, either :classification or :regression.
+# - `treatment::Symbol`:
+#     Data treatment method; specifies how data is reduced when the dataset is composed of time series:
+#     :aggregate, time series are reduced to scalars (propositional case);
+#     :reducesize, time series are windowed to reduce their size.
+# - `features::Vector{F}`:
+#     Feature functions applied to the dataset.
+# - `train_ratio::R`:
+#     Ratio of training data (0-1); specifies the ratio between train and test partitions:
+#     the higher the ratio, the more data is used for training.
+# - `valid_ratio::R`:
+#     Ratio of validation data (0-1); specifies the ratio between train and validation partitions:
+#     the higher the ratio, the more data is used for validation.
+#     If `valid_ratio` is unspecified, no validation data is used.
+# - `shuffle::Bool`:
+#     Whether to shuffle data during train, validation and test partitioning.
+# - `stratified::Bool`:
+#     Whether to use stratified sampling for cross-validation.
+# - `nfolds::I`:
+#     Number of cross-validation folds.
+# - `rng::RNG`:
+#     Random number generator.
+# - `winparams::Union{NamedTuple, Nothing}`:
+#     Window parameters; the NamedTuple should take one of the following forms:
+#     whole window (; type=wholewindow),
+#     adaptive window (type=adaptivewindow, nwindows, relative_overlap),
+#     moving window (type=movingwindow, nwindows, relative_overlap, window_size, window_step),
+#     split window (type=splitwindow, nwindows).
+# - `vnames::Union{Vector{Symbol}, Nothing}`:
+#     Variable names, usually dataset column names.
+# """ +# struct DatasetInfo{F<:Base.Callable, R<:Real, I<:Integer, RNG<:AbstractRNG} <: AbstractDatasetConfig +# algo :: Symbol +# treatment :: Symbol +# features :: Vector{F} +# train_ratio :: R +# valid_ratio :: R +# shuffle :: Bool +# stratified :: Bool +# nfolds :: I +# rng :: RNG +# winparams :: Union{NamedTuple, Nothing} +# vnames :: Union{Vector{Symbol}, Nothing} +# end + +# function DatasetInfo( +# algo::Symbol, +# treatment::Symbol, +# features::AbstractVector{F}, +# train_ratio::R, +# valid_ratio::R, +# shuffle::Bool, +# stratified::Bool, +# nfolds::I, +# rng::RNG, +# winparams::Union{NamedTuple, Nothing}, +# vnames::Union{AbstractVector{<:Union{AbstractString,Symbol}}, Nothing} +# ) where {F<:Base.Callable, R<:Real, I<:Integer, RNG<:AbstractRNG} +# # Validate ratios +# 0 ≤ train_ratio ≤ 1 || throw(ArgumentError("train_ratio must be between 0 and 1")) +# 0 ≤ valid_ratio ≤ 1 || throw(ArgumentError("valid_ratio must be between 0 and 1")) + +# converted_vnames = isnothing(vnames) ? nothing : Vector{Symbol}(Symbol.(vnames)) + +# DatasetInfo{F,R,I,RNG}( +# algo, treatment, features, train_ratio, valid_ratio, +# shuffle, stratified, nfolds, rng, winparams, converted_vnames +# ) +# end + +# function Base.show(io::IO, info::DatasetInfo) +# println(io, "DatasetInfo:") +# for field in fieldnames(DatasetInfo) +# value = getfield(info, field) +# println(io, " ", rpad(String(field) * ":", 15), value) +# end +# end + +# """ +# TT_indexes{T<:Integer} <: AbstractVector{T} + +# A struct that stores indices for train-validation-test splits of a dataset, +# used in Dataset struct. + +# # Fields +# - `train::Vector{T}`: Vector of indices for the training set +# - `valid::Vector{T}`: Vector of indices for the validation set +# - `test::Vector{T}`: Vector of indices for the test set +# """ +# struct TT_indexes{T<:Integer} <: AbstractIndexCollection +# train :: Vector{T} +# valid :: Vector{T} +# test :: Vector{T} +# end + +# function TT_indexes( +# train::AbstractVector{T}, +# valid::AbstractVector{T}, +# test::AbstractVector{T} +# ) where {T<:Integer} +# TT_indexes{T}(train, valid, test) +# end + +# Base.show(io::IO, t::TT_indexes) = print(io, "TT_indexes(train=", t.train, ", validation=", t.valid, ", test=", t.test, ")") +# Base.length(t::TT_indexes) = length(t.train) + length(t.valid) + length(t.test) + +# function _create_views(X, y, tt, stratified::Bool) +# if stratified +# Xtrain = view.(Ref(X), getfield.(tt, :train), Ref(:)) +# Xvalid = view.(Ref(X), getfield.(tt, :valid), Ref(:)) +# Xtest = view.(Ref(X), getfield.(tt, :test), Ref(:)) +# ytrain = view.(Ref(y), getfield.(tt, :train)) +# yvalid = view.(Ref(y), getfield.(tt, :valid)) +# ytest = view.(Ref(y), getfield.(tt, :test)) +# else +# Xtrain = @views X[tt.train, :] +# Xvalid = @views X[tt.valid, :] +# Xtest = @views X[tt.test, :] +# ytrain = @views y[tt.train] +# yvalid = @views y[tt.valid] +# ytest = @views y[tt.test] +# end +# return Xtrain, Xvalid, Xtest, ytrain, yvalid, ytest +# end + +# """ +# Dataset{T<:AbstractDataFrame,S} <: AbstractDataset + +# An immutable struct that efficiently stores dataset splits for machine learning. 
+ +# # Fields +# - `X::T`: The feature matrix as a DataFrame +# - `y::S`: The target vector +# - `tt::Union{TT_indexes{I}, Vector{TT_indexes{I}}}`: Train-test split indices +# - `info::DatasetInfo`: Dataset metadata and configuration +# - `Xtrain`, `Xvalid`, `Xtest`: Data views for features +# - `ytrain`, `yvalid`, `ytest`: Data views for targets +# """ +# struct Dataset{T<:AbstractDataFrame,S} <: AbstractDataset +# X :: T +# y :: S +# tt :: Union{TT_indexes, AbstractVector{<:TT_indexes}} +# info :: DatasetInfo +# Xtrain :: Union{SubDataFrame{T}, Vector{<:SubDataFrame{T}}} +# Xvalid :: Union{SubDataFrame{T}, Vector{<:SubDataFrame{T}}} +# Xtest :: Union{SubDataFrame{T}, Vector{<:SubDataFrame{T}}} +# ytrain :: Union{SubArray{<:eltype(S)}, Vector{<:SubArray{<:eltype(S)}}} +# yvalid :: Union{SubArray{<:eltype(S)}, Vector{<:SubArray{<:eltype(S)}}} +# ytest :: Union{SubArray{<:eltype(S)}, Vector{<:SubArray{<:eltype(S)}}} + +# function Dataset(X::T, y::S, tt, info) where {T<:AbstractDataFrame,S} +# if info.stratified +# Xtrain = view.(Ref(X), getfield.(tt, :train), Ref(:)) +# Xvalid = view.(Ref(X), getfield.(tt, :valid), Ref(:)) +# Xtest = view.(Ref(X), getfield.(tt, :test), Ref(:)) +# ytrain = view.(Ref(y), getfield.(tt, :train)) +# yvalid = view.(Ref(y), getfield.(tt, :valid)) +# ytest = view.(Ref(y), getfield.(tt, :test)) +# else +# Xtrain = @views X[tt.train, :] +# Xvalid = @views X[tt.valid, :] +# Xtest = @views X[tt.test, :] +# ytrain = @views y[tt.train] +# yvalid = @views y[tt.valid] +# ytest = @views y[tt.test] +# end + +# new{T,S}(X, y, tt, info, Xtrain, Xvalid, Xtest, ytrain, yvalid, ytest) +# end +# end + +# function Base.show(io::IO, ds::Dataset) +# println(io, "Dataset:") +# println(io, " X shape: ", size(ds.X)) +# println(io, " y length: ", length(ds.y)) +# if ds.tt isa AbstractVector +# println(io, " Train/Valid/Test: ", length(ds.tt), " folds") +# else +# println(io, " Train indices: ", length(ds.tt.train)) +# println(io, " Valid indices: ", length(ds.tt.valid)) +# println(io, " Test indices: ", length(ds.tt.test)) +# end +# print(io, ds.info) +# end + +""" + Feature{V<:Number, T<:Union{Symbol, String}} <: AbstractFeature + +A parametric struct that represents a feature extracted from time series data. 
+
+# Type Parameters
+- `V`: Type of the feature value (must be a subtype of `Number`)
+- `T`: Type of the variable name (must be either `Symbol` or `String`)
+
+# Fields
+- `value::V`: The numerical value of the feature
+- `var::T`: The variable name/identifier
+- `feats::Symbol`: The feature extraction function name
+- `nwin::Int`: The window number (must be positive)
+
+# Constructors
+```julia
+Feature(value::Number, var::Union{Symbol,String}, feats::Symbol, nwin::Integer)
+```
+"""
+struct Feature{V<:Number, T<:Union{Symbol, String}} <: AbstractFeature
+    value :: V
+    var   :: T
+    feats :: Symbol
+    nwin  :: Int
+
+    function Feature(value::Number, var::Union{Symbol,String}, feats::Symbol, nwin::Integer)
+        nwin > 0 || throw(ArgumentError("Window number must be positive"))
+        new{typeof(value), typeof(var)}(value, var, feats, nwin)
+    end
+end
+
+# Pretty printing
+Base.show(io::IO, f::Feature) = print(io,
+    "Feature($(f.value), $(f.var), $(f.feats), window=$(f.nwin))")
+
+# Value access methods
+Base.getproperty(f::Feature, s::Symbol) = getfield(f, s)
+Base.propertynames(::Feature) = (:value, :var, :feats, :nwin)
+
+# Conversion methods for NaN handling
+Base.convert(::Type{Feature}, x::Missing) = Feature(NaN, :missing, :none, 1)
+Base.convert(::Type{Feature}, x::Nothing) = Feature(NaN, :nothing, :none, 1)
+
+# Test if value is NaN
+Base.isnan(f::Feature) = isnan(f.value)
+
+# Numeric comparisons
+Base.isless(f::Feature, x::Number) = isless(f.value, x)
+Base.isless(x::Number, f::Feature) = isless(x, f.value)
+Base.isless(f1::Feature, f2::Feature) = isless(f1.value, f2.value)
+
+# Convert to number for arithmetic operations
+Base.convert(::Type{Number}, f::Feature) = f.value
+Base.convert(::Type{Float64}, f::Feature) = convert(Float64, f.value)
+
+# Forward numeric operations to the value field
+for op in (:+, :-, :*, :/, :^)
+    @eval Base.$op(f::Feature, x::Number) = $op(f.value, x)
+    @eval Base.$op(x::Number, f::Feature) = $op(x, f.value)
+end
+
+# Get variable name
+variable_name(f::Feature) = f.var
+# Get feature type
+feature_type(f::Feature) = f.feats
+# Get window number
+window_number(f::Feature) = f.nwin
diff --git a/src/dataset/prepare_dataset.jl b/src/dataset/prepare_dataset.jl
new file mode 100644
index 0000000..55ebc5b
--- /dev/null
+++ b/src/dataset/prepare_dataset.jl
@@ -0,0 +1,427 @@
+# # ---------------------------------------------------------------------------- #
+# #                                    utils                                      #
+# # ---------------------------------------------------------------------------- #
+# check_dataframe_type(df::AbstractDataFrame) = all(col -> eltype(col) <: Union{Real,AbstractArray{<:Real}}, eachcol(df))
+# hasnans(X::AbstractDataFrame) = any(x -> x == 1, SoleData.hasnans.(eachcol(X)))
+
+# ---------------------------------------------------------------------------- #
+#                               check dimensions                                #
+# ---------------------------------------------------------------------------- #
+"""
+    _check_dimensions(X::DataFrame) -> Int
+
+Internal function.
+Check the dimensionality of elements in DataFrame columns.
+Currently supports only scalar values and time series (1-dimensional arrays).
+
+# Returns
+- `Int`: 0 for scalar elements, 1 for 1D array elements
+
+# Throws
+- `DimensionMismatch`: If elements have inconsistent dimensions
+- `ArgumentError`: If elements have more than one dimension
+"""
+function _check_dimensions(X::DataFrame)
+    isempty(X) && return 0
+
+    # Get reference dimensions from first element
+    first_col = first(eachcol(X))
+    ref_dims = ndims(first(first_col))
+
+    # Early dimension check
+    ref_dims > 1 && throw(ArgumentError("Elements with more than 1 dimension are not supported."))
+
+    # Check all columns maintain same dimensionality
+    all(col -> all(x -> ndims(x) == ref_dims, col), eachcol(X)) ||
+        throw(DimensionMismatch("Inconsistent dimensions across elements"))
+
+    return ref_dims
+end
+
+# ---------------------------------------------------------------------------- #
+#                                  treatment                                   #
+# ---------------------------------------------------------------------------- #
+"""
+    _treatment(X::DataFrame, vnames::AbstractVector{String}, treatment::Symbol,
+        features::AbstractVector{<:Base.Callable}, winparams::NamedTuple)
+
+Internal function.
+Processes the input DataFrame `X` based on the specified `treatment` type,
+either aggregating the data, reducing its size, or building `Feature` objects
+for feature selection. The function applies the given `features` to the
+columns specified by `vnames`, using window parameters defined in `winparams`.
+
+# Arguments
+- `X::DataFrame`: The input data to be processed.
+- `vnames::AbstractVector{String}`: Names of the columns in `X` to be treated.
+- `treatment::Symbol`: The type of treatment to apply: `:aggregate`,
+    `:reducesize`, or `:feature_selection`.
+- `features::AbstractVector{<:Base.Callable}`: Functions to apply to the
+    specified columns.
+- `winparams::NamedTuple`: Parameters defining the windowing strategy,
+    including the type of window function.
+
+# Returns
+- `DataFrame`: A new DataFrame with the processed data.
+
+# Throws
+- `ArgumentError`: If `winparams` does not contain a valid `type`.
+"""
+function _treatment(
+    X::DataFrame,
+    vnames::AbstractVector{String},
+    treatment::Symbol,
+    features::AbstractVector{<:Base.Callable},
+    winparams::NamedTuple
+)
+    # check parameters
+    haskey(winparams, :type) || throw(ArgumentError("winparams must contain a `type` key, one of: $(keys(WIN_PARAMS))"))
+    haskey(WIN_PARAMS, winparams.type) || throw(ArgumentError("winparams.type must be one of: $(keys(WIN_PARAMS))"))
+
+    # the longest series in the dataset drives the reference windowing
+    # (consistent with the per-row computation in the fill loop below)
+    max_interval = maximum(maximum(length.(collect(row))) for row in eachrow(X))
+    _wparams = NamedTuple(k => v for (k,v) in pairs(winparams) if k != :type)
+    n_intervals = winparams.type(max_interval; _wparams...)
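+    # annotation (not in the original code): `winparams.type` is one of the
+    # window constructors referenced in WIN_PARAMS (movingwindow, wholewindow,
+    # splitwindow, adaptivewindow — presumably provided by SoleBase). Called
+    # with a series length and the remaining window parameters, it returns a
+    # vector of index ranges, one per window, so `length(n_intervals)` is the
+    # number of windows and `col[r]` below slices one window out of a series.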
+
+    # Initialize DataFrame
+    if treatment == :aggregate          # propositional
+        if length(n_intervals) == 1
+            valid_X = DataFrame([v => Float64[]
+                for v in [string(f, "(", v, ")")
+                    for f in features for v in vnames]]
+            )
+        else
+            valid_X = DataFrame([v => Float64[]
+                for v in [string(f, "(", v, ")w", i)
+                    for f in features for v in vnames
+                        for i in 1:length(n_intervals)]]
+            )
+        end
+
+    elseif treatment == :reducesize     # modal
+        valid_X = DataFrame([name => Vector{Float64}[] for name in vnames])
+
+    elseif treatment == :feature_selection
+        if length(n_intervals) == 1
+            valid_X = DataFrame([v => Feature[]
+                for v in [string(f, "(", v, ")")
+                    for f in features for v in vnames]]
+            )
+        else
+            valid_X = DataFrame([v => Feature[]
+                for v in [string(f, "(", v, ")w", i)
+                    for f in features for v in vnames
+                        for i in 1:length(n_intervals)]]
+            )
+        end
+
+    else
+        throw(ArgumentError("Unknown treatment: $treatment"))
+    end
+
+    # Fill DataFrame
+    for row in eachrow(X)
+        row_intervals = winparams.type(maximum(length.(collect(row))); _wparams...)
+        # interval_diff is used in case we encounter a row with fewer intervals than the maximum
+        interval_diff = length(n_intervals) - length(row_intervals)
+
+        if treatment == :aggregate
+            push!(valid_X, vcat([
+                vcat([f(col[r]) for r in row_intervals],
+                    # if interval_diff is positive, fill the rest with NaN
+                    fill(NaN, interval_diff)) for col in row, f in features
+            ]...)
+            )
+        elseif treatment == :reducesize
+            f = haskey(_wparams, :reducefunc) ? _wparams.reducefunc : mean
+            push!(valid_X, [
+                vcat([f(col[r]) for r in row_intervals],
+                    # if interval_diff is positive, fill the rest with NaN
+                    fill(NaN, interval_diff)) for col in row
+            ]
+            )
+        elseif treatment == :feature_selection
+            push!(valid_X, vcat([
+                vcat([
+                    Feature(f(col[r]), vnames[i], Symbol(f), w) for (w, r) in enumerate(row_intervals)],
+                    # pad ragged rows with NaN-valued Features, so the column eltype stays Feature
+                    [Feature(NaN, vnames[i], Symbol(f), length(row_intervals) + k) for k in 1:interval_diff]) for (i, col) in enumerate(row), f in features
+            ]...)
+            )
+        end
+    end
+
+    return valid_X
+end
+
+# # ---------------------------------------------------------------------------- #
+# #                                 partitioning                                  #
+# # ---------------------------------------------------------------------------- #
+# """
+#     _partition(y::Union{CategoricalArray, Vector{T}}, train_ratio::Float64,
+#         shuffle::Bool, stratified::Bool, nfolds::Int, rng::AbstractRNG)
+#         where {T<:Union{AbstractString, Number}}
+
+# Partitions the input vector `y` into training and testing indices based on
+# the specified parameters. Supports both stratified and non-stratified
+# partitioning.
+
+# # Arguments
+# - `y::Union{CategoricalArray, Vector{T}}`: The target variable to partition.
+# - `train_ratio::Float64`: The ratio of data to be used for training in
+#     non-stratified partitioning.
+# - `shuffle::Bool`: Whether to shuffle the data before partitioning.
+# - `stratified::Bool`: Whether to perform stratified partitioning.
+# - `nfolds::Int`: Number of folds for cross-validation in stratified
+#     partitioning.
+# - `rng::AbstractRNG`: Random number generator for reproducibility.

+# # Returns
+# - `Vector{Tuple{Vector{Int}, Vector{Int}}}`: A vector of tuples containing
+#     training and testing indices.

+# # Throws
+# - `ArgumentError`: If `nfolds` is less than 2 when `stratified` is true.
+# """ + +# function _partition( +# y::Union{CategoricalArray,Vector{T}}, +# # validation::Bool, +# train_ratio::Float64, +# valid_ratio::Float64, +# shuffle::Bool, +# stratified::Bool, +# nfolds::Int, +# rng::AbstractRNG +# ) where {T<:Union{AbstractString,Number}} +# if stratified +# stratified_cv = MLJ.StratifiedCV(; nfolds, shuffle, rng) +# tt = MLJ.MLJBase.train_test_pairs(stratified_cv, 1:length(y), y) +# if valid_ratio == 1.0 +# return [TT_indexes(train, eltype(train)[], test) for (train, test) in tt] +# else +# tv = collect((MLJ.partition(t[1], train_ratio)..., t[2]) for t in tt) +# return [TT_indexes(train, valid, test) for (train, valid, test) in tv] +# end +# else +# tt = MLJ.partition(eachindex(y), train_ratio; shuffle, rng) +# if valid_ratio == 1.0 +# return TT_indexes(tt[1], eltype(tt[1])[], tt[2]) +# else +# tv = MLJ.partition(tt[1], valid_ratio; shuffle, rng) +# return TT_indexes(tv[1], tv[2], tt[2]) +# end +# end +# end + +# # ---------------------------------------------------------------------------- # +# # prepare dataset # +# # ---------------------------------------------------------------------------- # +# """ +# prepare_dataset(X::AbstractDataFrame, y::AbstractVector; algo::Symbol=:classification, +# treatment::Symbol=:aggregate, features::AbstractVector{<:Base.Callable}=DEFAULT_FEATS, +# train_ratio::Float64=0.8, shuffle::Bool=true, stratified::Bool=false, +# nfolds::Int=6, rng::AbstractRNG=Random.TaskLocalRNG(), +# winparams::Union{NamedTuple,Nothing}=nothing, +# vnames::Union{AbstractVector{<:Union{AbstractString,Symbol}},Nothing}=nothing) + +# Prepares a dataset for machine learning by processing the input DataFrame `X` and target vector `y`. +# Supports both classification and regression tasks, with options for data treatment and partitioning. + +# # Arguments +# - `X::AbstractDataFrame`: The input data containing features. +# - `y::AbstractVector`: The target variable corresponding to the rows in `X`. +# - `algo::Symbol`: The type of algorithm, either `:classification` or `:regression`. +# - `treatment::Symbol`: The data treatment method, default is `:aggregate`. +# - `features::AbstractVector{<:Base.Callable}`: Functions to apply to the data columns. +# - `train_ratio::Float64`: Ratio of data to be used for training. +# - `shuffle::Bool`: Whether to shuffle data before partitioning. +# - `stratified::Bool`: Whether to use stratified partitioning. +# - `nfolds::Int`: Number of folds for cross-validation. +# - `rng::AbstractRNG`: Random number generator for reproducibility. +# - `winparams::Union{NamedTuple,Nothing}`: Parameters for windowing strategy. +# - `vnames::Union{AbstractVector{<:Union{AbstractString,Symbol}},Nothing}`: Names of the columns in `X`. + +# # Returns +# - `SoleXplorer.Dataset`: A dataset object containing processed data and partitioning information. + +# # Throws +# - `ArgumentError`: If input parameters are invalid or unsupported column types are encountered. 
+# """ + +# function prepare_dataset( +# X::AbstractDataFrame, +# y::AbstractVector; +# # model.config +# algo::Symbol=:classification, +# treatment::Symbol=:aggregate, +# features::AbstractVector{<:Base.Callable}=DEFAULT_FEATS, +# # validation::Bool=false, +# # model.preprocess +# train_ratio::Float64=0.8, +# valid_ratio::Float64=1.0, +# shuffle::Bool=true, +# stratified::Bool=false, +# nfolds::Int=6, +# rng::AbstractRNG=Random.TaskLocalRNG(), +# # model.winparams +# winparams::Union{NamedTuple,Nothing}=nothing, +# vnames::Union{AbstractVector{<:Union{AbstractString,Symbol}},Nothing}=nothing, +# ) +# # check parameters +# check_dataframe_type(X) || throw(ArgumentError("DataFrame must contain only numeric values")) +# size(X, 1) == length(y) || throw(ArgumentError("Number of rows in DataFrame must match length of class labels")) +# treatment in AVAIL_TREATMENTS || throw(ArgumentError("Treatment must be one of: $AVAIL_TREATMENTS")) + +# if algo == :regression +# y isa AbstractVector{<:Number} || throw(ArgumentError("Regression requires a numeric target variable")) +# y isa AbstractFloat || (y = Float64.(y)) +# elseif algo == :classification +# y isa AbstractVector{<:AbstractFloat} && throw(ArgumentError("Classification requires a categorical target variable")) +# y isa CategoricalArray || (y = coerce(y, MLJ.Multiclass)) +# else +# throw(ArgumentError("Algorithms supported, :regression and :classification")) +# end + +# if isnothing(vnames) +# vnames = names(X) +# else +# size(X, 2) == length(vnames) || throw(ArgumentError("Number of columns in DataFrame must match length of variable names")) +# vnames = eltype(vnames) <: Symbol ? string.(vnames) : vnames +# end + +# hasnans(X) && @warn "DataFrame contains NaN values" + +# column_eltypes = eltype.(eachcol(X)) + +# ds_info = DatasetInfo( +# algo, +# treatment, +# features, +# train_ratio, +# valid_ratio, +# shuffle, +# stratified, +# nfolds, +# rng, +# winparams, +# vnames, +# # validation +# ) + +# # case 1: dataframe with numeric columns +# if all(t -> t <: Number, column_eltypes) +# return SoleXplorer.Dataset( +# DataFrame(vnames .=> eachcol(X)), y, +# # _partition(y, validation, train_ratio, valid_ratio, shuffle, stratified, nfolds, rng), +# _partition(y, train_ratio, valid_ratio, shuffle, stratified, nfolds, rng), +# ds_info +# ) +# # case 2: dataframe with vector-valued columns +# elseif all(t -> t <: AbstractVector{<:Number}, column_eltypes) +# return SoleXplorer.Dataset( +# # if winparams is nothing, then leave the dataframe as it is +# isnothing(winparams) ? DataFrame(vnames .=> eachcol(X)) : _treatment(X, vnames, treatment, features, winparams), y, +# # _partition(y, validation, train_ratio, valid_ratio, shuffle, stratified, nfolds, rng), +# _partition(y, train_ratio, valid_ratio, shuffle, stratified, nfolds, rng), +# ds_info +# ) +# else +# throw(ArgumentError("Column type not yet supported")) +# end +# end + +# function prepare_dataset( +# X::AbstractDataFrame, +# y::AbstractVector, +# model::AbstractModelSet +# ) +# # check if it's needed also validation set +# # validation = haskey(VALIDATION, model.type) && getproperty(model.params, VALIDATION[model.type][1]) != VALIDATION[model.type][2] +# # valid_ratio = (validation && model.preprocess.valid_ratio == 1) ? 
0.8 : model.preprocess.valid_ratio

+#     prepare_dataset(
+#         X, y;
+#         algo=model.config.algo,
+#         treatment=model.config.treatment,
+#         features=model.features,
+#         # validation,
+#         # model.preprocess
+#         train_ratio=model.preprocess.train_ratio,
+#         valid_ratio=model.preprocess.valid_ratio,
+#         shuffle=model.preprocess.shuffle,
+#         stratified=model.preprocess.stratified,
+#         nfolds=model.preprocess.nfolds,
+#         rng=model.preprocess.rng,
+#         winparams=model.winparams,
+#     )
+# end

+# # y is not a vector, but a symbol or a string that identifies the column in X
+# function prepare_dataset(
+#     X::AbstractDataFrame,
+#     y::Union{Symbol,AbstractString},
+#     args...; kwargs...
+# )
+#     prepare_dataset(X[!, Not(y)], X[!, y], args...; kwargs...)
+# end

+# ---------------------------------------------------------------------------- #
+#                         feature selection preprocess                          #
+# ---------------------------------------------------------------------------- #
+"""
+    feature_selection_preprocess(
+        X::DataFrame;
+        vnames::Union{Vector{String}, Vector{Symbol}, Nothing}=nothing,
+        features::Union{Vector{<:Base.Callable}, Nothing}=nothing,
+        nwindows::Union{Int, Nothing}=nothing
+    ) -> DataFrame
+
+Process a DataFrame for feature selection by converting its columns into Feature objects.
+
+# Arguments
+- `X::DataFrame`: Input DataFrame containing time series data
+- `vnames::Union{Vector{String}, Vector{Symbol}, Nothing}=nothing`: Names for the variables.
+    If nothing, uses DataFrame column names
+- `features::Union{Vector{<:Base.Callable}, Nothing}=nothing`: Feature extraction functions.
+    If nothing, uses DEFAULT_FE.features
+- `nwindows::Union{Int, Nothing}=nothing`: Number of windows for time series segmentation.
+    If nothing, uses DEFAULT_FE_WINPARAMS
+
+# Returns
+- `DataFrame`: A DataFrame where each element is a Feature object containing:
+    - value: extracted feature value
+    - var: variable name
+    - feats: feature extraction function used
+    - nwin: window number
+
+# Example
+```julia
+# Basic usage with default parameters
+df = DataFrame(a = [rand(10) for _ in 1:5])
+result = feature_selection_preprocess(df)
+
+# Custom features and windows
+df = DataFrame(a = [rand(10) for _ in 1:5])
+result = feature_selection_preprocess(df,
+    features = [mean, std],
+    nwindows = 3
+)
+```
+"""
+function feature_selection_preprocess(
+    X::DataFrame;
+    vnames::Union{Vector{String}, Vector{Symbol}, Nothing}=nothing,
+    features::Union{Vector{<:Base.Callable}, Nothing}=nothing,
+    nwindows::Union{Int, Nothing}=nothing
+)
+    # check parameters; normalize vnames to String, as required by _treatment
+    vnames = isnothing(vnames) ? names(X) : string.(vnames)
+    isnothing(features) && (features = DEFAULT_FE.features)
+    treatment = :feature_selection
+    _ = _check_dimensions(X)
+    if !isnothing(nwindows)
+        nwindows > 0 || throw(ArgumentError("Number of windows must be positive."))
+    end
+    winparams = isnothing(nwindows) ? DEFAULT_FE_WINPARAMS :
+        merge(DEFAULT_FE_WINPARAMS, (nwindows = nwindows,))
+
+    _treatment(X, vnames, treatment, features, winparams)
+end
diff --git a/src/utils/features_set.jl b/src/utils/features_set.jl
new file mode 100644
index 0000000..13aa897
--- /dev/null
+++ b/src/utils/features_set.jl
@@ -0,0 +1,39 @@
+# ---------------------------------------------------------------------------- #
+#                        catch22 pretty named functions                         #
+# ---------------------------------------------------------------------------- #
+
+mode_5(x) = Catch22.DN_HistogramMode_5(x); @doc (@doc Catch22.DN_HistogramMode_5) mode_5
+mode_10(x) = Catch22.DN_HistogramMode_10(x); @doc (@doc Catch22.DN_HistogramMode_10) mode_10
+embedding_dist(x) = Catch22.CO_Embed2_Dist_tau_d_expfit_meandiff(x); @doc (@doc Catch22.CO_Embed2_Dist_tau_d_expfit_meandiff) embedding_dist
+acf_timescale(x) = Catch22.CO_f1ecac(x); @doc (@doc Catch22.CO_f1ecac) acf_timescale
+acf_first_min(x) = Catch22.CO_FirstMin_ac(x); @doc (@doc Catch22.CO_FirstMin_ac) acf_first_min
+ami2(x) = Catch22.CO_HistogramAMI_even_2_5(x); @doc (@doc Catch22.CO_HistogramAMI_even_2_5) ami2
+trev(x) = Catch22.CO_trev_1_num(x); @doc (@doc Catch22.CO_trev_1_num) trev
+outlier_timing_pos(x) = Catch22.DN_OutlierInclude_p_001_mdrmd(x); @doc (@doc Catch22.DN_OutlierInclude_p_001_mdrmd) outlier_timing_pos
+outlier_timing_neg(x) = Catch22.DN_OutlierInclude_n_001_mdrmd(x); @doc (@doc Catch22.DN_OutlierInclude_n_001_mdrmd) outlier_timing_neg
+whiten_timescale(x) = Catch22.FC_LocalSimple_mean1_tauresrat(x); @doc (@doc Catch22.FC_LocalSimple_mean1_tauresrat) whiten_timescale
+forecast_error(x) = Catch22.FC_LocalSimple_mean3_stderr(x); @doc (@doc Catch22.FC_LocalSimple_mean3_stderr) forecast_error
+ami_timescale(x) = Catch22.IN_AutoMutualInfoStats_40_gaussian_fmmi(x); @doc (@doc Catch22.IN_AutoMutualInfoStats_40_gaussian_fmmi) ami_timescale
+high_fluctuation(x) = Catch22.MD_hrv_classic_pnn40(x); @doc (@doc Catch22.MD_hrv_classic_pnn40) high_fluctuation
+stretch_decreasing(x) = Catch22.SB_BinaryStats_diff_longstretch0(x); @doc (@doc Catch22.SB_BinaryStats_diff_longstretch0) stretch_decreasing
+stretch_high(x) = Catch22.SB_BinaryStats_mean_longstretch1(x); @doc (@doc Catch22.SB_BinaryStats_mean_longstretch1) stretch_high
+entropy_pairs(x) = Catch22.SB_MotifThree_quantile_hh(x); @doc (@doc Catch22.SB_MotifThree_quantile_hh) entropy_pairs
+rs_range(x) = Catch22.SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1(x); @doc (@doc Catch22.SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1) rs_range
+dfa(x) = Catch22.SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1(x); @doc (@doc Catch22.SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1) dfa
+low_freq_power(x) = Catch22.SP_Summaries_welch_rect_area_5_1(x); @doc (@doc Catch22.SP_Summaries_welch_rect_area_5_1) low_freq_power
+centroid_freq(x) = Catch22.SP_Summaries_welch_rect_centroid(x); @doc (@doc Catch22.SP_Summaries_welch_rect_centroid) centroid_freq
+transition_variance(x) = Catch22.SB_TransitionMatrix_3ac_sumdiagcov(x); @doc (@doc Catch22.SB_TransitionMatrix_3ac_sumdiagcov) transition_variance
+periodicity(x) = Catch22.PD_PeriodicityWang_th0_01(x); @doc (@doc Catch22.PD_PeriodicityWang_th0_01) periodicity
+
+# ---------------------------------------------------------------------------- #
+#                                    catch9                                     #
+# ---------------------------------------------------------------------------- #
+base_set = [maximum, minimum, mean, std]
+catch9 = [maximum, minimum, mean, median, std, stretch_high, stretch_decreasing, entropy_pairs, transition_variance]
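Reviewer note (not part of the diff): the renamed wrappers above behave like ordinary feature functions, so the sets defined here and just below compose with plain iteration. A hypothetical sketch, assuming the package is loaded:

```julia
using SoleFeatures

x = randn(200)                   # toy series
entropy_pairs(x)                 # Catch22.SB_MotifThree_quantile_hh under the hood
scores = [f(x) for f in catch9]  # evaluate the whole catch9 set on one series
```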
+catch22_set = [mode_5, mode_10, embedding_dist, acf_timescale, acf_first_min, ami2, trev, outlier_timing_pos, + outlier_timing_neg, whiten_timescale, forecast_error, ami_timescale, high_fluctuation, stretch_decreasing, + stretch_high, entropy_pairs, rs_range, dfa, low_freq_power, centroid_freq, transition_variance, periodicity] +complete_set = [maximum, minimum, mean, median, std, StatsBase.cov, + mode_5, mode_10, embedding_dist, acf_timescale, acf_first_min, ami2, trev, outlier_timing_pos, + outlier_timing_neg, whiten_timescale, forecast_error, ami_timescale, high_fluctuation, stretch_decreasing, + stretch_high, entropy_pairs, rs_range, dfa, low_freq_power, centroid_freq, transition_variance, periodicity] \ No newline at end of file diff --git a/test/modules/prepare_dataset.jl b/test/modules/prepare_dataset.jl new file mode 100644 index 0000000..b0d6ff4 --- /dev/null +++ b/test/modules/prepare_dataset.jl @@ -0,0 +1,93 @@ +using SoleFeatures +using Test +using Sole +using Random, StatsBase, DataFrames +using MLJTuning + +# ---------------------------------------------------------------------------- # +# DATASET PREPARATION # +# ---------------------------------------------------------------------------- # +X, y = SoleData.load_arff_dataset("NATOPS") +train_seed = 11 +rng = Random.Xoshiro(train_seed) +Random.seed!(train_seed) + +# downsize dataset +num_cols_to_sample = 10 +num_rows_to_sample = 50 +chosen_cols = StatsBase.sample(rng, 1:size(X, 2), num_cols_to_sample; replace=false) +chosen_rows = StatsBase.sample(rng, 1:size(X, 1), num_rows_to_sample; replace=false) + +X = X[chosen_rows, chosen_cols] +y = y[chosen_rows] + +@testset "feature_selection_preprocess" begin + @testset "Basic functionality" begin + # Test default parameters + result = feature_selection_preprocess(X) + @test result isa DataFrame + @test all(col -> eltype(col) <: SoleFeatures.Feature, eachcol(result)) + @test size(result, 1) == size(X, 1) + + # Test first Feature object properties + first_feature = first(result[!, 1]) + @test first_feature isa SoleFeatures.Feature + @test first_feature.var isa String + @test first_feature.feats isa Symbol + @test first_feature.nwin isa Int + @test first_feature.nwin > 0 + end + + @testset "Custom parameters" begin + X2 = DataFrame( + temp = [rand(10) for _ in 1:5], + press = [rand(10) for _ in 1:5] + ) + + # Custom features and window + custom_features = [mean, std] + result = feature_selection_preprocess(X2, + features = custom_features, + nwindows = 3, + vnames = ["temperature", "pressure"] + ) + + # Check dimensions + expected_cols = length(custom_features) * size(X2, 2) * 3 # features * variables * windows + @test size(result, 2) == expected_cols + + # Check feature names + for (f, v, w) in Iterators.product(custom_features, ["temperature", "pressure"], 1:3) + col_name = "$(f)($(v))w$(w)" + @test col_name in names(result) + end + end + + @testset "Error handling" begin + # Test with empty DataFrame + @test_throws ArgumentError feature_selection_preprocess(DataFrame()) + + # Test with mixed dimensions + X_invalid = DataFrame( + a = [1.0, 2.0], + b = [[1.0, 2.0], [3.0, 4.0]] + ) + @test_throws DimensionMismatch feature_selection_preprocess(X_invalid) + + # Test with invalid windows + X = DataFrame(a = [rand(10) for _ in 1:5]) + @test_throws ArgumentError feature_selection_preprocess(X, nwindows = 0) + @test_throws ArgumentError feature_selection_preprocess(X, nwindows = -1) + end + + @testset "Performance" begin + # Create larger dataset + X = DataFrame( + [Symbol("var$i") => 
[rand(100) for _ in 1:100] for i in 1:5] + ) + + # Measure execution time + time_taken = @elapsed feature_selection_preprocess(X) + @test time_taken < 5.0 # Should complete within 5 seconds + end +end diff --git a/test/runtests.jl b/test/runtests.jl index e5ec4ce..24cfe6b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,223 +1,256 @@ -using HypothesisTests -using StatsBase -using Test -using Revise -using MultiData using SoleFeatures +using Test +using Random -include("./test_function.jl") - -@testset "SoleFeatures.jl" begin - - @testset "transform" begin - - @testset "transform!(md, bm; i_modality) using bitmask on a modality of MultiDataset" begin - df = random_timeseries_df(; nvar=10) - md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) - bm_mod = BitVector([0,1,0]) - idx_mod = 1 - # expected values - emfd = deepcopy(md) - MultiData.dropvariables!(emfd, [3,8]) - - SoleFeatures.transform!(md, bm_mod; i_modality=idx_mod) - - @test isequal(md, emfd) - end - - @testset "transform!(md, bm) using bitmask on whole MultiDataset" begin - df = random_timeseries_df(; nvar=5) - md = MultiData.MultiDataset(df, [[4,2,1], [5,3]]) - bm_mod = BitVector([0,1,0,1,1]) - # expected values - emfd = deepcopy(md) - MultiData.dropvariables!(emfd, [1,3]) - - SoleFeatures.transform!(md, bm_mod) - - @test isequal(md, emfd) - end - - @testset "transform!(df, bm) using bitmask on DataFrame" begin - df = random_timeseries_df(; nvar=5) - bm = BitVector([0,1,0,1,1]) - # expected values - edf = deepcopy(df) - select!(edf, [2,4,5]) - - SoleFeatures.transform!(df, bm) - - @test isequal(df, edf) - end - - @testset "transform(md, bm; i_modality) using bitmask on a modality of MultiDataset" begin - df = random_timeseries_df(; nvar=10) - md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) - bm_mod = BitVector([0,1,0]) - idx_mod = 1 - # expected values - emfd = deepcopy(md) - MultiData.dropvariables!(emfd, [3,8]) - - md = SoleFeatures.transform(md, bm_mod; i_modality=idx_mod) - - @test isequal(md, emfd) - end - - @testset "transform(md, bm) using bitmask on whole MultiDataset" begin - df = random_timeseries_df(; nvar=5) - md = MultiData.MultiDataset(df, [[4,2,1], [5,3]]) - bm_mod = BitVector([0,1,0,1,1]) - # expected values - emfd = deepcopy(md) - MultiData.dropvariables!(emfd, [1,3]) - - md = SoleFeatures.transform(md, bm_mod) - - @test isequal(md, emfd) - end - - @testset "transform(df, bm) using bitmask on DataFrame" begin - df = random_timeseries_df(; nvar=5) - bm = BitVector([0,1,0,1,1]) - # expected values - edf = deepcopy(df) - select!(edf, [2,4,5]) - - df = SoleFeatures.transform(df, bm) - - @test isequal(df, edf) - end - +function run_tests(list) + println("\n" * ("#"^50)) + for test in list + println("TEST: $test") + include(test) end +end - @testset "utils" begin - - @testset "_mod_bm2mfd_bm using array of frames and array of bitmasks" begin - df = random_timeseries_df(; nvar=10) - md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) - frms = [1,2,3] - bms = Vector{BitVector}([ [0,1,0],[0,0,1],[0,1,1,0] ]) - # expected values - ebm = BitVector([ 0,0,0,1,0,1,1,0,1,0 ]) - - resbm = SoleFeatures._mod_bm2mfd_bm(md, frms, bms) - - @test isequal(resbm, ebm) - end +println("Julia version: ", VERSION) - @testset "_mod_bm2mfd_bm using modality and bitmask" begin - df = random_timeseries_df(; nvar=10) - md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) - frm = 2 - bm = BitVector([0,0,1]) - # expected values - ebm = BitVector([ 0,0,1,1,1,1,1,1,1,1 ]) +test_suites = [ 
+ ("Prepare Dataset", ["modules/prepare_dataset.jl", ]), - resbm = SoleFeatures._mod_bm2mfd_bm(md, frm, bm) +] - @test isequal(resbm, ebm) +@testset "SoleFeatures.jl" begin + for ts in eachindex(test_suites) + name = test_suites[ts][1] + list = test_suites[ts][2] + let + @testset "$name" begin + run_tests(list) + end end - end + println() +end - @testset "selectors" begin - @testset "transform" begin +# using HypothesisTests +# using StatsBase +# using Test +# using Revise +# using MultiData +# using SoleFeatures - @testset "RandomRanking" begin - seed = 1997 - rr = RandomRanking(3, seed) - df = random_timeseries_df(;nvar=10) - # expected values - edf = deepcopy(df) - select!(edf, [6,2,5]) +# include("./test_function.jl") - SoleFeatures.transform!(df, rr) +# @testset "SoleFeatures.jl" begin - @test isequal(df, edf) - end +# @testset "transform" begin - @testset "VarianceThreshold" begin - df = random_df() - ndf = SoleFeatures.minmax_normalize(df; min_quantile=0.0, max_quantile=1.0) - vt = VarianceThreshold(0.09) - @test (SoleFeatures.transform!(df, vt) isa DataFrame) - end +# @testset "transform!(md, bm; i_modality) using bitmask on a modality of MultiDataset" begin +# df = random_timeseries_df(; nvar=10) +# md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) +# bm_mod = BitVector([0,1,0]) +# idx_mod = 1 +# # expected values +# emfd = deepcopy(md) +# MultiData.dropvariables!(emfd, [3,8]) - @testset "VarianceRanking" begin - df = random_df() - ndf = SoleFeatures.minmax_normalize(df; min_quantile=0.0, max_quantile=1.0) - vr = VarianceRanking(3) - @test (SoleFeatures.transform!(df, vr) isa DataFrame) - end +# SoleFeatures.transform!(md, bm_mod; i_modality=idx_mod) - @testset "StatisticalMajority" begin - df = random_df() - y = rand([:a, :b, :c], 100) - sm = StatisticalMajority(UnequalVarianceTTest) - @test (SoleFeatures.transform!(df, y, sm) isa DataFrame) - end +# @test isequal(md, emfd) +# end - @testset "StatisticalAtLeastOnce" begin - df = random_df() - y = rand([:a, :b, :c], 100) - sa = StatisticalAtLeastOnce(UnequalVarianceZTest) - @test (SoleFeatures.transform!(df, y, sa) isa DataFrame) - end +# @testset "transform!(md, bm) using bitmask on whole MultiDataset" begin +# df = random_timeseries_df(; nvar=5) +# md = MultiData.MultiDataset(df, [[4,2,1], [5,3]]) +# bm_mod = BitVector([0,1,0,1,1]) +# # expected values +# emfd = deepcopy(md) +# MultiData.dropvariables!(emfd, [1,3]) - @testset "CompoundStatisticalMajority" begin - df = random_df() - y = rand([:a, :b, :c], 100) - cm = CompoundStatisticalMajority(UnequalVarianceTTest, MannWhitneyUTest) - @test (SoleFeatures.transform!(df, y, cm) isa DataFrame) - end +# SoleFeatures.transform!(md, bm_mod) - @testset "CompoundStatisticalAtLeastOnce" begin - df = random_df() - y = rand([:a, :b, :c], 100) - ca = CompoundStatisticalAtLeastOnce(UnequalVarianceZTest, MannWhitneyUTest) - @test (SoleFeatures.transform!(df, y, ca) isa DataFrame) - end +# @test isequal(md, emfd) +# end - @testset "CorrelationFilter" begin - df = random_df() - cf = CorrelationFilter(cor, 0) - @test (SoleFeatures.transform!(df, cf) isa DataFrame) - end +# @testset "transform!(df, bm) using bitmask on DataFrame" begin +# df = random_timeseries_df(; nvar=5) +# bm = BitVector([0,1,0,1,1]) +# # expected values +# edf = deepcopy(df) +# select!(edf, [2,4,5]) - @testset "Chi2Filter" begin - df = random_df() - y = rand([:a, :b, :c], 100) - c2r = Chi2Ranking(3) - @test (SoleFeatures.transform!(df, y, c2r) isa DataFrame) - end +# SoleFeatures.transform!(df, bm) - @testset 
"PearsonCorRanking" begin - df = random_df() - y = rand(100) - c2r = PearsonCorRanking(3) - @test (SoleFeatures.transform!(df, y, c2r) isa DataFrame) - end +# @test isequal(df, edf) +# end - @testset "MutualInformationClassif" begin - df = random_df() - y = rand([:a, :b, :c], 100) - mir = MutualInformationClassifRanking(3) - @test (SoleFeatures.transform!(df, y, mir) isa DataFrame) - end - - # TODO: make this work: see the FIXME in the file test/runtests.jl - # @testset "VarianceRanking on MultiDataset" begin - # df = random_df(); - # df = SoleFeatures.minmax_normalize(df; min_quantile=0.0, max_quantile=1.0) - # md = MultiData.MultiDataset([ [1,2,3,4], [5] ], df) - # vr = VarianceRanking(3) - # @test (SoleFeatures.transform!(md, vr; i_modality=1) isa MultiDataset) - # end - - end +# @testset "transform(md, bm; i_modality) using bitmask on a modality of MultiDataset" begin +# df = random_timeseries_df(; nvar=10) +# md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) +# bm_mod = BitVector([0,1,0]) +# idx_mod = 1 +# # expected values +# emfd = deepcopy(md) +# MultiData.dropvariables!(emfd, [3,8]) - end +# md = SoleFeatures.transform(md, bm_mod; i_modality=idx_mod) -end +# @test isequal(md, emfd) +# end + +# @testset "transform(md, bm) using bitmask on whole MultiDataset" begin +# df = random_timeseries_df(; nvar=5) +# md = MultiData.MultiDataset(df, [[4,2,1], [5,3]]) +# bm_mod = BitVector([0,1,0,1,1]) +# # expected values +# emfd = deepcopy(md) +# MultiData.dropvariables!(emfd, [1,3]) + +# md = SoleFeatures.transform(md, bm_mod) + +# @test isequal(md, emfd) +# end + +# @testset "transform(df, bm) using bitmask on DataFrame" begin +# df = random_timeseries_df(; nvar=5) +# bm = BitVector([0,1,0,1,1]) +# # expected values +# edf = deepcopy(df) +# select!(edf, [2,4,5]) + +# df = SoleFeatures.transform(df, bm) + +# @test isequal(df, edf) +# end + +# end + +# @testset "utils" begin + +# @testset "_mod_bm2mfd_bm using array of frames and array of bitmasks" begin +# df = random_timeseries_df(; nvar=10) +# md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) +# frms = [1,2,3] +# bms = Vector{BitVector}([ [0,1,0],[0,0,1],[0,1,1,0] ]) +# # expected values +# ebm = BitVector([ 0,0,0,1,0,1,1,0,1,0 ]) + +# resbm = SoleFeatures._mod_bm2mfd_bm(md, frms, bms) + +# @test isequal(resbm, ebm) +# end + +# @testset "_mod_bm2mfd_bm using modality and bitmask" begin +# df = random_timeseries_df(; nvar=10) +# md = MultiData.MultiDataset(df, [[3,7,8], [1,2,4], [5,6,9,10]]) +# frm = 2 +# bm = BitVector([0,0,1]) +# # expected values +# ebm = BitVector([ 0,0,1,1,1,1,1,1,1,1 ]) + +# resbm = SoleFeatures._mod_bm2mfd_bm(md, frm, bm) + +# @test isequal(resbm, ebm) +# end + +# end + +# @testset "selectors" begin + +# @testset "transform" begin + +# @testset "RandomRanking" begin +# seed = 1997 +# rr = RandomRanking(3, seed) +# df = random_timeseries_df(;nvar=10) +# # expected values +# edf = deepcopy(df) +# select!(edf, [6,2,5]) + +# SoleFeatures.transform!(df, rr) + +# @test isequal(df, edf) +# end + +# @testset "VarianceThreshold" begin +# df = random_df() +# ndf = SoleFeatures.minmax_normalize(df; min_quantile=0.0, max_quantile=1.0) +# vt = VarianceThreshold(0.09) +# @test (SoleFeatures.transform!(df, vt) isa DataFrame) +# end + +# @testset "VarianceRanking" begin +# df = random_df() +# ndf = SoleFeatures.minmax_normalize(df; min_quantile=0.0, max_quantile=1.0) +# vr = VarianceRanking(3) +# @test (SoleFeatures.transform!(df, vr) isa DataFrame) +# end + +# @testset "StatisticalMajority" begin +# df = 
random_df() +# y = rand([:a, :b, :c], 100) +# sm = StatisticalMajority(UnequalVarianceTTest) +# @test (SoleFeatures.transform!(df, y, sm) isa DataFrame) +# end + +# @testset "StatisticalAtLeastOnce" begin +# df = random_df() +# y = rand([:a, :b, :c], 100) +# sa = StatisticalAtLeastOnce(UnequalVarianceZTest) +# @test (SoleFeatures.transform!(df, y, sa) isa DataFrame) +# end + +# @testset "CompoundStatisticalMajority" begin +# df = random_df() +# y = rand([:a, :b, :c], 100) +# cm = CompoundStatisticalMajority(UnequalVarianceTTest, MannWhitneyUTest) +# @test (SoleFeatures.transform!(df, y, cm) isa DataFrame) +# end + +# @testset "CompoundStatisticalAtLeastOnce" begin +# df = random_df() +# y = rand([:a, :b, :c], 100) +# ca = CompoundStatisticalAtLeastOnce(UnequalVarianceZTest, MannWhitneyUTest) +# @test (SoleFeatures.transform!(df, y, ca) isa DataFrame) +# end + +# @testset "CorrelationFilter" begin +# df = random_df() +# cf = CorrelationFilter(cor, 0) +# @test (SoleFeatures.transform!(df, cf) isa DataFrame) +# end + +# @testset "Chi2Filter" begin +# df = random_df() +# y = rand([:a, :b, :c], 100) +# c2r = Chi2Ranking(3) +# @test (SoleFeatures.transform!(df, y, c2r) isa DataFrame) +# end + +# @testset "PearsonCorRanking" begin +# df = random_df() +# y = rand(100) +# c2r = PearsonCorRanking(3) +# @test (SoleFeatures.transform!(df, y, c2r) isa DataFrame) +# end + +# @testset "MutualInformationClassif" begin +# df = random_df() +# y = rand([:a, :b, :c], 100) +# mir = MutualInformationClassifRanking(3) +# @test (SoleFeatures.transform!(df, y, mir) isa DataFrame) +# end + +# # TODO: make this work: see the FIXME in the file test/runtests.jl +# # @testset "VarianceRanking on MultiDataset" begin +# # df = random_df(); +# # df = SoleFeatures.minmax_normalize(df; min_quantile=0.0, max_quantile=1.0) +# # md = MultiData.MultiDataset([ [1,2,3,4], [5] ], df) +# # vr = VarianceRanking(3) +# # @test (SoleFeatures.transform!(md, vr; i_modality=1) isa MultiDataset) +# # end + +# end + +# end + +# end
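Reviewer note (not part of the diff): an end-to-end sketch of the new entry point, mirroring the added test; the toy data and column names are hypothetical:

```julia
using SoleFeatures, DataFrames, Statistics, Random

# 5 instances, 2 variables, each cell a length-10 time series
rng = Xoshiro(1)
X = DataFrame(
    temp  = [rand(rng, 10) for _ in 1:5],
    press = [rand(rng, 10) for _ in 1:5],
)

# mean and std over 3 adaptive windows per series; every cell of the
# result is a `Feature` carrying the extracted value plus its provenance
result = feature_selection_preprocess(X; features = [mean, std], nwindows = 3)
size(result)   # expected (5, 12): 2 features × 2 variables × 3 windows

f = result[1, 1]
f.value                          # extracted scalar
SoleFeatures.variable_name(f)    # "temp"
SoleFeatures.feature_type(f)     # :mean
SoleFeatures.window_number(f)    # 1
f < 1.0                          # Features compare like numbers via their value
```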