diff --git a/CHANGELOG.md b/CHANGELOG.md index 98341e4d..61c4f6dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# v0.16.4 + +Add [MLJ.jl](https://github.com/alan-turing-institute/MLJ.jl) support. + # v0.16.3 New function: `midlife`. diff --git a/Project.toml b/Project.toml index ae5c0bf7..27f18acf 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Ripserer" uuid = "aa79e827-bd0b-42a8-9f10-2b302677a641" authors = ["mtsch "] -version = "0.16.3" +version = "0.16.4" [deps] Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" @@ -11,6 +11,7 @@ Future = "9fa8497b-333b-5362-9e8d-4d0656e87820" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" MiniQhull = "978d7f02-9e05-4691-894f-ae31a51d76ca" PersistenceDiagrams = "90b4794c-894b-4756-a0f8-5efeb5ddf7ae" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" @@ -25,8 +26,9 @@ DataStructures = "0.17, 0.18" Distances = "0.8, 0.9, 0.10" IterTools = "1" LightGraphs = "1.3.3" +MLJModelInterface = "^0.3.5" MiniQhull = "0.2" -PersistenceDiagrams = "^0.8.2" +PersistenceDiagrams = "0.9" ProgressMeter = "1" RecipesBase = "1" StaticArrays = "0.12, 1" @@ -36,10 +38,11 @@ julia = "1" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "Documenter", "Random", "SafeTestsets", "Suppressor", "Test"] +test = ["Aqua", "Documenter", "MLJBase", "Random", "SafeTestsets", "Suppressor", "Test"] diff --git a/docs/Project.toml b/docs/Project.toml index 2b95603f..53f34982 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,9 +3,12 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6" GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" +ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19" ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1" Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" +MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" PersistenceDiagrams = "90b4794c-894b-4756-a0f8-5efeb5ddf7ae" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/docs/src/api.md b/docs/src/api.md index 13274269..1592843e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -114,6 +114,20 @@ Ripserer.Chain Mod ``` +## MLJ.jl Interface + +```@docs +Ripserer.RipsPersistentHomology +``` + +```@docs +Ripserer.AlphaPersistentHomology +``` + +```@docs +Ripserer.CubicalPersistentHomology +``` + ## Experimental Features ```@docs diff --git a/docs/src/examples/cubical.jl b/docs/src/examples/cubical.jl index f4b2881a..b984d054 100644 --- a/docs/src/examples/cubical.jl +++ b/docs/src/examples/cubical.jl @@ -24,9 +24,9 @@ curve_plot = plot(curve; legend=false, title="Curve") # will use a small, 240×240 pixel version of the image. Ripserer should have no problems # with processing larger images, but this will work well enough for this tutorial. -blackhole_image = load(joinpath( - @__DIR__, "../assets/data/240px-Black_hole_-_Messier_87_crop_max_res.jpg" -)) +blackhole_image = load( + joinpath(@__DIR__, "../assets/data/240px-Black_hole_-_Messier_87_crop_max_res.jpg") +) blackhole_plot = plot(blackhole_image; title="Black Hole") # To use the image with Ripserer, we have to convert it to grayscale. diff --git a/docs/src/examples/malaria.jl b/docs/src/examples/malaria.jl index 12c03f03..12ef3385 100644 --- a/docs/src/examples/malaria.jl +++ b/docs/src/examples/malaria.jl @@ -1,9 +1,11 @@ -# # Image Classification With Cubical Filtrations and Persistence Images +# # Image Classification With Cubical Persistent Homology # In this example, we will show how to use Ripserer in an image classification # context. Persistent homology is not a predictive algorithm, but it can be used to extract # useful features from data. +# ## Setting up + using Ripserer using PersistenceDiagrams using Images # also required: ImageIO to read .png files @@ -104,6 +106,8 @@ plot(plot(dim_1[end]; persistence=true), heatmap(image_1(dim_1[end]); aspect_rat persims = [[vec(image_0(dim_0[i])); vec(image_1(dim_1[i]))] for i in 1:length(diagrams)] +# ## Fitting A Model + # Now it's time to fit our model. We will use # [GLMNet.jl](https://github.com/JuliaStats/GLMNet.jl) to fit a regularized linear model. @@ -137,7 +141,7 @@ nothing; # hide # Get the classification accuracy. -accuracy = count(predictions .== test_y) / length(test_y) +count(predictions .== test_y) / length(test_y) # Not half bad considering we haven't touched the images and we left pretty much all # settings on default. @@ -158,3 +162,52 @@ plot( # These correspond to the area we identified at the beginning. Also note that in this case, # the classifier does not care about ``H_1`` at all. + +# ## Using MLJ + +# Another, more straightforward way to execute a similar pipeline is to use Ripserer's +# [MLJ.jl](https://github.com/alan-turing-institute/MLJ.jl) integration. We will use a +# random forest classifier for this example. + +# We start by loading MLJ and the classifier. Not that +# [MLJDecisionTreeInterface.jl](https://github.com/bensadeghi/DecisionTree.jl) needs to be +# installed for this to work. + +using MLJ +tree = @load RandomForestClassifier pkg = "DecisionTree" verbosity = 0 + +# We create a pipeline of `CubicalPersistentHomology` followed by the classifier. In this +# case, `CubicalPersistentHomology` takes care of both the homology computation and the +# conversion to persistence images. + +pipe = @pipeline(CubicalPersistentHomology(), tree) + +# We train the pipeline the same way you would fit any other MLJ model. Remember, we need to +# use grayscale versions of images stored in `inputs`. + +classes = coerce(classes, Binary) +train, test = partition(eachindex(classes), 0.7; shuffle=true, rng=1337) +mach = machine(pipe, inputs, classes) +fit!(mach; rows=train) + +# Next, we predict the classes on the test data and print out the classification accuracy. + +yhat = predict_mode(mach, inputs[test]) +accuracy(yhat, classes[test]) + +# The result is quite a bit worse than before. We can try mitigating that by using a +# different vectorizer. + +pipe.cubical_persistent_homology.vectorizer = PersistenceCurveVectorizer() +mach = machine(pipe, inputs, classes) +fit!(mach; rows=train) + +yhat = predict_mode(mach, inputs[test]) +accuracy(yhat, classes[test]) + +# The result could be improved further by choosing a different model and +# vectorizer. However, this is just a short introduction. Please see the [MLJ.jl +# documentation](https://alan-turing-institute.github.io/MLJ.jl/dev/) for more information +# on model tuning and selection, and the [PersistenceDiagrams.jl +# documentation](https://mtsch.github.io/PersistenceDiagrams.jl/dev/mlj/) for a list of +# vectorizers and their options. diff --git a/docs/src/index.md b/docs/src/index.md index 336af570..110125a8 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -48,6 +48,7 @@ Ripserer and its companion package * Various persistence diagram vectorization functions, implemented with persistence images and persistence curves. * Easy extensibility through a documented API. +* Integration with [MLJ.jl](https://github.com/alan-turing-institute/MLJ.jl). * Experimental shortest representative cycle computation. * Experimental sparse circular coordinate computation. diff --git a/src/Ripserer.jl b/src/Ripserer.jl index 73fced1c..8798f4c3 100644 --- a/src/Ripserer.jl +++ b/src/Ripserer.jl @@ -25,6 +25,8 @@ using RecipesBase using StaticArrays using TupleTools +import MLJModelInterface + # This functionality is imported to avoid having to deal with name clashes. There is no # piracy involved here. import LightGraphs: vertices, edges, nv, adjacency_matrix @@ -51,7 +53,10 @@ export Mod, ripserer, reconstruct_cycle, Partition, - CircularCoordinates + CircularCoordinates, + RipsPersistentHomology, + AlphaPersistentHomology, + CubicalPersistentHomology include("base/primefield.jl") include("base/abstractcell.jl") @@ -77,5 +82,6 @@ include("filtrations/edgecollapse.jl") include("extra/cycles.jl") include("extra/circularcoordinates.jl") +include("extra/mlj.jl") end diff --git a/src/extra/cycles.jl b/src/extra/cycles.jl index 911ebbdb..b3080ff5 100644 --- a/src/extra/cycles.jl +++ b/src/extra/cycles.jl @@ -135,9 +135,13 @@ function reconstruct_cycle( distances=distance_matrix(filtration), ) where {T} if !hasproperty(interval, :representative) - throw(ArgumentError("interval has no representative! Run `ripserer` with `reps=true`")) + throw( + ArgumentError("interval has no representative! Run `ripserer` with `reps=true`") + ) elseif !(eltype(interval.representative) <: AbstractChainElement{<:AbstractCell{1}}) - throw(ArgumentError("cycles can only be reconstructed for 1-dimensional intervals.")) + throw( + ArgumentError("cycles can only be reconstructed for 1-dimensional intervals.") + ) elseif !(birth(interval) ≤ _birth_or_value(r) < death(interval)) return simplex_type(filtration, 1)[] else diff --git a/src/extra/mlj.jl b/src/extra/mlj.jl new file mode 100644 index 00000000..c7c459d0 --- /dev/null +++ b/src/extra/mlj.jl @@ -0,0 +1,241 @@ +const MMI = MLJModelInterface + +using PersistenceDiagrams: AbstractVectorizer + +abstract type RipsererModel <: MMI.Unsupervised end + +const PointLike{N} = AbstractVector{NTuple{N,MMI.Continuous}} +const DistanceMatrix = AbstractMatrix{MMI.Continuous} +const ImageLike{N} = AbstractArray{MMI.Continuous,N} + +function _transform(model::RipsererModel, verbosity::Int, X) + result = (; [Symbol(:dim_, i) => PersistenceDiagram[] for i in 0:(model.dim_max)]...) + if verbosity > 0 + progbar = Progress(MMI.nrows(X); desc="Computing persistent homology...") + end + for x in X + r = ripserer(_filtration(model, x); _ripserer_args(model)...) + for i in 1:(model.dim_max + 1) + push!(result[i], r[i]) + end + verbosity > 0 && next!(progbar) + end + return MMI.table(result) +end + +function MMI.fit(model::RipsererModel, verbosity::Int, X) + diagrams = _transform(model, verbosity, X) + vectorizers, _, report = MMI.fit(model.vectorizer, verbosity, diagrams) + return (vectorizers, nothing, report) +end + +function MMI.transform(model::RipsererModel, vectorizers, X) + diagrams = _transform(model, 0, X) + return MMI.transform(model.vectorizer, vectorizers, diagrams) +end + +MMI.output_scitype(::Type{<:RipsererModel}) = MMI.Table(MMI.Continuous) + +""" + RipsPersistentHomology + +Compute Vietoris-Rips persistent homology and convert the results to a table of continuous +values. + +!!! warning Warning + Computing Vietoris-Rips persistent homology may be CPU and memory intensive even for + modestly-sized data sets. Consider using [`AlphaPersistentHomology`](@ref) for + low-dimensional data. + +# Hyperparameters + +* `vectorizer = PersistenceImageVectorizer()`: `AbstractVectorizer` used to transform + persistence diagrams to continuous vectors. See the [PersistenceDiagrams.jl + Documentation](https://mtsch.github.io/PersistenceDiagrams.jl/dev/mlj/) for more + information. + +* `dim_max = 1`: compute persistent homology up to this dimension. + +* `modulus = 2`: compute persistent homology with coefficients in the prime field of integers + mod `modulus`. + +* `threshold = nothing`: compute persistent homology up to diameter smaller than threshold. + +* `cutoff = 0`: only keep intervals with `persistence(interval) > cutoff`. + +* `sparse = false`: use a sparse Rips filtration. + +* `collapse = false`: use the [`EdgeCollapsedRips`](@ref) filtration. This is usually a good + choice for computations with a high `dim_max`. + +# See also + +* [`ripserer`](@ref) +* [`Rips`](@ref) +* [`EdgeCollapsedRips`](@ref) + +""" +MMI.@mlj_model mutable struct RipsPersistentHomology <: RipsererModel + vectorizer::AbstractVectorizer = PersistenceImageVectorizer() + dim_max::Int = 1::(_ ≥ 0) + modulus::Int = 2::(is_prime(_)) + threshold::Union{Nothing,Float64} = nothing::(isnothing(_) || _ > 0) + cutoff::Float64 = 0::(_ ≥ 0) + sparse::Bool = false + collapse::Bool = false +end + +function _filtration(model::RipsPersistentHomology, x) + if isnothing(model.threshold) + kwargs = (; threshold=model.threshold, sparse=model.sparse) + else + kwargs = (; sparse=model.sparse) + end + if model.collapse + return EdgeCollapsedRips(x; kwargs...) + else + return Rips(x; kwargs...) + end +end + +function _ripserer_args(model::RipsPersistentHomology) + return (; dim_max=model.dim_max, cutoff=model.cutoff, modulus=model.modulus) +end + +function MMI.input_scitype(::Type{<:RipsPersistentHomology}) + return Union{ + MMI.Table(PointLike{N}) where {N}, + MMI.Table(DistanceMatrix), + AbstractVector{PointLike{N}} where {N}, + AbstractVector{DistanceMatrix}, + } +end + +""" + AlphaPersistentHomology + +Compute alpha complex persistent homology and convert the results to a table of continuous +values. + +!!! warning Warning + Using high-dimensional data with this model may be computationaly expensive. Consider + using [`RipsPersistentHomology`](@ref). + +!!! warning + This model uses [MiniQhull.jl](https://github.com/gridap/MiniQhull.jl). Please see + the installation instructions if fitting causes errors. MiniQhull currently has + problems running on Windows. See [this + issue](https://github.com/gridap/MiniQhull.jl/issues/5) for more info. + +# Hyperparameters + +* `vectorizer = PersistenceImageVectorizer()`: `AbstractVectorizer` used to transform + persistence diagrams to continuous vectors. See the [PersistenceDiagrams.jl + Documentation](https://mtsch.github.io/PersistenceDiagrams.jl/dev/mlj/) for more + information. + +* `dim_max = 1`: compute persistent homology up to this dimension. + +* `modulus = 2`: compute persistent homology with coefficients in the prime field of integers + mod `modulus`. + +* `threshold = nothing`: compute persistent homology up to diameter smaller than threshold. + +* `cutoff = 0`: only keep intervals with `persistence(interval) > cutoff`. + +# See also + +* [`ripserer`](@ref) +* [`Alpha`](@ref) + +""" +MMI.@mlj_model mutable struct AlphaPersistentHomology <: RipsererModel + vectorizer::AbstractVectorizer = PersistenceImageVectorizer() + dim_max::Int = 1::(_ ≥ 0) + modulus::Int = 2::(is_prime(_)) + threshold::Union{Nothing,Float64} = nothing::(isnothing(_) || _ > 0) + cutoff::Float64 = 0::(_ ≥ 0) +end + +function _filtration(model::AlphaPersistentHomology, data) + if isnothing(model.threshold) + return Alpha(data) + else + return Alpha(data; threshold=model.threshold) + end +end + +function _ripserer_args(model::AlphaPersistentHomology) + return (; dim_max=model.dim_max, cutoff=model.cutoff, modulus=model.modulus) +end + +function MMI.input_scitype(::Type{<:AlphaPersistentHomology}) + return Union{MMI.Table(PointLike{N}),AbstractVector{PointLike{N}}} where {N} +end + +""" + CubicalPersistentHomology + +Compute cubical persistent homology and convert the results to a table of continuous +values. + +# Hyperparameters + +* `vectorizer = PersistenceImageVectorizer()`: `AbstractVectorizer` used to transform + persistence diagrams to continuous vectors. See the [PersistenceDiagrams.jl + Documentation](https://mtsch.github.io/PersistenceDiagrams.jl/dev/mlj/) for more + information. + +* `dim_max = 1`: compute persistent homology up to this dimension. + +* `threshold = nothing`: compute persistent homology up to diameter smaller than threshold. + +* `cutoff = 0`: only keep intervals with `persistence(interval) > cutoff`. + +* `negate = false`: negate the image before computation. + +# See also + +* [`ripserer`](@ref) +* [`Cubical`](@ref) + +""" +MMI.@mlj_model mutable struct CubicalPersistentHomology <: RipsererModel + vectorizer::AbstractVectorizer = PersistenceImageVectorizer() + dim_max::Int = 1::(_ ≥ 0) + threshold::Union{Nothing,Float64} = nothing::(isnothing(_) || _ > 0) + cutoff::Float64 = 0::(_ ≥ 0) + negate::Bool = false +end + +function _filtration(model::CubicalPersistentHomology, data) + data = model.negate ? -data : data + if isnothing(model.threshold) + return Cubical(data) + else + return Cubical(data; threshold=model.threshold) + end +end + +function _ripserer_args(model::CubicalPersistentHomology) + return (; dim_max=model.dim_max, cutoff=model.cutoff) +end + +function MMI.input_scitype(::Type{<:CubicalPersistentHomology}) + return Union{ + MMI.Table(ImageLike{N}) where {N}, + MMI.Table(MMI.Image), + AbstractVector{ImageLike{N}} where {N}, + MMI.AbstractVector{MMI.Image}, + } +end + +MMI.metadata_pkg.( + (RipsPersistentHomology, AlphaPersistentHomology, CubicalPersistentHomology), + name="Ripserer", + uuid="aa79e827-bd0b-42a8-9f10-2b302677a641", + url="https://github.com/mtsch/Ripserer.jl", + license="MIT", + julia=true, + is_wrapper=false, +) diff --git a/src/filtrations/custom.jl b/src/filtrations/custom.jl index c68e262e..021ee0be 100644 --- a/src/filtrations/custom.jl +++ b/src/filtrations/custom.jl @@ -22,8 +22,8 @@ Base.getindex(cf::AbstractCustomFiltration, d) = cf[Val(d)] function Base.getindex(cf::AbstractCustomFiltration, ::Val{D}) where {D} if D ≤ dim(cf) return [ - simplex_type(cf, D)(i, b) - for (i, b) in simplex_dicts(cf)[D + 1] if b ≤ threshold(cf) + simplex_type(cf, D)(i, b) for + (i, b) in simplex_dicts(cf)[D + 1] if b ≤ threshold(cf) ] else return simplex_type(cf, D)[] diff --git a/src/filtrations/rips.jl b/src/filtrations/rips.jl index 7e8c6ac3..d0ab5b45 100644 --- a/src/filtrations/rips.jl +++ b/src/filtrations/rips.jl @@ -131,8 +131,11 @@ function _check_distance_matrix(dist::SparseMatrixCSC) i == rows[j] && continue edge_birth = vals[j] iszero(edge_birth) && throw(ArgumentError("zero edges in input matrix")) - edge_birth < vertex_birth && - throw(ArgumentError("edges with birth value lower than vertex births in input matrix")) + edge_birth < vertex_birth && throw( + ArgumentError( + "edges with birth value lower than vertex births in input matrix" + ), + ) edge_birth ≠ dist[i, rows[j]] && throw(ArgumentError("input matrix not symmetric")) end @@ -146,8 +149,11 @@ function _check_distance_matrix(dist::AbstractMatrix) vertex_birth = max(dist[j, j], dist[i, i]) edge_birth = dist[j, i] iszero(edge_birth) && throw(ArgumentError("zero edges in input matrix")) - edge_birth < vertex_birth && - throw(ArgumentError("edges with birth value lower than vertex births in input matrix")) + edge_birth < vertex_birth && throw( + ArgumentError( + "edges with birth value lower than vertex births in input matrix" + ), + ) edge_birth ≠ dist[i, j] && throw(ArgumentError("input matrix not symmetric")) end end diff --git a/test/base/plotting.jl b/test/base/plotting.jl index c5fb4b3c..95d99682 100644 --- a/test/base/plotting.jl +++ b/test/base/plotting.jl @@ -46,16 +46,18 @@ series(args...; kwargs...) = apply_recipe(Dict{Symbol,Any}(kwargs), args...) @test length(series(sx, data)) == 1 @test length(series([sx], data)) == 1 @test length(series([sx], data)) == 1 - @test length(series( - PersistenceInterval( - 1.0, - 1.0; - birth_simplex=sx, - death_simplex=fsx, - representative=[ChainElement{typeof(sx),typeof(1//1)}(sx, 1//1)], + @test length( + series( + PersistenceInterval( + 1.0, + 1.0; + birth_simplex=sx, + death_simplex=fsx, + representative=[ChainElement{typeof(sx),typeof(1//1)}(sx, 1//1)], + ), + data, ), - data, - )) == 1 + ) == 1 @test_throws ErrorException series(PersistenceInterval(1.0, 1.0), data) end end diff --git a/test/extra/circularcoordinates.jl b/test/extra/circularcoordinates.jl index ff2d3a43..b1fdc175 100644 --- a/test/extra/circularcoordinates.jl +++ b/test/extra/circularcoordinates.jl @@ -43,13 +43,15 @@ include("../testdatasets.jl") end @testset "Cocycles" begin - rips = Rips([ - 0.0 1.1 2.0 2.0 1.2 - 1.1 0.0 1.0 2.0 2.0 - 2.0 1.0 0.0 1.0 2.0 - 2.0 2.0 1.0 0.0 1.0 - 1.2 2.0 2.0 1.0 0.0 - ]) + rips = Rips( + [ + 0.0 1.1 2.0 2.0 1.2 + 1.1 0.0 1.0 2.0 2.0 + 2.0 1.0 0.0 1.0 2.0 + 2.0 2.0 1.0 0.0 1.0 + 1.2 2.0 2.0 1.0 0.0 + ], + ) cocycle = ripserer(rips; modulus=17, reps=true)[2][1].representative integral = _to_integer_coefficients(cocycle) diff --git a/test/extra/cycles.jl b/test/extra/cycles.jl index 6f5b82b7..629ff744 100644 --- a/test/extra/cycles.jl +++ b/test/extra/cycles.jl @@ -6,14 +6,16 @@ using Ripserer: distance_matrix, OneSkeleton @testset "OneSkeleton respects the LightGraphs interface" begin @testset "no threshold or removed simplices" begin - flt = Rips([ - 0 1 2 3 2 1 - 1 0 1 2 3 2 - 2 1 0 1 2 3 - 3 2 1 0 1 2 - 2 3 2 1 0 1 - 1 2 3 2 1 0 - ]) + flt = Rips( + [ + 0 1 2 3 2 1 + 1 0 1 2 3 2 + 2 1 0 1 2 3 + 3 2 1 0 1 2 + 2 3 2 1 0 1 + 1 2 3 2 1 0 + ] + ) g = OneSkeleton(flt) @test eltype(g) ≡ Int @@ -53,8 +55,8 @@ using Ripserer: distance_matrix, OneSkeleton @test !has_vertex(g, 7) @test !has_vertex(g, 0) @test edges(g) == [ - Edge(j, i) for i in 1:5 - for j in (i + 1):6 if dists[i, j] ≠ 3 && (j, i) ∉ [(2, 1), (3, 1)] + Edge(j, i) for i in 1:5 for + j in (i + 1):6 if dists[i, j] ≠ 3 && (j, i) ∉ [(2, 1), (3, 1)] ] @test vertices(g) == 1:6 @test is_directed(g) == is_directed(typeof(g)) == false @@ -125,14 +127,16 @@ end @testset "Cycle reconstruction" begin @testset "small cycle" begin - flt = Rips([ - 0 1 2 3 2 1 - 1 0 1 2 3 2 - 2 1 0 1 2 3 - 3 2 1 0 1 2 - 2 3 2 1 0 1 - 1 2 3 2 1 0 - ]) + flt = Rips( + [ + 0 1 2 3 2 1 + 1 0 1 2 3 2 + 2 1 0 1 2 3 + 3 2 1 0 1 2 + 2 3 2 1 0 1 + 1 2 3 2 1 0 + ] + ) interval = ripserer(flt; reps=true)[2][1] cyc1 = reconstruct_cycle(flt, interval) @@ -174,14 +178,15 @@ end @testset "circle points over a grid with a hole" begin # The idea here is that at birth time, the cycle is a circle and at time 1, the # cycle is a square surrounding the hole. - pts = unique!(vcat( - [(3sin(t), 3cos(t)) for t in range(0, 2π; length=22)[1:(end - 1)]], - vec([ - (i - 5, j - 5) - for - (i, j) in Iterators.product(0.0:10.0, 0.0:10.0) if !(i ∈ 4:6 && j ∈ 4:6) - ]), - )) + pts = unique!( + vcat( + [(3sin(t), 3cos(t)) for t in range(0, 2π; length=22)[1:(end - 1)]], + vec([ + (i - 5, j - 5) for + (i, j) in Iterators.product(0.0:10.0, 0.0:10.0) if !(i ∈ 4:6 && j ∈ 4:6) + ]), + ), + ) flt = Rips(pts) interval = sort!(ripserer(flt; reps=true)[2]; by=persistence)[end] @@ -277,14 +282,16 @@ end end @testset "Errors" begin - flt = Rips([ - 0 1 2 3 2 1 - 1 0 1 2 3 2 - 2 1 0 1 2 3 - 3 2 1 0 1 2 - 2 3 2 1 0 1 - 1 2 3 2 1 0 - ]) + flt = Rips( + [ + 0 1 2 3 2 1 + 1 0 1 2 3 2 + 2 1 0 1 2 3 + 3 2 1 0 1 2 + 2 3 2 1 0 1 + 1 2 3 2 1 0 + ] + ) _, d1, d2 = ripserer(flt; reps=2, dim_max=2) @test_throws ArgumentError reconstruct_cycle(flt, d1[1]) diff --git a/test/extra/mlj.jl b/test/extra/mlj.jl new file mode 100644 index 00000000..37e5b827 --- /dev/null +++ b/test/extra/mlj.jl @@ -0,0 +1,73 @@ +using Ripserer +using Test +using MLJBase +using PersistenceDiagrams + +include("../testdatasets.jl") + +function test_mlj_model(model, X) + mach = machine(model, X) + fit!(mach; verbosity=0) + Xt = transform(mach, X) + + @test nrows(Xt) == length(X) + @test length(first(Xt)) ≥ 30 + + model.vectorizer = PersistenceCurveVectorizer() + fit!(mach; verbosity=0) + Xt = transform(mach, X) + + @test nrows(Xt) == length(X) + @test length(first(Xt)) == 20 + + model.vectorizer.length = 100 + fit!(mach; verbosity=0) + Xt = transform(mach, X) + @test nrows(Xt) == length(X) + @test length(first(Xt)) == 200 + + model.dim_max = 2 + fit!(mach; verbosity=0) + Xt = transform(mach, X) + @test nrows(Xt) == length(X) + @test length(first(Xt)) == 300 +end + +@testset "Rips" begin + @testset "point-like input" begin + X = [torus_points(100), torus_points(50), torus_points(10)] + test_mlj_model(RipsPersistentHomology(), X) + end + @testset "distance matrix input, modulus" begin + X = [icosahedron, cycle, projective_plane] + + model = RipsPersistentHomology() + mach = machine(model, X) + fit!(mach; verbosity=0) + Xt1 = transform(mach, X) + + model.modulus = 3 + mach = machine(model, X) + fit!(mach; verbosity=0) + Xt2 = transform(mach, X) + + @test Xt1 ≠ Xt2 + end +end + +@testset "Alpha" begin + @static if Sys.iswindows() + @test_broken begin + Alpha(points) + true + end + else + X = [torus_points(500), torus_points(400), torus_points(300)] + test_mlj_model(AlphaPersistentHomology(), X) + end +end + +@testset "Cubical" begin + X = [rand(5, 10, 20), rand(10, 10, 10), rand(15, 5, 11)] + test_mlj_model(CubicalPersistentHomology(), X) +end diff --git a/test/filtrations/custom.jl b/test/filtrations/custom.jl index 6fa65e76..3853b2ed 100644 --- a/test/filtrations/custom.jl +++ b/test/filtrations/custom.jl @@ -56,14 +56,16 @@ end 1 2 1 0 ] - cf = Custom([ - [(i,) => 0 for i in 1:4] - [(i, j) => dist[i, j] for i in 1:4 for j in (i + 1):4] + cf = Custom( [ - (i, j, k) => max(dist[i, j], dist[i, k], dist[j, k]) for i in 1:4 - for j in (i + 1):4 for k in (j + 1):4 - ] - ]) + [(i,) => 0 for i in 1:4] + [(i, j) => dist[i, j] for i in 1:4 for j in (i + 1):4] + [ + (i, j, k) => max(dist[i, j], dist[i, k], dist[j, k]) for i in 1:4 for + j in (i + 1):4 for k in (j + 1):4 + ] + ], + ) @test simplex(cf, Val(3), (4, 3, 2, 1)) == nothing @test births(cf) == zeros(4) @@ -71,14 +73,16 @@ end end @testset "Custom filtration vs sparse Rips" begin - spdist = sparse([ - 0 1 0 0 0 1 - 1 0 1 0 0 0 - 0 1 0 1 0 0 - 0 0 1 0 1 0 - 0 0 0 1 0 1 - 1 0 0 0 1 0 - ]) + spdist = sparse( + [ + 0 1 0 0 0 1 + 1 0 1 0 0 0 + 0 1 0 1 0 0 + 0 0 1 0 1 0 + 0 0 0 1 0 1 + 1 0 0 0 1 0 + ] + ) cf = Custom([ (1,) => 0, (2,) => 0, diff --git a/test/runtests.jl b/test/runtests.jl index d417f4a1..e34f16a4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -49,6 +49,9 @@ end @safetestset "circularcoordinates" begin include("extra/circularcoordinates.jl") end +@safetestset "mlj" begin + include("extra/mlj.jl") +end @safetestset "aqua" begin include("aqua.jl") diff --git a/test/testdatasets.jl b/test/testdatasets.jl index e7922e8e..8c48432c 100644 --- a/test/testdatasets.jl +++ b/test/testdatasets.jl @@ -119,9 +119,9 @@ euclidean space. function torus_points(n, R=2, r=1) n = floor(Int, sqrt(n)) return [ - ((R + r * cos(θ)) * cos(φ), (R + r * cos(θ)) * sin(φ), r * sin(θ)) - for θ in range(0, 2π; length=n + 1)[1:(end - 1)] - for φ in range(0, 2π; length=n + 1)[1:(end - 1)] + ((R + r * cos(θ)) * cos(φ), (R + r * cos(θ)) * sin(φ), r * sin(θ)) for + θ in range(0, 2π; length=n + 1)[1:(end - 1)] for + φ in range(0, 2π; length=n + 1)[1:(end - 1)] ] end