diff --git a/Project.toml b/Project.toml index a743268..98f813e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "JDF" uuid = "babc3d20-cd49-4f60-a736-a8f9c08892d3" authors = ["Dai ZJ "] -version = "0.4.5" +version = "0.4.6" [deps] Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" diff --git a/src/type-writer-loader/Missing.jl b/src/type-writer-loader/Missing.jl index ad6afc9..7ea73c2 100644 --- a/src/type-writer-loader/Missing.jl +++ b/src/type-writer-loader/Missing.jl @@ -5,7 +5,7 @@ some_elm(::Type{Missing}) = missing # the dispatch for Union{T, Missing} # 1. compress the missing # 2. and also load the missing -compress_then_write(b::Vector{Union{T,Missing}}, io) where {T} = begin +function compress_then_write(b::Vector{Union{T,Missing}}, io) where {T} b_S = coalesce.(b, some_elm(T)) metadata = compress_then_write(b_S, io) @@ -26,18 +26,18 @@ end compress_then_write(b::Vector{Missing}, _) = (len = 0, type = Missing, orig_len = length(b)) -column_loader!(buffer, ::Type{Union{Missing,T}}, io, metadata) where {T} = begin +function column_loader!(buffer, ::Type{Union{Missing,T}}, io, metadata) where {T} # read the content Tmeta = metadata.Tmeta t_pre = column_loader!(buffer, Tmeta.type, io, Tmeta) |> allowmissing - #t = t_pre + # read the missings as bool m = column_loader(Bool, io, metadata.missingmeta) - #return t_pre + t_pre[m] .= missing t_pre end -column_loader!(buffer, ::Type{Missing}, io, metadata) = +column_loader!(_, ::Type{Missing}, io, metadata) = Vector{Missing}(missing, metadata.orig_len) diff --git a/src/type-writer-loader/categorical-arrays.jl b/src/type-writer-loader/categorical-arrays.jl index eff987d..a320320 100644 --- a/src/type-writer-loader/categorical-arrays.jl +++ b/src/type-writer-loader/categorical-arrays.jl @@ -2,8 +2,7 @@ using DataAPI using CategoricalArrays: CategoricalVector, CategoricalArray, CategoricalPool -compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Integer} = begin - #println("abc") +function compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Integer} compress_refs = compress_then_write(b.refs, io) compress_poolindex = compress_then_write(DataAPI.levels(b), io) @@ -15,15 +14,38 @@ compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Inte ) end -column_loader(b::Type{CategoricalVector}, io, metadata) = begin +# function column_loader(::Type{CategoricalVector{Union{Missing, T}, I}}, io, metadata) where {T, I} +# println("got here1") +# refs_meta = metadata.refs +# pi_meta = metadata.poolindex +# ref = column_loader(refs_meta.type, io, refs_meta) +# poolindex = column_loader(pi_meta.type, io, pi_meta) + +# return CategoricalArray{pi_meta.type,1}( +# ref, +# CategoricalPool{eltype(poolindex),eltype(ref)}(Array(poolindex), metadata.ordered), +# ) +# end + +function column_loader(::Type{CategoricalVector}, io, metadata) refs_meta = metadata.refs pi_meta = metadata.poolindex ref = column_loader(refs_meta.type, io, refs_meta) poolindex = column_loader(pi_meta.type, io, pi_meta) - CategoricalArray{pi_meta.type,1}( - ref, - CategoricalPool{eltype(poolindex),eltype(ref)}(Array(poolindex), metadata.ordered), - ) + + # this checks for missing in the values which would be represented by ref = 0 + if any(==(0), ref) + return CategoricalArray{Union{pi_meta.type, Missing},1}( + ref, + CategoricalPool{eltype(poolindex),eltype(ref)}(Array(poolindex), metadata.ordered), + ) + else + # no missing in the values, just return + return CategoricalArray{pi_meta.type,1}( + ref, + CategoricalPool{eltype(poolindex),eltype(ref)}(Array(poolindex), metadata.ordered), + ) + end end if false diff --git a/test/test-categorical-ararys.jl b/test/test-categorical-ararys.jl index 622cba4..c02e940 100644 --- a/test/test-categorical-ararys.jl +++ b/test/test-categorical-ararys.jl @@ -12,7 +12,7 @@ using Tables JDF.save(df, "a3cate.jdf") df_loaded_back = JDF.load("a3cate.jdf", cols = [:x2, :x1]) - df2 = DataFrame(df_loaded_back; copycols=true) + df2 = DataFrame(df_loaded_back; copycols = true) @test size(df2, 2) == 2 @test size(df2, 1) == 100 @time df2[!, :x1] isa CategoricalVector{Int} @@ -28,3 +28,15 @@ end rm("iris.jdf", force = true, recursive = true) end + +@testset "CategoricalArray{Union{Missing, String}}" begin + # Guard against github 73 + df2 = DataFrame(sex = categorical(["Male", missing, "Female"])) + JDF.save("df2.jdf", df2) + + b = JDF.load("df2.jdf") |> DataFrame + + @test any(ismissing, b.sex) + + rm("df2.jdf", force=true, recursive=true) +end \ No newline at end of file