Skip to content

Commit

Permalink
fixed #73
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaodaigh committed Jan 16, 2022
1 parent 244a8a7 commit c3ea475
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "JDF"
uuid = "babc3d20-cd49-4f60-a736-a8f9c08892d3"
authors = ["Dai ZJ <zhuojia.dai@gmail.com>"]
version = "0.4.5"
version = "0.4.6"

[deps]
Blosc = "a74b3585-a348-5f62-a45c-50e91977d574"
Expand Down
10 changes: 5 additions & 5 deletions src/type-writer-loader/Missing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ some_elm(::Type{Missing}) = missing
# Dispatch for Vector{Union{T,Missing}} columns:
# 1. on write, compress the values and record which entries are missing
# 2. on load, read the values back and restore the missing entries
compress_then_write(b::Vector{Union{T,Missing}}, io) where {T} = begin
function compress_then_write(b::Vector{Union{T,Missing}}, io) where {T}
b_S = coalesce.(b, some_elm(T))

metadata = compress_then_write(b_S, io)
Expand All @@ -26,18 +26,18 @@ end
# Writer for a column that holds only `missing`: the second argument is ignored
# and nothing is written; the returned metadata records a zero payload length,
# the `Missing` type, and the original number of elements.
function compress_then_write(b::Vector{Missing}, _)
    n = length(b)
    return (len = 0, type = Missing, orig_len = n)
end

column_loader!(buffer, ::Type{Union{Missing,T}}, io, metadata) where {T} = begin
"""
    column_loader!(buffer, ::Type{Union{Missing,T}}, io, metadata) where {T}

Load a column with element type `Union{Missing,T}` from `io`.

The values are read first with the loader selected by `metadata.Tmeta.type`, then a
`Bool` mask described by `metadata.missingmeta` is read, and every position where the
mask is `true` is set to `missing`. Returns the resulting vector.
"""
function column_loader!(buffer, ::Type{Union{Missing,T}}, io, metadata) where {T}
    # Read the stored values and widen the element type so it can hold `missing`.
    value_meta = metadata.Tmeta
    vals = allowmissing(column_loader!(buffer, value_meta.type, io, value_meta))

    # Read the mask of missing positions, stored as a Bool column.
    is_missing = column_loader(Bool, io, metadata.missingmeta)

    vals[is_missing] .= missing
    return vals
end

column_loader!(buffer, ::Type{Missing}, io, metadata) =
# Loader for a column that holds only `missing`: neither the buffer nor `io` is
# touched; the vector is rebuilt purely from the recorded original length.
function column_loader!(_, ::Type{Missing}, io, metadata)
    n = metadata.orig_len
    return Vector{Missing}(missing, n)
end
36 changes: 29 additions & 7 deletions src/type-writer-loader/categorical-arrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ using DataAPI

using CategoricalArrays: CategoricalVector, CategoricalArray, CategoricalPool

compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Integer} = begin
#println("abc")
function compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Integer}
compress_refs = compress_then_write(b.refs, io)
compress_poolindex = compress_then_write(DataAPI.levels(b), io)

Expand All @@ -15,15 +14,38 @@ compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Inte
)
end

column_loader(b::Type{CategoricalVector}, io, metadata) = begin
# function column_loader(::Type{CategoricalVector{Union{Missing, T}, I}}, io, metadata) where {T, I}
# println("got here1")
# refs_meta = metadata.refs
# pi_meta = metadata.poolindex
# ref = column_loader(refs_meta.type, io, refs_meta)
# poolindex = column_loader(pi_meta.type, io, pi_meta)

# return CategoricalArray{pi_meta.type,1}(
# ref,
# CategoricalPool{eltype(poolindex),eltype(ref)}(Array(poolindex), metadata.ordered),
# )
# end

"""
    column_loader(::Type{CategoricalVector}, io, metadata)

Rebuild a categorical column from `io`.

Reads the reference codes described by `metadata.refs`, then the levels described by
`metadata.poolindex`, and combines them into a `CategoricalArray` whose pool carries
the `metadata.ordered` flag.

A reference code of `0` represents a missing value. When any code is `0` the element
type of the result is `Union{metadata.poolindex.type,Missing}`; otherwise it is
`metadata.poolindex.type`.
"""
function column_loader(::Type{CategoricalVector}, io, metadata)
    refs_meta = metadata.refs
    pi_meta = metadata.poolindex

    # Read order is significant: reference codes first, then the levels.
    ref = column_loader(refs_meta.type, io, refs_meta)
    poolindex = column_loader(pi_meta.type, io, pi_meta)

    # The pool is identical for both element types, so build it exactly once.
    pool = CategoricalPool{eltype(poolindex),eltype(ref)}(
        Array(poolindex), metadata.ordered
    )

    # ref == 0 encodes `missing`; widen the element type only when it is needed.
    T = any(iszero, ref) ? Union{pi_meta.type,Missing} : pi_meta.type
    return CategoricalArray{T,1}(ref, pool)
end

if false
Expand Down
14 changes: 13 additions & 1 deletion test/test-categorical-ararys.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using Tables
JDF.save(df, "a3cate.jdf")
df_loaded_back = JDF.load("a3cate.jdf", cols = [:x2, :x1])

df2 = DataFrame(df_loaded_back; copycols=true)
df2 = DataFrame(df_loaded_back; copycols = true)
@test size(df2, 2) == 2
@test size(df2, 1) == 100
@time df2[!, :x1] isa CategoricalVector{Int}
Expand All @@ -28,3 +28,15 @@ end

rm("iris.jdf", force = true, recursive = true)
end

@testset "CategoricalArray{Union{Missing, String}}" begin
    # Regression test for GitHub issue #73: a categorical column containing
    # `missing` must keep its missing entries after a save/load round trip.
    df2 = DataFrame(sex = categorical(["Male", missing, "Female"]))
    path = "df2.jdf"

    try
        JDF.save(path, df2)

        b = JDF.load(path) |> DataFrame

        @test any(ismissing, b.sex)
        # The missing entry must come back at the same position and the
        # remaining values must be unchanged.
        @test ismissing.(b.sex) == [false, true, false]
        @test collect(skipmissing(b.sex)) == ["Male", "Female"]
    finally
        # Remove the on-disk artefact even when saving or loading throws.
        rm(path, force = true, recursive = true)
    end
end

0 comments on commit c3ea475

Please sign in to comment.