Skip to content

Commit

Permalink
Merge pull request #75 from xiaodaigh/github-73
Browse files Browse the repository at this point in the history
fixed #73
  • Loading branch information
xiaodaigh authored Jan 16, 2022
2 parents 244a8a7 + c3ea475 commit 0eeba95
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "JDF"
uuid = "babc3d20-cd49-4f60-a736-a8f9c08892d3"
authors = ["Dai ZJ <zhuojia.dai@gmail.com>"]
version = "0.4.5"
version = "0.4.6"

[deps]
Blosc = "a74b3585-a348-5f62-a45c-50e91977d574"
Expand Down
10 changes: 5 additions & 5 deletions src/type-writer-loader/Missing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ some_elm(::Type{Missing}) = missing
# Dispatch for Union{T, Missing} columns:
# 1. compress and write the missing-value information alongside the data
# 2. restore the missing values when loading
compress_then_write(b::Vector{Union{T,Missing}}, io) where {T} = begin
function compress_then_write(b::Vector{Union{T,Missing}}, io) where {T}
b_S = coalesce.(b, some_elm(T))

metadata = compress_then_write(b_S, io)
Expand All @@ -26,18 +26,18 @@ end
# A column made up solely of `missing` has no payload to store: the second
# argument is ignored and the returned metadata only records the element type
# and the original number of elements.
function compress_then_write(b::Vector{Missing}, _)
    n = length(b)
    return (len = 0, type = Missing, orig_len = n)
end

column_loader!(buffer, ::Type{Union{Missing,T}}, io, metadata) where {T} = begin
# Loader for a column whose element type is Union{Missing,T}.
# `metadata.Tmeta` describes the stored values and `metadata.missingmeta`
# describes a Bool column flagging which positions were missing.
function column_loader!(buffer, ::Type{Union{Missing,T}}, io, metadata) where {T}
    # load the stored values first; `allowmissing` is applied so that `missing`
    # can be assigned into the result below
    value_meta = metadata.Tmeta
    loaded = allowmissing(column_loader!(buffer, value_meta.type, io, value_meta))

    # then load the flags marking the missing positions as Bool
    missing_flags = column_loader(Bool, io, metadata.missingmeta)

    # overwrite the flagged positions with `missing`
    loaded[missing_flags] .= missing
    return loaded
end

column_loader!(buffer, ::Type{Missing}, io, metadata) =
# Loader for an all-`missing` column: neither the buffer nor `io` is consulted;
# the column is rebuilt purely from the length stored in `metadata.orig_len`.
function column_loader!(_, ::Type{Missing}, io, metadata)
    n = metadata.orig_len
    return Vector{Missing}(missing, n)
end
36 changes: 29 additions & 7 deletions src/type-writer-loader/categorical-arrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ using DataAPI

using CategoricalArrays: CategoricalVector, CategoricalArray, CategoricalPool

compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Integer} = begin
#println("abc")
function compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Integer}
compress_refs = compress_then_write(b.refs, io)
compress_poolindex = compress_then_write(DataAPI.levels(b), io)

Expand All @@ -15,15 +14,38 @@ compress_then_write(b::CategoricalVector{T,IntType}, io) where {T, IntType<:Inte
)
end

column_loader(b::Type{CategoricalVector}, io, metadata) = begin
# function column_loader(::Type{CategoricalVector{Union{Missing, T}, I}}, io, metadata) where {T, I}
# println("got here1")
# refs_meta = metadata.refs
# pi_meta = metadata.poolindex
# ref = column_loader(refs_meta.type, io, refs_meta)
# poolindex = column_loader(pi_meta.type, io, pi_meta)

# return CategoricalArray{pi_meta.type,1}(
# ref,
# CategoricalPool{eltype(poolindex),eltype(ref)}(Array(poolindex), metadata.ordered),
# )
# end

# Load a categorical column back from `io`.
#
# `metadata.refs` and `metadata.poolindex` describe the stored reference codes
# and the stored levels; `metadata.ordered` is forwarded to the CategoricalPool.
# The element type of the result admits `Missing` only when a reference code of
# 0 (which represents a missing value) is present, so the concrete return type
# depends on the data that was read.
function column_loader(::Type{CategoricalVector}, io, metadata)
    refs_meta = metadata.refs
    pi_meta = metadata.poolindex

    # the reference codes are read first, then the levels
    ref = column_loader(refs_meta.type, io, refs_meta)
    poolindex = column_loader(pi_meta.type, io, pi_meta)

    # build the pool once; it is the same whether or not missings are present
    pool = CategoricalPool{eltype(poolindex),eltype(ref)}(
        Array(poolindex),
        metadata.ordered,
    )

    # a ref of 0 represents a missing value, so the element type is widened
    # only when such a ref occurs; nothing is constructed before this check
    elm_type = any(==(0), ref) ? Union{pi_meta.type,Missing} : pi_meta.type
    return CategoricalArray{elm_type,1}(ref, pool)
end

if false
Expand Down
14 changes: 13 additions & 1 deletion test/test-categorical-ararys.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using Tables
JDF.save(df, "a3cate.jdf")
df_loaded_back = JDF.load("a3cate.jdf", cols = [:x2, :x1])

df2 = DataFrame(df_loaded_back; copycols=true)
df2 = DataFrame(df_loaded_back; copycols = true)
@test size(df2, 2) == 2
@test size(df2, 1) == 100
@time df2[!, :x1] isa CategoricalVector{Int}
Expand All @@ -28,3 +28,15 @@ end

rm("iris.jdf", force = true, recursive = true)
end

@testset "CategoricalArray{Union{Missing, String}}" begin
    # Regression test for GitHub issue #73: a categorical column containing
    # `missing` must still contain it after a save/load round trip.
    path = "df2.jdf"
    df2 = DataFrame(sex = categorical(["Male", missing, "Female"]))

    try
        JDF.save(path, df2)
        b = JDF.load(path) |> DataFrame

        @test any(ismissing, b.sex)
        @test length(b.sex) == 3
        @test count(ismissing, b.sex) == 1
    finally
        # clean up even when saving or loading throws, so no stray output is
        # left behind for later test runs
        rm(path, force = true, recursive = true)
    end
end

2 comments on commit 0eeba95

@xiaodaigh
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/52512

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.6 -m "<description of version>" 0eeba95f09fbdc0e83a674b93d77d3aa7b7ee955
git push origin v0.4.6

Please sign in to comment.