Skip to content

Commit

Permalink
Add save_zip and load_zip functions. (#41)
Browse files Browse the repository at this point in the history
* add save and load zip functions

* bump version
  • Loading branch information
nhz2 authored Oct 17, 2023
1 parent 625b050 commit 809e231
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "SmallZarrGroups"
uuid = "d423b6e5-1c84-4ae2-8d2d-b903aee15ac7"
authors = ["nhz2 <nhz2@cornell.edu>"]
version = "0.7.0"
version = "0.7.1"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
16 changes: 16 additions & 0 deletions src/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,22 @@ function load_dir(dirpath::AbstractString)::ZGroup
load_dir(reader)
end

"""
    load_zip(filename::AbstractString)::ZGroup
    load_zip(data::Vector{UInt8})::ZGroup

Load a `ZGroup` from the file `filename`, or from a `data` vector, in ZipStore format.
"""
function load_zip(filename::AbstractString)::ZGroup
    # The whole file is read into memory before it is handed to the zip reader.
    zipbytes = read(filename)
    return load_dir(ZarrZipReader(zipbytes))
end
# Method of `load_zip` for bytes that are already in memory:
# wrap `data` in a `ZarrZipReader` and load the group from that reader.
function load_zip(data::Vector{UInt8})::ZGroup
    return load_dir(ZarrZipReader(data))
end


function try_add_attrs!(zthing::Union{ZGroup, ZArray}, reader::AbstractReader, keyname_dict, key_prefix)
attrsidx = get(Returns(0), keyname_dict, key_prefix*".zattrs")
Expand Down
36 changes: 24 additions & 12 deletions src/saving.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,40 @@ function save_dir(dirpath::AbstractString, z::ZGroup)
if endswith(dirpath, ".zip")
@argcheck !isdir(dirpath)
mkpath(dirname(dirpath))
open(dirpath; write=true) do io
writer = ZarrZipWriter(io)
try
save_dir(writer, z)
finally
closewriter(writer)
end
end
save_zip(dirpath, z)
else
save_dir(DirectoryWriter(dirpath), z)
end
nothing
end

"""
    save_dir(writer::AbstractWriter, z::ZGroup)

Save the group `z` through `writer` by calling `_save_zgroup` with an empty
string as the key argument (presumably the root of the store; confirm against
`_save_zgroup`).

Note this may delete pre-existing data at the destination; whether it does
depends on the `writer` implementation.
"""
function save_dir(writer::AbstractWriter, z::ZGroup)
    # TODO add something to prevent loops
    root_key = ""
    return _save_zgroup(writer, root_key, z)
end

"""
    save_zip(filename::AbstractString, z::ZGroup)
    save_zip(io::IO, z::ZGroup)

Save data in a file `filename` or an `io` in ZipStore format.

Note this will delete pre-existing data in `filename`.
The `io` passed to this function must be empty.
This function will not close `io`.
"""
function save_zip(filename::AbstractString, z::ZGroup)::Nothing
    # Function-first form of `open`: the file is closed once the callback
    # returns or throws.
    return open(io -> save_zip(io, z), filename; write=true)
end
# Method of `save_zip` that writes `z` into a caller-supplied `io`
# through a `ZarrZipWriter`.
function save_zip(io::IO, z::ZGroup)::Nothing
    zipwriter = ZarrZipWriter(io)
    try
        return save_dir(zipwriter, z)
    finally
        # Runs whether or not saving throws; only the writer is closed here.
        closewriter(zipwriter)
    end
end

"""
save attributes using JSON3
"""
Expand Down
5 changes: 3 additions & 2 deletions src/writers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ function write_key(d::DirectoryWriter, key::AbstractString, data)::Nothing
end

"""
Write to an in memory zipfile, that gets saved to disk on close.
This writer will overwrite any existing file at `path`
Write to an `IO` in ZipStore format.
The wrapped io will be written to,
but will not be seeked or read.
"""
struct ZarrZipWriter{IO_TYPE<:IO} <: AbstractWriter
zipfile::ZipWriter{IO_TYPE}
Expand Down
71 changes: 70 additions & 1 deletion test/test_simple-usage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,75 @@ end
end

@testset "saving and loading a zip file" begin
    # Build a group with two arrays at the root and a third inside a nested group.
    root = ZGroup()
    floats = rand(10, 20)
    root["testarray1"] = floats
    attrs(root["testarray1"])["foo"] = "bar1"
    ints = rand(Int, 20)
    root["testarray2"] = ints
    bytes = rand(UInt8, 20)
    root["testgroup1"] = ZGroup()
    root["testgroup1"]["testarray1"] = bytes
    attrs(root["testgroup1/testarray1"])["foo"] = "bar3"
    mktempdir() do path
        zippath = joinpath(path, "test.zip")
        # Note this will delete pre existing data at "path/test.zip".
        # This zip file can be read by zarr-python.
        SmallZarrGroups.save_zip(zippath, root)
        @test isfile(zippath)
        # load_zip can load zip files saved by save_zip, or saved by zarr-python.
        # It can also load zip files created by zipping a zarr directory.
        # Note the zip file must be in the format described in the zarr-python docs:
        # "
        # Take note that the files in the Zip file must be relative to the root of the Zarr archive.
        # You may find it easier to create such a Zip file with 7z, e.g.:
        # `7z a -tzip archive.zarr.zip archive.zarr/.`
        # "
        loaded = SmallZarrGroups.load_zip(zippath)
        # Array contents and attributes must survive the round trip unchanged.
        @test collect(loaded["testarray1"]) == floats
        @test attrs(loaded["testarray1"]) == OrderedDict(["foo" => "bar1"])
        @test loaded["testarray2"] == ints
        @test attrs(loaded["testarray2"]) == OrderedDict([])
        @test attrs(loaded) == OrderedDict([])
        @test loaded["testgroup1/testarray1"] == bytes
        @test attrs(loaded["testgroup1/testarray1"]) == OrderedDict(["foo" => "bar3"])
    end
end

@testset "saving and loading an in memory zip file" begin
    # Build a group with two arrays at the root and a third inside a nested group.
    root = ZGroup()
    floats = rand(10, 20)
    root["testarray1"] = floats
    attrs(root["testarray1"])["foo"] = "bar1"
    ints = rand(Int, 20)
    root["testarray2"] = ints
    bytes = rand(UInt8, 20)
    root["testgroup1"] = ZGroup()
    root["testgroup1"]["testarray1"] = bytes
    attrs(root["testgroup1/testarray1"])["foo"] = "bar3"
    buffer = IOBuffer()
    SmallZarrGroups.save_zip(buffer, root)
    zipbytes = take!(buffer)
    # zipbytes now contains the data of a zipfile
    # it could be saved to disk, sent to another process, or loaded back as a ZGroup.
    loaded = SmallZarrGroups.load_zip(zipbytes)
    # Array contents and attributes must survive the round trip unchanged.
    @test collect(loaded["testarray1"]) == floats
    @test attrs(loaded["testarray1"]) == OrderedDict(["foo" => "bar1"])
    @test loaded["testarray2"] == ints
    @test attrs(loaded["testarray2"]) == OrderedDict([])
    @test attrs(loaded) == OrderedDict([])
    @test loaded["testgroup1/testarray1"] == bytes
    @test attrs(loaded["testgroup1/testarray1"]) == OrderedDict(["foo" => "bar3"])
end

@testset "deprecated saving and loading a zip file" begin
g = ZGroup()
data1 = rand(10,20)
g["testarray1"] = data1
Expand Down Expand Up @@ -266,7 +335,7 @@ end
end
end

@testset "saving and loading an in memory zip file" begin
@testset "deprecated saving and loading an in memory zip file" begin
g = ZGroup()
data1 = rand(10,20)
g["testarray1"] = data1
Expand Down

2 comments on commit 809e231

@nhz2
Copy link
Member Author

@nhz2 nhz2 commented on 809e231 Oct 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/93574

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.7.1 -m "<description of version>" 809e23187cda50317f3ef36c9e48d91d55f20f0c
git push origin v0.7.1

Please sign in to comment.