Skip to content

Commit

Permalink
hmb-gen: netcdf now compressed by default, with --no-netcdf-compression as opt-out flag (#65)
Browse files Browse the repository at this point in the history

* hmb-gen: netcdf now compressed by default, with --no-netcdf-compression as opt-out flag

* hmb-gen: netcdf now compressed by default, with --no-netcdf-compression as opt-out flag
  • Loading branch information
carueda authored Mar 8, 2025
1 parent df80f7e commit b54a2a2
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 57 deletions.
1 change: 1 addition & 0 deletions pbp/main_hmb_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def main():
file_helper=file_helper,
output_dir=opts.output_dir,
output_prefix=opts.output_prefix,
compress_netcdf=opts.compress_netcdf,
global_attrs_uri=opts.global_attrs,
set_global_attrs=opts.set_global_attrs,
variable_attrs_uri=opts.variable_attrs,
Expand Down
8 changes: 8 additions & 0 deletions pbp/main_hmb_generator_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,14 @@ def parse_arguments():
help="Output filename prefix",
)

parser.add_argument(
"--no-netcdf-compression",
dest="compress_netcdf",
default=True,
action="store_false",
help="Do not compress the generated NetCDF file.",
)

parser.add_argument(
"--s3",
default=False,
Expand Down
34 changes: 24 additions & 10 deletions pbp/process_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
output_dir: str,
output_prefix: str,
gen_netcdf: bool = True,
compress_netcdf: bool = True,
global_attrs_uri: Optional[str] = None,
set_global_attrs: Optional[list[list[str]]] = None,
variable_attrs_uri: Optional[str] = None,
Expand All @@ -56,6 +57,8 @@ def __init__(
Output filename prefix.
:param gen_netcdf:
True to generate the netCDF file.
:param compress_netcdf:
True to compress the generated NetCDF file.
:param global_attrs_uri:
URI of JSON file with global attributes to be added to the NetCDF file.
:param set_global_attrs:
Expand Down Expand Up @@ -83,6 +86,7 @@ def __init__(
+ f"\n output_dir: {output_dir}"
+ f"\n output_prefix: {output_prefix}"
+ f"\n gen_netcdf: {gen_netcdf}"
+ f"\n compress_netcdf: {compress_netcdf}"
+ f"\n global_attrs_uri: {global_attrs_uri}"
+ f"\n set_global_attrs: {set_global_attrs}"
+ f"\n variable_attrs_uri: {variable_attrs_uri}"
Expand All @@ -101,6 +105,7 @@ def __init__(
self.output_dir = output_dir
self.output_prefix = output_prefix
self.gen_netcdf = gen_netcdf
self.compress_netcdf = compress_netcdf

self.metadata_helper = MetadataHelper(
self.log,
Expand Down Expand Up @@ -315,18 +320,27 @@ def save_dataset_to_netcdf(
log, #: loguru.Logger,
ds: xr.Dataset,
filename: str,
compress_netcdf: bool = True,
) -> bool:
log.info(f" - saving dataset to: {filename}")
log.info(f" - saving dataset to: {filename} (compressed: {compress_netcdf})")
encoding: dict[Any, dict[str, Any]] = {
"effort": {"_FillValue": None},
"frequency": {"_FillValue": None},
"sensitivity": {"_FillValue": None},
}
if compress_netcdf:
# TODO(Danelle) please review this
for k in ds.data_vars:
if ds[k].ndim < 2:
continue
encoding[k] = {
"zlib": True,
"complevel": 3,
"fletcher32": True,
"chunksizes": tuple(map(lambda x: x // 2, ds[k].shape)),
}
try:
ds.to_netcdf(
filename,
engine="h5netcdf",
encoding={
"effort": {"_FillValue": None},
"frequency": {"_FillValue": None},
"sensitivity": {"_FillValue": None},
},
)
ds.to_netcdf(filename, format="NETCDF4", engine="h5netcdf", encoding=encoding)
return True
except Exception as e: # pylint: disable=broad-exception-caught
error = f"Unable to save {filename}: {e}"
Expand Down
Loading

0 comments on commit b54a2a2

Please sign in to comment.