Skip to content

Commit

Permalink
feat: allow saving HDF5 files to io.BytesIO (#47)
Browse files Browse the repository at this point in the history
thunor.io.write_hdf() now supports writing to an io.BytesIO()
buffer as well as to a file on disk
  • Loading branch information
alubbock authored Sep 10, 2024
1 parent 98c536d commit 84d7a29
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
15 changes: 12 additions & 3 deletions thunor/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,18 +868,27 @@ def write_hdf(df_data, filename, dataset_format='fixed'):
----------
df_data: HtsPandas
HTS dataset
filename: str
Output filename
filename: str or io.BytesIO
Output filename, or io.BytesIO instance for in-memory use
dataset_format: str
One of 'fixed' or 'table'. See pandas HDFStore docs for details
"""
with pd.HDFStore(filename, 'w', complib='zlib', complevel=9) as hdf:
if isinstance(filename, io.BytesIO):
extra_kwargs = {"driver": "H5FD_CORE", "driver_core_backing_store": 0}
filepath = 'in-memory-file'
else:
extra_kwargs = {}
filepath = filename
with pd.HDFStore(filepath, 'w', complib='zlib', complevel=9,
**extra_kwargs) as hdf:
hdf.root._v_attrs.generator = package_name
hdf.root._v_attrs.generator_version = __version__
hdf.put('doses', df_data.doses_unstacked(), format=dataset_format)
hdf.put('assays', df_data.assays, format=dataset_format)
if df_data.controls is not None:
hdf.put('controls', df_data.controls, format=dataset_format)
if isinstance(filename, io.BytesIO):
filename.write(hdf._handle.get_file_image())


def _stack_doses(df_doses, inplace=True):
Expand Down
13 changes: 12 additions & 1 deletion thunor/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def setUpClass(cls):
with importlib.resources.as_file(ref) as filename:
cls.hts007 = thunor.io.read_hdf(filename)

def test_hdf5_read_write(self):
def test_hdf5_read_write_file(self):
with tempfile.NamedTemporaryFile(suffix='.h5') as tf:
if os.name == 'nt':
# Can't have two file handles on Windows
Expand All @@ -43,6 +43,17 @@ def test_hdf5_read_write(self):

_assert_datasets_equal(self.hts007, newdf)

def test_hdf5_read_write_buffer(self):
    """Round-trip the class-level hts007 dataset through an io.BytesIO.

    The dataset is written with thunor.io.write_hdf into an in-memory
    buffer, read back with thunor.io.read_hdf, and compared with the
    original.
    """
    original = self.hts007
    stream = io.BytesIO()
    thunor.io.write_hdf(original, filename=stream)

    # Rewind so the reader starts at the first byte that was written.
    stream.seek(0)
    _assert_datasets_equal(original, thunor.io.read_hdf(stream))


def test_vanderbilt_csv_read_write(self):
with tempfile.NamedTemporaryFile(suffix='.csv') as tf:
if os.name == 'nt':
Expand Down

0 comments on commit 84d7a29

Please sign in to comment.