diff --git a/src/h5json/dset_util.py b/src/h5json/dset_util.py index c5da351..7a3a7aa 100644 --- a/src/h5json/dset_util.py +++ b/src/h5json/dset_util.py @@ -107,7 +107,9 @@ def resize_dataset(dset_json, shape): raise TypeError(f"dataset with shape class: {shape_class} cannot be resized") if len(shape_class["dims"]) != len(shape): raise ValueError("Resize shape parameter doesn't match dataset's rank") - # TBD: validate shape + if shape_json["dims"] == list(shape): + # no change, just return + return shape_json["dims"] = list(shape) dset_json["modified"] = time.time() diff --git a/src/h5json/hdf5db.py b/src/h5json/hdf5db.py index 39de3b6..991e756 100644 --- a/src/h5json/hdf5db.py +++ b/src/h5json/hdf5db.py @@ -16,6 +16,7 @@ from .array_util import jsonToArray, bytesArrayToList from .dset_util import make_new_dset, resize_dataset from .objid import createObjId, getCollectionForId +from . import selections from .apiversion import _apiver from .reader.h5reader import H5Reader from .writer.h5writer import H5Writer @@ -49,6 +50,9 @@ def __init__( self._reader = h5_reader self._writer = h5_writer + + self._new_objects = set() # set of obj_id's + self._dirty_objects = set() # set of obj_id's if self._reader: root_id = self._reader.get_root_id() @@ -65,19 +69,70 @@ def __init__( self._db[root_id] = group_json self._root_id = root_id + @property + def db(self): + """ return object db dictionary """ + return self._db + + @property + def reader(self): + """ return reader instance """ + return self._reader + + @property + def writer(self): + """ return writer instance """ + return self._writer + + @property + def root_id(self): + """ return root uuid """ + return self._root_id + + def is_new(self, obj_id): + """ return true if this is a new object (has not been persisted) """ + return obj_id in self._new_objects + + def is_dirty(self, obj_id): + """ return true if this object has been modified """ + if self.is_new(obj_id): + return True + return obj_id in self._dirty_objects + + def make_dirty(self, obj_id): + """ Mark the object as dirty and update the lastModified timestamp """ + if self.is_new(obj_id): + # object hasn't been initially written yet, just return + return + if obj_id not in self.db: + self.log.error("make dirty called on deleted object") + raise KeyError(f"obj_id: {obj_id} not found") + if self.db[obj_id] is None: + # object deleted, just return + return + obj_json = self.db[obj_id] + now = time.time() + obj_json["lastModified"] = now + self._dirty_objects.add(obj_id) + + def flush(self): """ write out any changes """ - if self._writer: - self._writer.flush() + if not self.writer: + return # nothing to do + if self.writer.flush(): + # reset new and dirty sets + self._new_objects = set() + self._dirty_objects = set() def close(self): """ close reader and writer handles """ self.log.info("Hdf5db __close") self.flush() - if self._writer: - self._writer.close() - if self._reader: - self._reader.close() + if self.writer: + self.writer.close() + if self.reader: + self.reader.close() self._root_id = None self._db = {} @@ -94,14 +149,14 @@ def __exit__(self, type, value, traceback): def getObjectById(self, obj_id): """ return object with given id """ - if obj_id not in self._db: - if self._reader: + if obj_id not in self.db: + if self.reader: # load the obj from the reader - obj_json = self._reader.getObjectById(obj_id) - self._db[obj_id] = obj_json + obj_json = self.reader.getObjectById(obj_id) + self.db[obj_id] = obj_json else: raise KeyError(f"obj_id: {obj_id} not found") - obj_json = self._db[obj_id] + obj_json = self.db[obj_id] return obj_json @@ -110,10 +165,10 @@ def getObjectIdByPath(self, h5path, parent_id=None): otherwise the root_id """ if h5path == "/": - return self._root_id # just return root id + return self.root_id # just return root id if parent_id is None: - parent_id = self._root_id + parent_id = self.root_id self.log.debug(f"getObjectIdDByPath(h5path: {h5path} parent_id: {parent_id}") obj_json = self.getObjectById(parent_id) @@ -175,9 +230,9 @@ def getObjectByPath(self, path): def getDtype(self, obj_id): """ Return numpy data type for given object id """ - if obj_id not in self._db: + if obj_id not in self.db: raise KeyError(f"{obj_id} not found") - obj_json = self._db[obj_id] + obj_json = self.db[obj_id] if "type" not in obj_json: # group id? raise TypeError(f"{obj_id} does not have a datatype") @@ -196,7 +251,7 @@ def createCommittedType(self, datatype, cpl=None): if cpl is None: cpl = {} - ctype_id = createObjId(obj_type="datatypes", root_id=self._root_id) + ctype_id = createObjId(obj_type="datatypes", root_id=self.root_id) if isinstance(datatype, np.dtype): dt = datatype else: @@ -207,7 +262,8 @@ def createCommittedType(self, datatype, cpl=None): ctype_json = {"type": type_json, "attributes": {}, "cpl": cpl} ctype_json["created"] = time.time() ctype_json["modified"] = None - self._db[ctype_id] = ctype_json + self.db[ctype_id] = ctype_json + self._new_objects.add(ctype_id) return ctype_id @@ -224,15 +280,19 @@ def getAttribute(self, obj_id, name, includeData=True): msg = f"Attribute: [{name }] not found in object: {obj_id}" self.log.info(msg) return None + if attrs[name] == None: + msg = f"Attribute: [{name}] has been deleted" + self.log.info(None) + return None attr_json = attrs[name] if includeData and "value" not in attr_json: # Reader may not have pre-loaded large attributes # fetch it now - if not self._reader: + if not self.reader: raise RuntimeError(f"Expected to find value for attribute {name} of {obj_id}") - attr_json = self._reader.get_attribute(obj_id, name) + attr_json = self.reader.get_attribute(obj_id, name) attr_json["value"] = attr_json # this will update the _db return attr_json @@ -245,8 +305,12 @@ def getAttributes(self, obj_id): obj_json = self.getObjectById(obj_id) attrs = obj_json["attributes"] + names = [] + for name in attrs: + if attrs[name] != None: + names.append(name) - return attrs + return names def getAttributeValue(self, obj_id, name): """ Return NDArray of the given attribute value """ @@ -277,7 +341,7 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None): ctype_id = dtype[len("datatypes/"):] if getCollectionForId(ctype_id) != "datatypes": raise TypeError(f"unexpected dtype value for createAttribute: {dtype}") - if ctype_id not in self._db: + if ctype_id not in self.db: raise KeyError(f"ctype: {ctype_id} not found") ctype_json = self.getObjectById(ctype_id) type_json = ctype_json["type"].copy() @@ -345,21 +409,21 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None): obj_json = self.getObjectById(obj_id) attrs_json = obj_json["attributes"] if name in attrs_json: - # replace, update modified timestamp + # replace, keep, created timestamp created = attrs_json["created"] - modified = time.time() else: created = time.time() - modified = None type_json = getTypeItem(dtype) # finally put it all together... attr_json = {"shape": shape_json, "type": type_json, "value": value_json} attr_json["created"] = created - attr_json["modified"] = modified # slot into the obj_json["attrs"] attrs_json[name] = attr_json + # mark object as dirty + self.make_dirty(obj_id) + def deleteAttribute(self, obj_id, name): """ delete the given attribute """ @@ -367,18 +431,93 @@ def deleteAttribute(self, obj_id, name): attrs_json = obj_json["attributes"] if name not in attrs_json: raise KeyError(f"attribute [{name}] not found in {obj_id}") - del attrs_json[name] + attrs_json[name] = None # mark key for deletion + + self.make_dirty(obj_id) - def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"): + def getDatasetValues(self, dset_id, sel): """ Get values from dataset identified by obj_id. If a slices list or tuple is provided, it should have the same number of elements as the rank of the dataset. """ - self.log.info(f"getDatasetValues obj_id: {obj_id}, slices: {slices} format: {format}") - #TBD - + self.log.info(f"getDatasetValues dset_id: {dset_id}, sel: {sel}") + dset_json = self.getObjectById(dset_id) + shape_json = dset_json["shape"] + if not isinstance(sel, selections.Selection): + raise TypeError("Expected Selection class") + + if shape_json["class"] == "H5S_NULL": + return None + + if shape_json["class"] == "H5S_SCALAR": + if sel.select_type != sel.H5S_SELECT_ALL: + # TBD: support other selection types + raise ValueError("Only SELECT_ALL selections are supported for scalar datasets") + if sel.shape != (): + raise ValueError("Selection shape does not match dataset shape") + else: + dims = tuple(shape_json["dims"]) + if sel.shape != dims: + raise ValueError("Selection shape does not match dataset shape") + rank = len(dims) + + dtype = self.getDtype(dset_id) + if self.reader: + arr = self.reader.getDatasetValues(dset_id, sel) + else: + # TBD: Initialize with fill value if non-zero + arr = np.zeros(sel.shape, dtype=dtype) + + if "updates" in dset_json: + # apply any non-flushed changes that intersect the current selection + updates = dset_json["updates"] + for (update_sel, update_val) in updates: + sel_inter = selections.intersect(sel, update_sel) + if sel_inter.nselect == 0: + continue + # update portion of arr, that intersects update_val + slices = [] + for dim in range(rank): + start = sel_inter.start[dim] - sel.start[dim] + stop = start + sel_inter.count[dim] + slices.append(slice(start, stop, 1)) + slices = tuple(slices) + arr[slices] = update_val + + return arr + + def setDatasetValues(self, dset_id, sel, arr): + """ + Write the given ndarray to the dataset using the selection + """ + dset_json = self.getObjectById(dset_id) + shape_json = dset_json["shape"] + if not isinstance(sel, selections.Selection): + raise TypeError("Expected Selection class") + if sel.select_type not in (selections.H5S_SELECT_HYPERSLABS, selections.H5S_SELECT_ALL): + # TBD: support other selection types + raise ValueError("Only hyperslab selections are currently supported") + if not isinstance(arr, np.ndarray): + raise TypeError("Expected ndarray for data value") + if shape_json["class"] == "H5S_NULL": + raise ValueError("writing to null space dataset not supported") + if shape_json["class"] == "H5S_SCALAR": + if sel.shape != (): + raise ValueError("Selection shape does not match dataset shape") + if len(arr.shape) > 0: + raise TypeError("Expected scalar ndarray for scalar dataset") + else: + dims = tuple(shape_json["dims"]) + if sel.shape != dims: + raise ValueError("Selection shape does not match dataset shape") + if "updates" not in dset_json or sel.select_type == selections.H5S_SELECT_ALL: + # for select all, throw out any existing updates since this will overwrite them + dset_json["updates"] = [] + updates = dset_json["updates"] + updates.append((sel, arr.copy())) + self.make_dirty(dset_id) def createDataset( self, @@ -414,8 +553,9 @@ def createDataset( kwds["cpl"] = cpl dset_json = make_new_dset(shape=shape, dtype=dtype, **kwds) - dset_id = createObjId("datasets", root_id=self._root_id) - self._db[dset_id] = dset_json + dset_id = createObjId("datasets", root_id=self.root_id) + self.db[dset_id] = dset_json + self._new_objects.add(dset_id) return dset_id @@ -426,18 +566,25 @@ def resizeDataset(self, dset_id, shape): self.log.info(f"resizeDataset {dset_id}, {shape}") dset_json = self.getObjectById(dset_id) # will throw exception if not found - resize_dataset(dset_json, shape) + if resize_dataset(dset_json, shape): + self._dirty_objects.add(dset_id) def deleteObject(self, obj_id): """ Delete the given object """ self.log.info(f"deleteObject: {obj_id}") - if obj_id not in self._db: + if obj_id not in self.db: raise KeyError(f"Object {obj_id} not found for deletion") - if obj_id == self._root_id: + if obj_id == self.root_id: raise KeyError("Root group cannot be deleted") - del self._db[obj_id] - # TBD: add to pending deleted items + self.db[obj_id] = None + + if obj_id in self._new_objects: + self._new_objects.remove(obj_id) + + if obj_id in self._dirty_objects: + self._dirty_objects.remove(obj_id) + def getLinks(self, grp_id): """ Get the links for the given group """ @@ -445,100 +592,113 @@ def getLinks(self, grp_id): if "links" not in grp_json: raise KeyError(f"No links - {grp_id} not a group?") links = grp_json["links"] - return links + names = [] + for name in links: + if links[name] != None: + names.append(name) + return names def getLink(self, grp_id, name): """ Get the given link """ - links = self.getLinks(grp_id) + obj_json = self.getObjectById(grp_id) + links = obj_json["links"] if name not in links: - raise KeyError(f"Link [{name}] not found in {grp_id}") + self.log.info(f"Link [{name}] not found in {grp_id}") + return None + if links[name] == None: + self.log.info(f"Link {name} in {grp_id} has been deleted") + return None + return links[name] + def _addLink(self, grp_id, name, link_json): + obj_json = self.getObjectById(grp_id) + links = obj_json["links"] + links[name] = link_json + self.make_dirty(grp_id) + def createHardLink(self, grp_id, name, tgt_id): """ Create a new hardlink """ - links = self.getLinks(grp_id) - if name in links: - self.deleteLink(grp_id, name) link_json = {"class": "H5L_TYPE_HARD", "id": tgt_id} link_json["created"] = time.time() - links[name] = link_json + self._addLink(grp_id, name, link_json) def createSoftLink(self, grp_id, name, h5path): """ Create a soft link """ - links = self.getLinks(grp_id) - if name in links: - self.deleteLink(grp_id, name) link_json = {"class": "H5L_TYPE_SOFT", "h5path": h5path} link_json["created"] = time.time() - links[name] = link_json + self._addLink(grp_id, name, link_json) def createCustomLink(self, grp_id, name, link_json): """ create a custom link """ - links = self.getLinks(grp_id) - if name in links: - self.deleteLink(grp_id, name) if link_json.get("class") != "H5L_TYPE_USER_DEFINED": link_json["class"] = "H5L_TYPE_USER_DEFINED" link_json["created"] = time.time() - links[name] = link_json - + self._addLink(grp_id, name, link_json) def createExternalLink(self, grp_id, name, h5path, filepath): """ Create a external link link """ - links = self.getLinks(grp_id) - if name in links: - self.deleteLink(grp_id, name) link_json = {"class": "H5L_TYPE_EXTERNAL", "h5path": h5path, "file": filepath} link_json["created"] = time.time() - links[name] = link_json + self._addLink(grp_id, name, link_json) def deleteLink(self, grp_id, name): """ Delete the given link """ grp_json = self.getObjectById(grp_id) if "links" not in grp_json: raise KeyError(f"No links - {grp_id} not a group?") - links = self.getLinks(grp_id) + links = grp_json["links"] if name not in links: raise KeyError(f"Link [{name}] not found in {grp_id}") - del links[name] - grp_json["modified"] = time.time() + links[name] = None # mark for deletion + self.make_dirty(grp_id) def createGroup(self, cpl=None): """ Create a new group """ - grp_id = createObjId("groups", root_id=self._root_id) + grp_id = createObjId("groups", root_id=self.root_id) group_json = {"attributes": {}, "links": {}} if cpl: group_json["cpl"] = cpl else: group_json["cpl"] = {} group_json["created"] = time.time() - group_json["modified"] = None - self._db[grp_id] = group_json + self.db[grp_id] = group_json + self._new_objects.add(grp_id) return grp_id def getCollection(self, col_type=None): obj_ids = [] - for obj_id in self._db: + for obj_id in self.db: + if self.db[obj_id] == None: + # skip deleted objects + continue if not col_type or getCollectionForId(obj_id) == col_type: obj_ids.append(obj_id) return obj_ids def __len__(self): # return the number of objects - return len(self._db) - + count = 0 + for obj_id in self.db: + # skip deleted objects + if self.db[obj_id] != None: + count += 1 + return count def __iter__(self): """ Iterate over object ids """ - for obj_id in self._db: + for obj_id in self.db: + if self.db[obj_id] == None: + # skip deleted objects + continue yield obj_id def __contains__(self, obj_id): """ Test if a obj id exists """ - return obj_id in self._db + return obj_id in self.db and self.db[obj_id] != None diff --git a/src/h5json/reader/h5py_reader.py b/src/h5json/reader/h5py_reader.py index dc9220a..4e7c9b5 100644 --- a/src/h5json/reader/h5py_reader.py +++ b/src/h5json/reader/h5py_reader.py @@ -42,13 +42,10 @@ def __init__( ): self._id_map = {} self._addr_map = {} - """ if app_logger: self.log = app_logger else: self.log = logging.getLogger() - self._filepath = filepath - """ super().__init__(filepath, app_logger=app_logger) f = h5py.File(self._filepath) self._f = f @@ -264,11 +261,19 @@ def getObjectById(self, obj_id, include_attrs=True, include_links=True): return obj_json - def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"): + def getDatasetValues(self, dset_id, selection): """ Get values from dataset identified by obj_id. If a slices list or tuple is provided, it should have the same number of elements as the rank of the dataset. """ - pass + dset = self._id_map[dset_id] + self.log.info(f"getDatasetValues: {dset_id}") + if dset.shape is None: + # TBD: return something like h5py.Empty in this case? + return None + arr = dset[selection] + return arr + + diff --git a/src/h5json/reader/h5reader.py b/src/h5json/reader/h5reader.py index 6a37a07..69a45d0 100644 --- a/src/h5json/reader/h5reader.py +++ b/src/h5json/reader/h5reader.py @@ -51,7 +51,7 @@ def getAttribute(self, obj_id, name, includeData=True): pass @abstractmethod - def getDatasetValues(self, obj_id, slices=Ellipsis, format="json"): + def getDatasetValues(self, obj_id, selection): """ Get values from dataset identified by obj_id. If a slices list or tuple is provided, it should have the same diff --git a/src/h5json/selections.py b/src/h5json/selections.py new file mode 100644 index 0000000..4d700d9 --- /dev/null +++ b/src/h5json/selections.py @@ -0,0 +1,834 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and # +# Utilities. The full HDF5 REST Server copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## + +# We use __getitem__ side effects, which pylint doesn't like. +# pylint: disable=pointless-statement + +""" + High-level access to HDF5 dataspace selections +""" + +from __future__ import absolute_import + +import numpy as np + +H5S_SEL_POINTS = 0 +H5S_SELECT_SET = 1 +H5S_SELECT_APPEND = 2 +H5S_SELECT_PREPEND = 3 +H5S_SELECT_OR = 4 +H5S_SELECT_NONE = 5 +H5S_SELECT_ALL = 6 +H5S_SELECT_HYPERSLABS = 7 +H5S_SELECT_NOTB = 8 +H5S_SELLECT_FANCY = 9 + + +def select(obj, args): + """ High-level routine to generate a selection from arbitrary arguments + to __getitem__. The arguments should be the following: + + obj + Datatset object + + args + Either a single argument or a tuple of arguments. See below for + supported classes of argument. + + Argument classes: + + Single Selection instance + Returns the argument. + + numpy.ndarray + Must be a boolean mask. Returns a PointSelection instance. + + RegionReference + Returns a Selection instance. + + Indices, slices, ellipses only + Returns a SimpleSelection instance + + Indices, slices, ellipses, lists or boolean index arrays + Returns a FancySelection instance. + """ + if not isinstance(args, tuple): + args = (args,) + + if hasattr(obj, "shape") and obj.shape == (): + # scalar object + sel = ScalarSelection(obj.shape, args) + return sel + + # "Special" indexing objects + if len(args) == 1: + + arg = args[0] + + if isinstance(arg, Selection): + if arg.shape != obj.shape: + raise TypeError("Mismatched selection shape") + return arg + + elif isinstance(arg, np.ndarray) or isinstance(arg, list): + sel = PointSelection(obj.shape) + sel[arg] + return sel + """ + #todo - RegionReference + elif isinstance(arg, h5r.RegionReference): + sid = h5r.get_region(arg, dsid) + if shape != sid.shape: + raise TypeError("Reference shape does not match dataset shape") + + return Selection(shape, spaceid=sid) + """ + + for a in args: + use_fancy = False + if isinstance(a, np.ndarray): + use_fancy = True + elif a is []: + use_fancy = True + elif not isinstance(a, slice) and a is not Ellipsis: + try: + int(a) + except Exception: + use_fancy = True + if use_fancy and hasattr(obj, "shape"): + sel = FancySelection(obj.shape) + sel[args] + return sel + if hasattr(obj, "shape"): + sel = SimpleSelection(obj.shape) + else: + sel = SimpleSelection(obj) + sel[args] + return sel + +def intersect(s1, s2): + """ Return the intersection of two selections """ + # TBD: this is currently only working for simple selections with stride 1 + valid_select_types = (H5S_SELECT_HYPERSLABS, H5S_SELECT_ALL) + if not isinstance(s1, Selection): + raise TypeError("Expected selection type for first arg") + if not isinstance(s2, Selection): + raise TypeError("Expected selection type for second arg") + if s1.select_type not in valid_select_types: + raise TypeError("Expected hyperslab selection for first arg") + if s2.select_type not in valid_select_types: + raise TypeError("Expected hyperslab selection for second arg") + if s1.shape != s2.shape: + raise ValueError("selections have incompatible shapes") + + slices = [] + rank = len(s1.shape) + for dim in range(rank): + start = max(s1.start[dim], s2.start[dim]) + stop = min(s1.start[dim] + s1.count[dim], s2.start[dim] + s2.count[dim]) + msg = "stepped slices not currently supported" + if s1.step[dim] > 1: + raise ValueError(msg) + if s2.step[dim] > 1: + raise ValueError("stepped slices not currently supported") + if start > stop: + stop = start + slices.append(slice(start, stop, 1)) + slices = tuple(slices) + + return select(s1.shape, slices) + + +class Selection(object): + + """ + Base class for HDF5 dataspace selections. Subclasses support the + "selection protocol", which means they have at least the following + members: + + __init__(shape) => Create a new selection on "shape"-tuple + __getitem__(args) => Perform a selection with the range specified. + What args are allowed depends on the + particular subclass in use. + + id (read-only) => h5py.h5s.SpaceID instance + shape (read-only) => The shape of the dataspace. + mshape (read-only) => The shape of the selection region. + Not guaranteed to fit within "shape", although + the total number of points is less than + product(shape). + nselect (read-only) => Number of selected points. Always equal to + product(mshape). + + broadcast(target_shape) => Return an iterable which yields dataspaces + for read, based on target_shape. + + The base class represents "unshaped" selections (1-D). + """ + + def __init__(self, shape, *args, **kwds): + """ Create a selection. """ + + shape = tuple(shape) + self._shape = shape + + self._select_type = H5S_SELECT_ALL + + @property + def select_type(self): + """ SpaceID instance """ + return self._select_type + + @property + def shape(self): + """ Shape of whole dataspace """ + return self._shape + + @property + def nselect(self): + """ Number of elements currently selected """ + + return self.getSelectNpoints() + + @property + def mshape(self): + """ Shape of selection (always 1-D for this class) """ + return (self.nselect,) + + def getSelectNpoints(self): + npoints = None + if self._select_type == H5S_SELECT_NONE: + npoints = 0 + elif self._select_type == H5S_SELECT_ALL: + dims = self._shape + npoints = 1 + for nextent in dims: + npoints *= nextent + else: + raise IOError("Unsupported select type") + return npoints + + def broadcast(self, target_shape): + """ Get an iterable for broadcasting """ + if np.product(target_shape) != self.nselect: + raise TypeError("Broadcasting is not supported for point-wise selections") + yield self._id + + def __getitem__(self, args): + raise NotImplementedError("This class does not support indexing") + + def __repr__(self): + return f"Selection(shape:{self._shape})" + + +class PointSelection(Selection): + + """ + Represents a point-wise selection. You can supply sequences of + points to the three methods append(), prepend() and set(), or a + single boolean array to __getitem__. + """ + def __init__(self, shape, *args, **kwds): + """ Create a Point selection. """ + Selection.__init__(self, shape, *args, **kwds) + self._points = [] + + @property + def points(self): + """ selection points """ + return self._points + + def getSelectNpoints(self): + npoints = None + if self._select_type == H5S_SELECT_NONE: + npoints = 0 + elif self._select_type == H5S_SELECT_ALL: + dims = self._shape + npoints = 1 + for nextent in dims: + npoints *= nextent + elif self._select_type == H5S_SEL_POINTS: + dims = self._shape + rank = len(dims) + if len(self._points) == rank and not type(self._points[0]) in (list, tuple, np.ndarray): + npoints = 1 + else: + npoints = len(self._points) + else: + raise IOError("Unsupported select type") + return npoints + + def _perform_selection(self, points, op): + """ Internal method which actually performs the selection """ + if isinstance(points, np.ndarray) or True: + points = np.asarray(points, order='C', dtype='u8') + if len(points.shape) == 1: + # points.shape = (1,points.shape[0]) + pass + + if self._select_type != H5S_SEL_POINTS: + op = H5S_SELECT_SET + self._select_type = H5S_SEL_POINTS + + if op == H5S_SELECT_SET: + self._points = points + elif op == H5S_SELECT_APPEND: + self._points.extent(points) + elif op == H5S_SELECT_PREPEND: + tmp = self._points + self._points = points + self._points.extend(tmp) + else: + raise ValueError("Unsupported operation") + + # def _perform_list_selection(points, H5S_SELECT_SET): + + def __getitem__(self, arg): + """ Perform point-wise selection from a NumPy boolean array """ + if isinstance(arg, list): + points = arg + else: + if not (isinstance(arg, np.ndarray) and arg.dtype.kind == 'b'): + raise TypeError("PointSelection __getitem__ only works with bool arrays") + if not arg.shape == self._shape: + raise TypeError("Boolean indexing array has incompatible shape") + + points = np.transpose(arg.nonzero()) + self.set(points) + return self + + def append(self, points): + """ Add the sequence of points to the end of the current selection """ + self._perform_selection(points, H5S_SELECT_APPEND) + + def prepend(self, points): + """ Add the sequence of points to the beginning of the current selection """ + self._perform_selection(points, H5S_SELECT_PREPEND) + + def set(self, points): + """ Replace the current selection with the given sequence of points""" + """ + if isinstance(points, list): + # selection with list of points + self._perform_list_selection(points, H5S_SELECT_SET) + + else: + # selection with boolean ndarray + """ + self._perform_selection(points, H5S_SELECT_SET) + + def __repr__(self): + return f"PointSelection(shape:{self._shape}, {len(self._points)} points)" + + +class SimpleSelection(Selection): + + """ A single "rectangular" (regular) selection composed of only slices + and integer arguments. Can participate in broadcasting. + """ + + @property + def mshape(self): + """ Shape of current selection """ + return self._mshape + + @property + def start(self): + return self._sel[0] + + @property + def count(self): + return self._sel[1] + + @property + def step(self): + return self._sel[2] + + def __init__(self, shape, *args, **kwds): + Selection.__init__(self, shape, *args, **kwds) + rank = len(self._shape) + self._sel = ((0,) * rank, self._shape, (1,) * rank, (False,) * rank) + self._mshape = self._shape + self._select_type = H5S_SELECT_ALL + + def __getitem__(self, args): + + if not isinstance(args, tuple): + args = (args,) + + if self._shape == (): + if len(args) > 0 and args[0] not in (Ellipsis, ()): + raise TypeError("Invalid index for scalar dataset (only ..., () allowed)") + self._select_type = H5S_SELECT_ALL + return self + + start, count, step, scalar = _handle_simple(self._shape, args) + self._sel = (start, count, step, scalar) + + # self._id.select_hyperslab(start, count, step) + self._select_type = H5S_SELECT_HYPERSLABS + + self._mshape = tuple(x for x, y in zip(count, scalar) if not y) + + return self + + def getSelectNpoints(self): + """Return number of elements in current selection + """ + npoints = None + if self._select_type == H5S_SELECT_NONE: + npoints = 0 + elif self._select_type == H5S_SELECT_ALL: + dims = self._shape + npoints = 1 + for nextent in dims: + npoints *= nextent + elif self._select_type == H5S_SELECT_HYPERSLABS: + dims = self._shape + npoints = 1 + rank = len(dims) + for i in range(rank): + npoints *= self.count[i] + else: + raise IOError("Unsupported select type") + return npoints + + def getQueryParam(self): + """ Get select param for use with HDF Rest API""" + param = '' + rank = len(self._shape) + if rank == 0: + return None + + param += "[" + for i in range(rank): + start = self.start[i] + stop = start + (self.count[i] * self.step[i]) + if stop > self._shape[i]: + stop = self._shape[i] + dim_sel = str(start) + ':' + str(stop) + if self.step[i] != 1: + dim_sel += ':' + str(self.step[i]) + if i != rank - 1: + dim_sel += ',' + param += dim_sel + param += ']' + return param + + def broadcast(self, target_shape): + """ Return an iterator over target dataspaces for broadcasting. + + Follows the standard NumPy broadcasting rules against the current + selection shape (self._mshape). + """ + if self._shape == (): + if np.product(target_shape) != 1: + raise TypeError(f"Can't broadcast {target_shape} to scalar") + self._id.select_all() + yield self._id + return + + start, count, step, scalar = self._sel + + rank = len(count) + target = list(target_shape) + + tshape = [] + for idx in range(1, rank + 1): + if len(target) == 0 or scalar[-idx]: # Skip scalar axes + tshape.append(1) + else: + t = target.pop() + if t == 1 or count[-idx] == t: + tshape.append(t) + else: + raise TypeError(f"Can't broadcast {target_shape} -> {count}") + tshape.reverse() + tshape = tuple(tshape) + + chunks = tuple(x // y for x, y in zip(count, tshape)) + nchunks = int(np.product(chunks)) + + if nchunks == 1: + yield self._id + else: + sid = self._id.copy() + sid.select_hyperslab((0,) * rank, tshape, step) + for idx in range(nchunks): + offset = tuple(x * y * z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start)) + sid.offset_simple(offset) + yield sid + + def __repr__(self): + s = f"SimpleSelection(shape:{self._shape}, start: {self._sel[0]}," + s += f" count: {self._sel[1]}, step: {self._sel[2]}" + return s + + +class FancySelection(Selection): + + """ + Implements advanced NumPy-style selection operations in addition to + the standard slice-and-int behavior. + + Indexing arguments may be ints, slices, lists of indicies, or + per-axis (1D) boolean arrays. + + Broadcasting is not supported for these selections. + """ + + @property + def slices(self): + return self._slices + + @property + def mshape(self): + """ Shape of current selection """ + return self._mshape + + def __init__(self, shape, *args, **kwds): + Selection.__init__(self, shape, *args, **kwds) + self._slices = [] + + def __getitem__(self, args): + + if not isinstance(args, tuple): + args = (args,) + + args = _expand_ellipsis(args, len(self._shape)) + select_type = H5S_SELECT_HYPERSLABS # will adjust if we have a coord + + # Create list of slices and/or coordinates + slices = [] + mshape = [] + num_coordinates = None + for idx, arg in enumerate(args): + length = self._shape[idx] + if isinstance(arg, slice): + _, count, _ = _translate_slice(arg, length) # raise exception for invalid slice + if arg.start is None: + start = 0 + else: + start = arg.start + if arg.stop is None: + stop = length + else: + stop = arg.stop + if arg.step is None: + step = 1 + else: + step = arg.step + slices.append(slice(start, stop, step)) + mshape.append(count) + + elif hasattr(arg, 'dtype') and arg.dtype == np.dtype('bool'): + if len(arg.shape) != 1: + raise TypeError("Boolean indexing arrays must be 1-D") + arg = arg.nonzero()[0] + try: + slices.append(list(arg)) + except TypeError: + pass + else: + if sorted(arg) != list(arg): + raise TypeError("Indexing elements must be in increasing order") + mshape.append(len(arg)) + select_type = H5S_SELLECT_FANCY + elif isinstance(arg, list) or hasattr(arg, 'dtype'): + # coordinate selection + slices.append(arg) + for x in arg: + if x < 0 or x >= length: + raise IndexError(f"Index ({arg}) out of range (0-{length - 1})") + if num_coordinates is None: + num_coordinates = len(arg) + elif num_coordinates == len(arg): + # second set of coordinates doesn't effect mshape + continue + else: + # this shouldn't happen since HSDS would have thrown an error + raise ValueError("coordinate num element missmatch") + mshape.append(len(arg)) + select_type = H5S_SELLECT_FANCY + elif isinstance(arg, int): + if arg < 0 or arg >= length: + raise IndexError(f"Index ({arg}) out of range (0-{length - 1})") + slices.append(arg) + elif isinstance(arg, type(Ellipsis)): + slices.append(slice(0, length, 1)) + else: + raise TypeError(f"Unexpected arg type: {arg} - {type(arg)}") + self._slices = slices + self._select_type = select_type + self._mshape = tuple(mshape) + + def getSelectNpoints(self): + """Return number of elements in current selection + """ + npoints = 1 + for idx, s in enumerate(self._slices): + if isinstance(s, slice): + length = self._shape[idx] + _, count, _ = _translate_slice(s, length) + elif isinstance(s, list): + count = len(s) + else: + # scalar selection + count = 1 + npoints *= count + + return npoints + + def getQueryParam(self): + """ Get select param for use with HDF Rest API""" + query = [] + query.append('[') + rank = len(self._slices) + for dim, s in enumerate(self._slices): + if isinstance(s, slice): + if s.start is None and s.stop is None: + query.append(':') + elif s.stop is None: + query.append(f"{s.start}:") + else: + query.append(f"{s.start}:{s.stop}") + if s.step and s.step != 1: + query.append(f":{s.step}") + elif isinstance(s, list) or hasattr(s, 'dtype'): + query.append('[') + for idx, n in enumerate(s): + query.append(str(n)) + if idx + 1 < len(s): + query.append(',') + query.append(']') + else: + # scalar selection + query.append(str(s)) + if dim + 1 < rank: + query.append(',') + query.append(']') + return "".join(query) + + def broadcast(self, target_shape): + raise TypeError("Broadcasting is not supported for complex selections") + + def __repr__(self): + return f"FancySelection(shape:{self._shape}, slices: {self._slices})" + + +def _expand_ellipsis(args, rank): + """ Expand ellipsis objects and fill in missing axes. + """ + n_el = sum(1 for arg in args if arg is Ellipsis) + if n_el > 1: + raise ValueError("Only one ellipsis may be used.") + elif n_el == 0 and len(args) != rank: + args = args + (Ellipsis,) + + final_args = [] + n_args = len(args) + for arg in args: + + if arg is Ellipsis: + final_args.extend((slice(None, None, None),) * (rank - n_args + 1)) + else: + final_args.append(arg) + + if len(final_args) > rank: + raise TypeError("Argument sequence too long") + + return final_args + + +def _handle_simple(shape, args): + """ Process a "simple" selection tuple, containing only slices and + integer objects. Return is a 4-tuple with tuples for start, + count, step, and a flag which tells if the axis is a "scalar" + selection (indexed by an integer). + + If "args" is shorter than "shape", the remaining axes are fully + selected. + """ + args = _expand_ellipsis(args, len(shape)) + + start = [] + count = [] + step = [] + scalar = [] + + for arg, length in zip(args, shape): + if isinstance(arg, slice): + x, y, z = _translate_slice(arg, length) + s = False + else: + try: + x, y, z = _translate_int(int(arg), length) + s = True + except TypeError: + raise TypeError(f'Illegal index "{arg}" (must be a slice or number)') + start.append(x) + count.append(y) + step.append(z) + scalar.append(s) + + return tuple(start), tuple(count), tuple(step), tuple(scalar) + + +def _translate_int(exp, length): + """ Given an integer index, return a 3-tuple + (start, count, step) + for hyperslab selection + """ + if exp < 0: + exp = length + exp + + if not 0 <= exp < length: + raise IndexError(f"Index ({exp}) out of range (0-{length - 1})") + + return exp, 1, 1 + + +def _translate_slice(exp, length): + """ Given a slice object, return a 3-tuple + (start, count, step) + for use with the hyperslab selection routines + """ + start, stop, step = exp.indices(length) + # Now if step > 0, then start and stop are in [0, length]; + # if step < 0, they are in [-1, length - 1] (Python 2.6b2 and later; + # Python issue 3004). + + if step < 1: + raise ValueError("Step must be >= 1 (got %d)" % step) + if stop < start: + stop = start + + count = 1 + (stop - start - 1) // step + + return start, count, step + + +def guess_shape(sid): + """ Given a dataspace, try to deduce the shape of the selection. + + Returns one of: + * A tuple with the selection shape, same length as the dataspace + * A 1D selection shape for point-based and multiple-hyperslab selections + * None, for unselected scalars and for NULL dataspaces + """ + + sel_class = sid.get_simple_extent_type() # Dataspace class + sel_type = sid.get_select_type() # Flavor of selection in use + + if sel_class == 'H5S_NULL': + # NULL dataspaces don't support selections + return None + + elif sel_class == 'H5S_SCALAR': + # NumPy has no way of expressing empty 0-rank selections, so we use None + if sel_type == H5S_SELECT_NONE: + return None + if sel_type == H5S_SELECT_ALL: + return tuple() + + elif sel_class != 'H5S_SIMPLE': + raise TypeError(f"Unrecognized dataspace class {sel_class}") + + # We have a "simple" (rank >= 1) dataspace + + N = sid.get_select_npoints() + rank = len(sid.shape) + + if sel_type == H5S_SELECT_NONE: + return (0,) * rank + + elif sel_type == H5S_SELECT_ALL: + return sid.shape + + elif sel_type == H5S_SEL_POINTS: + # Like NumPy, point-based selections yield 1D arrays regardless of + # the dataspace rank + return (N,) + + elif sel_type != H5S_SELECT_HYPERSLABS: + raise TypeError(f"Unrecognized selection method {sel_type}") + + # We have a hyperslab-based selection + + if N == 0: + return (0,) * rank + + bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds()) + + # Shape of full selection box + boxshape = topcorner - bottomcorner + np.ones((rank,)) + + def get_n_axis(sid, axis): + """ Determine the number of elements selected along a particular axis. + + To do this, we "mask off" the axis by making a hyperslab selection + which leaves only the first point along the axis. For a 2D dataset + with selection box shape (X, Y), for axis 1, this would leave a + selection of shape (X, 1). We count the number of points N_leftover + remaining in the selection and compute the axis selection length by + N_axis = N/N_leftover. + """ + + if (boxshape[axis]) == 1: + return 1 + + start = bottomcorner.copy() + start[axis] += 1 + count = boxshape.copy() + count[axis] -= 1 + + # Throw away all points along this axis + masked_sid = sid.copy() + masked_sid.select_hyperslab(tuple(start), tuple(count), op=H5S_SELECT_NOTB) + + N_leftover = masked_sid.get_select_npoints() + + return N // N_leftover + + shape = tuple(get_n_axis(sid, x) for x in range(rank)) + + if np.product(shape) != N: + # This means multiple hyperslab selections are in effect, + # so we fall back to a 1D shape + return (N,) + + return shape + + +class ScalarSelection(Selection): + + """ + Implements slicing for scalar datasets. + """ + + @property + def mshape(self): + return self._mshape + + def __init__(self, shape, *args, **kwds): + Selection.__init__(self, shape, *args, **kwds) + arg = None + if len(args) > 0: + arg = args[0] + if arg == (): + self._mshape = None + self._select_type = H5S_SELECT_ALL + elif arg == (Ellipsis,): + self._mshape = () + self._select_type = H5S_SELECT_ALL + else: + raise ValueError("Illegal slicing argument for scalar dataspace") diff --git a/src/h5json/writer/h5json_writer.py b/src/h5json/writer/h5json_writer.py index 81f9b4f..fb2c8a7 100644 --- a/src/h5json/writer/h5json_writer.py +++ b/src/h5json/writer/h5json_writer.py @@ -14,6 +14,8 @@ from .h5writer import H5Writer from ..objid import stripId, getCollectionForId +from ..array_util import bytesArrayToList +from .. import selections class H5JsonWriter(H5Writer): """ @@ -39,6 +41,7 @@ def flush(self): # json writer doesn't support incremental updates, so we'll wait # for close to write out database self.log.info("flush") + return False def close(self): """ close storage handle """ @@ -86,7 +89,7 @@ def dumpAttribute(self, obj_id, attr_name): response = {"name": attr_name} response["type"] = item["type"] response["shape"] = item["shape"] - if True: #not self.options.D: + if True: if "value" not in item: self.log.warning("no value key in attribute: " + attr_name) else: @@ -173,10 +176,18 @@ def dumpDataset(self, obj_id): shape_rsp = {} num_elements = 1 shape_rsp["class"] = shapeItem["class"] - if "dims" in shapeItem: + if shapeItem["class"] == "H5S_NULL": + dims = None + num_elements = 0 + elif shapeItem["class"] == "H5S_SCALAR": + dims = () + num_elements = 1 + else: shape_rsp["dims"] = shapeItem["dims"] - for dim in shapeItem["dims"]: - num_elements *= dim + dims = tuple(shapeItem["dims"]) + for extent in dims: + num_elements *= extent + if "maxdims" in shapeItem: maxdims = [] for dim in shapeItem["maxdims"]: @@ -196,8 +207,9 @@ def dumpDataset(self, obj_id): if not self._no_data: if num_elements > 0: - value = self.db.getDatasetValues(obj_id) - response["value"] = value # dump values unless header flag was passed + sel_all = selections.select(dims, ...) + arr = self.db.getDatasetValues(obj_id, sel_all) + response["value"] = bytesArrayToList(arr) # dump values unless header flag was passed else: response["value"] = [] # empty list return response diff --git a/test/unit/h5json_writer_test.py b/test/unit/h5json_writer_test.py index 47ff3b1..df69f02 100644 --- a/test/unit/h5json_writer_test.py +++ b/test/unit/h5json_writer_test.py @@ -17,6 +17,7 @@ from h5json.writer.h5json_writer import H5JsonWriter from h5json.objid import isRootObjId, isValidUuid, isSchema2Id, stripId from h5json.hdf5dtype import special_dtype, Reference +from h5json import selections class H5JsonWriterTest(unittest.TestCase): @@ -45,7 +46,7 @@ def __init__(self, *args, **kwargs): def testGroup(self): - with Hdf5db(h5_writer=H5JsonWriter("/tmp/foo.json", no_data=True), app_logger=self.log) as db: + with Hdf5db(h5_writer=H5JsonWriter("/tmp/foo.json", no_data=False), app_logger=self.log) as db: root_id = db.getObjectIdByPath("/") db.createAttribute(root_id, "attr1", value=[1,2,3,4]) db.createAttribute(root_id, "attr2", 42) @@ -57,6 +58,12 @@ def testGroup(self): g1_1_id = db.createGroup() db.createHardLink(g1_id, "g1.1", g1_1_id) dset_111_id = db.createDataset(shape=(10,10), dtype=np.int32) + arr = np.zeros((10, 10), dtype=np.int32) + for i in range(10): + for j in range(10): + arr[i, j] = i * j + sel_all = selections.select((10, 10), ...) + db.setDatasetValues(dset_111_id, sel_all, arr) db.createHardLink(g1_1_id, "dset1.1.1", dset_111_id) db.createSoftLink(g2_id, "slink", "somewhere") db.createExternalLink(g2_id, "extlink", "somewhere", "someplace") @@ -77,7 +84,6 @@ def testNullSpaceAttribute(self): self.assertTrue("class" in shape_item) self.assertEqual(shape_item["class"], "H5S_NULL") self.assertTrue(item["created"] > time.time() - 1.0) - self.assertEqual(item["modified"], None) value = db.getAttributeValue(root_id, "A1") self.assertEqual(value, None) @@ -98,7 +104,6 @@ def testScalarAttribute(self): self.assertEqual(item["value"], 42) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) shape = item["shape"] self.assertEqual(shape["class"], "H5S_SCALAR") @@ -122,7 +127,6 @@ def testFixedStringAttribute(self): self.assertEqual(item["value"], "Hello, world!") now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) ret_value = db.getAttributeValue(root_id, "A1") @@ -147,7 +151,6 @@ def testVlenAsciiAttribute(self): self.assertEqual(item["value"], "Hello, world!") now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) def testVlenUtf8Attribute(self): with Hdf5db(app_logger=self.log) as db: @@ -170,8 +173,6 @@ def testVlenUtf8Attribute(self): self.assertEqual(item["value"], "Hello, world!") now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) - def testIntAttribute(self): @@ -183,7 +184,6 @@ def testIntAttribute(self): self.assertEqual(item["value"], [2, 3, 5, 7, 11]) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) item_shape = item["shape"] self.assertEqual(item_shape["class"], "H5S_SIMPLE") self.assertEqual(item_shape["dims"], [5,]) @@ -257,7 +257,6 @@ def testCommittedType(self): item = db.getObjectById(ctype_id) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) item_type = item["type"] @@ -294,7 +293,6 @@ def testCommittedCompoundType(self): item = db.getObjectById(ctype_id) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) item_type = item["type"] diff --git a/test/unit/hdf5db_test.py b/test/unit/hdf5db_test.py index 2c2812d..8931dd9 100755 --- a/test/unit/hdf5db_test.py +++ b/test/unit/hdf5db_test.py @@ -14,6 +14,7 @@ import logging import numpy as np from h5json import Hdf5db +from h5json import selections from h5json.objid import isRootObjId, isValidUuid, isSchema2Id from h5json.hdf5dtype import special_dtype, Reference @@ -43,7 +44,6 @@ def __init__(self, *args, **kwargs): def testGroup(self): - with Hdf5db(app_logger=self.log) as db: root_id = db.getObjectIdByPath("/") self.assertTrue(isSchema2Id(root_id)) @@ -120,15 +120,11 @@ def testGroup(self): except KeyError: pass # expected - try: - db.getLink(g2_id, "not_a_link") - self.assertTrue(False) - except KeyError: - pass # expected + ret = db.getLink(g2_id, "not_a_link") + self.assertTrue(ret is None) def testNullSpaceAttribute(self): - with Hdf5db(app_logger=self.log) as db: root_id = db.getObjectIdByPath("/") db.createAttribute(root_id, "A1", None, shape="H5S_NULL", dtype=np.int32) @@ -138,7 +134,6 @@ def testNullSpaceAttribute(self): self.assertTrue("class" in shape_item) self.assertEqual(shape_item["class"], "H5S_NULL") self.assertTrue(item["created"] > time.time() - 1.0) - self.assertEqual(item["modified"], None) value = db.getAttributeValue(root_id, "A1") self.assertEqual(value, None) @@ -159,7 +154,6 @@ def testScalarAttribute(self): self.assertEqual(item["value"], 42) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) shape = item["shape"] self.assertEqual(shape["class"], "H5S_SCALAR") @@ -183,8 +177,8 @@ def testFixedStringAttribute(self): self.assertEqual(item["value"], "Hello, world!") now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) ret_value = db.getAttributeValue(root_id, "A1") + self.assertEqual(ret_value, value.encode("ascii")) def testVlenAsciiAttribute(self): @@ -208,7 +202,6 @@ def testVlenAsciiAttribute(self): self.assertEqual(item["value"], "Hello, world!") now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) def testVlenUtf8Attribute(self): with Hdf5db(app_logger=self.log) as db: @@ -231,8 +224,6 @@ def testVlenUtf8Attribute(self): self.assertEqual(item["value"], "Hello, world!") now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) - def testIntAttribute(self): @@ -244,7 +235,6 @@ def testIntAttribute(self): self.assertEqual(item["value"], [2, 3, 5, 7, 11]) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) item_shape = item["shape"] self.assertEqual(item_shape["class"], "H5S_SIMPLE") self.assertEqual(item_shape["dims"], [5,]) @@ -318,7 +308,6 @@ def testCommittedType(self): item = db.getObjectById(ctype_id) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) item_type = item["type"] @@ -337,7 +326,6 @@ def testCommittedType(self): self.assertEqual(attr_type["length"], 15) self.assertEqual(attr_type["charSet"], "H5T_CSET_ASCII") - def testCommittedCompoundType(self): with Hdf5db(app_logger=self.log) as db: root_id = db.getObjectIdByPath("/") @@ -355,7 +343,6 @@ def testCommittedCompoundType(self): item = db.getObjectById(ctype_id) now = int(time.time()) self.assertTrue(item["created"] > now - 1) - self.assertEqual(item["modified"], None) item_type = item["type"] @@ -376,6 +363,59 @@ def testCommittedCompoundType(self): value = db.getAttributeValue(root_id, "A1") self.assertTrue(isinstance(value, np.ndarray)) + + def testSimpleDataset(self): + with Hdf5db(app_logger=self.log) as db: + nrows = 8 + ncols = 10 + shape = (nrows, ncols) + dtype = np.int32 + root_id = db.getObjectIdByPath("/") + dset_id = db.createDataset(shape, dtype=dtype) + db.createHardLink(root_id, "dset", dset_id) + db.createAttribute(dset_id, "a1", "Hello, world") + sel_all = selections.select(shape, ...) + arr = db.getDatasetValues(dset_id, sel_all) + self.assertEqual(arr.dtype, dtype) + self.assertEqual(arr.shape, shape) + self.assertEqual(arr.min(), 0) + self.assertEqual(arr.max(), 0) + row = np.zeros((ncols,), dtype=dtype) + for i in range(nrows): + row[:] = list(range(i*10, (i + 1)*10)) + row_sel = selections.select(shape, (slice(i, i + 1), slice(0, ncols))) + db.setDatasetValues(dset_id, row_sel, row) + arr = db.getDatasetValues(dset_id, sel_all) + for i in range(nrows): + row = np.array(list(range(i*10, (i + 1)*10)), dtype=dtype) + np.testing.assert_array_equal(arr[i, :], row) + + + def testScalarDataset(self): + dtype = np.int32 + with Hdf5db(app_logger=self.log) as db: + root_id = db.getObjectIdByPath("/") + dset_id = db.createDataset((), dtype=dtype) + db.createHardLink(root_id, "dset", dset_id) + db.createAttribute(dset_id, "a1", "Hello, world") + sel_all = selections.select((), ...) + arr = db.getDatasetValues(dset_id, sel_all) + self.assertEqual(arr.dtype, dtype) + self.assertEqual(arr.shape, ()) + self.assertEqual(arr[()], 0) + db.setDatasetValues(dset_id, sel_all, np.array(42, dtype=dtype)) + arr = db.getDatasetValues(dset_id, sel_all) + self.assertEqual(arr.dtype, dtype) + self.assertEqual(arr.shape, ()) + self.assertEqual(arr.min(), 42) + self.assertEqual(arr.max(), 42) + + + + + + + if __name__ == "__main__":