From 9f742a92255fa4c37f9c060b099edf0a46172814 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 23 May 2025 16:39:24 +0200 Subject: [PATCH 1/2] Adding field-indexing to caterva2 --- caterva2/client.py | 30 ++++++++++++++++++------------ caterva2/services/sub.py | 17 ++++++++++++----- caterva2/tests/test_api.py | 19 +++++++++++++++++++ 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/caterva2/client.py b/caterva2/client.py index 5a04c65c..6c8f975e 100644 --- a/caterva2/client.py +++ b/caterva2/client.py @@ -340,7 +340,10 @@ def __getitem__(self, key): >>> ds[0:10] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) """ - return self.slice(key, as_blosc2=False) + if isinstance(key, str): # used a filter or field to index so want blosc2 array as result + return self.client.get_slice(self.path, key, as_blosc2=True) + else: + return self.slice(key, as_blosc2=False) def slice( self, key: int | slice | Sequence[slice], as_blosc2: bool = True @@ -868,20 +871,19 @@ def fetch(self, path, slice_=None): [(1.0000500e-02, 1.0100005), (1.0050503e-02, 1.0100505)]], dtype=[('a', '= .1"] + downloaded = arr.slice(None)["a < 500 & b >= .1"] + np.testing.assert_allclose(servered[:], downloaded[:]) + + def test_expr_from_expr(auth_client): if not auth_client: pytest.skip("authentication support needed") From f2bd661fac0018e737977bc485eeba25f4ae7f25 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 27 May 2025 12:28:21 +0200 Subject: [PATCH 2/2] Enabled field filtering for Proxy sources --- caterva2/client.py | 27 ++++++++++++++++++++------- caterva2/services/sub.py | 14 ++++++++++++-- caterva2/tests/test_api.py | 7 +++---- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/caterva2/client.py b/caterva2/client.py index 6c8f975e..ff541818 100644 --- a/caterva2/client.py +++ b/caterva2/client.py @@ -313,13 +313,13 @@ def get_download_url(self): """ return api_utils.get_download_url(self.path, self.urlbase) - def __getitem__(self, key): + def __getitem__(self, item): """ Retrieves a slice of the dataset. Parameters ---------- - key : int, slice, tuple of ints and slices, or None + item : int, slice, tuple of ints and slices, or None Specifies the slice to fetch. Returns @@ -340,10 +340,17 @@ def __getitem__(self, key): >>> ds[0:10] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) """ - if isinstance(key, str): # used a filter or field to index so want blosc2 array as result - return self.client.get_slice(self.path, key, as_blosc2=True) + if isinstance(item, str): # used a filter or field to index so want blosc2 array as result + fields = np.dtype(eval(self.dtype)).fields + if fields is None: + raise ValueError("The array is not structured (its dtype does not have fields)") + if item in fields: + # A shortcut to access fields + return self.client.get_slice(self.path, as_blosc2=True, field=item) # arg key is None + else: # used a filter (possibly lazyexpr) + return self.client.get_slice(self.path, item, as_blosc2=True) else: - return self.slice(key, as_blosc2=False) + return self.slice(item, as_blosc2=False) def slice( self, key: int | slice | Sequence[slice], as_blosc2: bool = True @@ -874,7 +881,7 @@ def fetch(self, path, slice_=None): # Does the same as get_slice but forces return of np array return self.get_slice(path, key=slice_, as_blosc2=False) - def get_slice(self, path, key=None, as_blosc2=True): + def get_slice(self, path, key=None, as_blosc2=True, field=None): """Get a slice of a File/Dataset. Parameters @@ -888,6 +895,8 @@ def get_slice(self, path, key=None, as_blosc2=True): If True (default), the result will be returned as a Blosc2 object (either a `SChunk` or `NDArray`). If False, it will be returned as a NumPy array (equivalent to `self[key]`). + field: str + Shortcut to access a field in a structured array. If provided, `key` is ignored. Returns ------- @@ -904,7 +913,11 @@ def get_slice(self, path, key=None, as_blosc2=True): dtype=[('a', '= .1"] - downloaded = arr.slice(None)["a < 500 & b >= .1"] - np.testing.assert_allclose(servered[:], downloaded[:]) + servered = arr["(a < 500) & (b >= .1)"][:] + downloaded = arr.slice(None)["(a < 500) & (b >= .1)"][:] + [np.testing.assert_array_equal(servered[f], downloaded[f]) for f in downloaded.dtype.fields] def test_expr_from_expr(auth_client):