Skip to content

Commit 72db5cc

Browse files
authored
Merge pull request #209 from ironArray/fixCaching
Improve security of caching
2 parents bea26d9 + d20ed79 commit 72db5cc

File tree

2 files changed

+47
-4
lines changed

2 files changed

+47
-4
lines changed

caterva2/services/sub.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -817,7 +817,8 @@ async def fetch_data(
817817
if field:
818818
raise ArgumentError("Cannot handle both field and filter parameters at the same time")
819819
filter = filter.strip()
820-
container, _ = get_filtered_array(abspath, path, filter, sortby=None)
820+
mtime = abspath.stat().st_mtime
821+
container, _ = get_filtered_array(abspath, path, filter, sortby=None, mtime=mtime)
821822
else:
822823
container = open_b2(abspath, path)
823824

@@ -1900,9 +1901,9 @@ async def htmx_path_info(
19001901
return response
19011902

19021903

1903-
# Global dictionary to store objects
1904+
# `mtime` is part of the lru_cache key: when the underlying file is modified, the key changes and the stale cached result is not reused (see issue #207)
19041905
@functools.lru_cache(maxsize=16)
1905-
def get_filtered_array(abspath, path, filter, sortby):
1906+
def get_filtered_array(abspath, path, filter, sortby, mtime):
19061907
arr = open_b2(abspath, path)
19071908
has_ndfields = hasattr(arr, "fields") and arr.fields != {}
19081909
assert has_ndfields
@@ -1954,7 +1955,8 @@ async def htmx_path_view(
19541955
filter = filter.strip()
19551956
if filter or sortby:
19561957
try:
1957-
arr, idx = get_filtered_array(abspath, path, filter, sortby)
1958+
mtime = abspath.stat().st_mtime
1959+
arr, idx = get_filtered_array(abspath, path, filter, sortby, mtime)
19581960
except TypeError as exc:
19591961
return htmx_error(request, f"Error in filter: {exc}")
19601962
except NameError as exc:

caterva2/tests/test_api.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,47 @@ def test_lazyexpr_fields(auth_client):
758758
[np.testing.assert_array_equal(servered[f], downloaded[f]) for f in downloaded.dtype.fields]
759759

760760

761+
def test_lazyexpr_cache(auth_client):
762+
if not auth_client:
763+
pytest.skip("authentication support needed")
764+
765+
root = auth_client.get("@personal")
766+
oppt = f"{root.name}/sa-1M.b2nd"
767+
768+
N = 1000
769+
rng = np.random.default_rng(seed=1)
770+
it = ((-x + 1, x - 2, x) for x in range(N))
771+
sa = blosc2.fromiter(
772+
it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w"
773+
)
774+
path = auth_client.upload("sa-1M.b2nd", oppt)
775+
arr = auth_client.get(path)
776+
777+
# Test a lazyexpr
778+
arr = auth_client.get(oppt)
779+
servered = arr["(A < 500) & (B >= .1)"][:]
780+
downloaded = arr.slice(None)["(A < 500) & (B >= .1)"][:]
781+
[np.testing.assert_array_equal(servered[f], downloaded[f]) for f in downloaded.dtype.fields]
782+
783+
# Overwrite the file and check that the stale cached result isn't served
784+
N = 10000
785+
rng = np.random.default_rng(seed=1)
786+
it = ((-x + 1, x - 2, x) for x in range(N))
787+
sa = blosc2.fromiter(
788+
it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w"
789+
)
790+
path = auth_client.upload("sa-1M.b2nd", oppt)
791+
arr = auth_client.get(path)
792+
793+
# Test lazyexpr again
794+
servered = arr["(A < - 500) & (B >= .1)"][:]
795+
downloaded = arr.slice(None)["(A < - 500) & (B >= .1)"][:]
796+
[np.testing.assert_allclose(servered[f], downloaded[f]) for f in downloaded.dtype.fields]
797+
798+
# remove file
799+
arr.remove()
800+
801+
761802
def test_expr_from_expr(auth_client):
762803
if not auth_client:
763804
pytest.skip("authentication support needed")

0 commit comments

Comments
 (0)