Skip to content

Commit f41d672

Browse files
Fix memory leak in metadata cache (#442)
* Fix memory leak in metadata cache
1 parent 61d0174 commit f41d672

File tree

5 files changed

+143
-17
lines changed

5 files changed

+143
-17
lines changed

docs/release_notes/version_0.10_updates.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@ Version 0.10 Updates
22
/////////////////////////
33

44

5+
Version 0.10.1
6+
===============
7+
8+
Fixes
9+
++++++
10+
11+
- Fixed memory leak in GRIB field metadata cache
12+
13+
514
Version 0.10.0
615
===============
716

src/earthkit/data/core/metadata.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,10 @@
99

1010
from abc import ABCMeta
1111
from abc import abstractmethod
12-
from functools import lru_cache
1312

1413
from earthkit.data.core.constants import DATETIME
1514
from earthkit.data.core.constants import GRIDSPEC
1615

17-
try:
18-
from functools import cache as memoise # noqa
19-
except ImportError:
20-
memoise = lru_cache
21-
2216

2317
class Metadata(metaclass=ABCMeta):
2418
r"""Base class to represent metadata.
@@ -54,8 +48,10 @@ class Metadata(metaclass=ABCMeta):
5448
def __init__(self, extra=None, cache=False):
5549
if extra is not None:
5650
self.extra = extra
57-
if cache:
58-
self.get = memoise(self.get)
51+
if cache is False:
52+
self._cache = None
53+
else:
54+
self._cache = dict() if cache is True else cache
5955

6056
def __iter__(self):
6157
"""Return an iterator over the metadata keys."""
@@ -196,12 +192,21 @@ def get(self, key, default=None, *, astype=None, raise_on_missing=False):
196192
a missing value.
197193
198194
"""
195+
if self._cache is not None:
196+
cache_id = (key, default, astype, raise_on_missing)
197+
if cache_id in self._cache:
198+
return self._cache[cache_id]
199+
199200
if self._is_extra_key(key):
200201
v = self._get_extra_key(key, default=default, astype=astype)
201202
elif self._is_custom_key(key):
202203
v = self._get_custom_key(key, default=default, astype=astype, raise_on_missing=raise_on_missing)
203204
else:
204205
v = self._get(key, default=default, astype=astype, raise_on_missing=raise_on_missing)
206+
207+
if self._cache is not None:
208+
self._cache[cache_id] = v
209+
205210
return v
206211

207212
@abstractmethod

src/earthkit/data/readers/grib/codes.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ def _metadata(self):
285285
cache = False
286286
if self._manager is not None:
287287
cache = self._manager.use_grib_metadata_cache
288+
if cache:
289+
cache = self._manager._make_metadata_cache()
288290
return GribFieldMetadata(self, cache=cache)
289291

290292
def __repr__(self):
@@ -329,10 +331,10 @@ def message(self):
329331
def _diag(self):
330332
r = r = defaultdict(int)
331333
try:
332-
md_cache = self._metadata.get.cache_info()
334+
md_cache = self._metadata._cache
335+
r["metadata_cache_size"] += len(md_cache)
333336
r["metadata_cache_hits"] += md_cache.hits
334337
r["metadata_cache_misses"] += md_cache.misses
335-
r["metadata_cache_size"] += md_cache.currsize
336338
except Exception:
337339
pass
338340
return r

src/earthkit/data/readers/grib/index/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,9 @@ def _field_created(self):
306306
def _handle_created(self):
307307
self.handle_create_count += 1
308308

309+
def _make_metadata_cache(self):
310+
return dict()
311+
309312
def diag(self):
310313
r = defaultdict(int)
311314
r["grib_field_policy"] = self.grib_field_policy

tests/grib/test_grib_cache.py

Lines changed: 114 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,44 @@
1616
from earthkit.data.testing import earthkit_examples_file
1717

1818

19+
class TestMetadataCache:
    """Dict-backed metadata cache that records how it is used.

    Implements the part of the mapping protocol needed by a cache
    (``in``, ``[]`` read, ``[]`` write, ``len``) and keeps two counters:

    - ``hits``: number of successful ``cache[key]`` reads
    - ``misses``: number of ``cache[key] = value`` stores
    """

    # The class name starts with "Test" but this is a helper, not a test
    # case. Opt out of pytest collection explicitly; otherwise pytest warns
    # that it cannot collect a test class with an ``__init__`` constructor.
    __test__ = False

    def __init__(self):
        self.hits = 0
        self.misses = 0
        self.data = {}

    def __contains__(self, key):
        return key in self.data

    def __getitem__(self, key):
        # Look up first so that a failed lookup (KeyError) is not counted
        # as a hit.
        value = self.data[key]
        self.hits += 1
        return value

    def __setitem__(self, key, value):
        # Store first so that a failed store (e.g. unhashable key) is not
        # counted as a miss.
        self.data[key] = value
        self.misses += 1

    def __len__(self):
        return len(self.data)
38+
39+
40+
@pytest.fixture
def patch_metadata_cache(monkeypatch):
    """Patch ``GribResourceManager`` so that it creates ``TestMetadataCache`` objects.

    Tests requesting this fixture get metadata caches that count hits and
    misses, which the diagnostics can then report.
    """
    from earthkit.data.readers.grib.index import GribResourceManager

    monkeypatch.setattr(
        GribResourceManager,
        "_make_metadata_cache",
        lambda self: TestMetadataCache(),
    )
48+
49+
1950
def _check_diag(diag, ref):
2051
for k, v in ref.items():
2152
assert diag[k] == v, f"{k}={diag[k]} != {v}"
2253

2354

2455
@pytest.mark.parametrize("handle_cache_size", [1, 5])
25-
def test_grib_cache_basic(handle_cache_size):
56+
def test_grib_cache_basic(handle_cache_size, patch_metadata_cache):
2657

2758
with settings.temporary(
2859
{
@@ -99,7 +130,81 @@ def test_grib_cache_basic(handle_cache_size):
99130
assert ds[0].handle == md._handle
100131

101132

102-
def test_grib_cache_options_1():
133+
def test_grib_cache_basic_non_patched():
134+
"""This test is the same as test_grib_cache_basic but without the patch_metadata_cache fixture.
135+
So metadata cache hits and misses are not counted."""
136+
with settings.temporary(
137+
{
138+
"grib-field-policy": "persistent",
139+
"grib-handle-policy": "cache",
140+
"grib-handle-cache-size": 1,
141+
"use-grib-metadata-cache": True,
142+
}
143+
):
144+
ds = from_source("file", earthkit_examples_file("tuv_pl.grib"))
145+
assert len(ds) == 18
146+
147+
cache = ds._manager
148+
assert cache
149+
150+
# unique values
151+
ref_vals = ds.unique_values("paramId", "levelist", "levtype", "valid_datetime")
152+
153+
ref = {
154+
"field_cache_size": 18,
155+
"field_create_count": 18,
156+
"handle_cache_size": 1,
157+
"handle_create_count": 18,
158+
"current_handle_count": 0,
159+
# "metadata_cache_hits": 0,
160+
# "metadata_cache_misses": 18 * 6,
161+
"metadata_cache_size": 18 * 6,
162+
}
163+
_check_diag(ds._diag(), ref)
164+
165+
for i, f in enumerate(ds):
166+
assert i in cache.field_cache, f"{i} not in cache"
167+
assert id(f) == id(cache.field_cache[i]), f"{i} not the same object"
168+
169+
_check_diag(ds._diag(), ref)
170+
171+
# unique values repeated
172+
vals = ds.unique_values("paramId", "levelist", "levtype", "valid_datetime")
173+
174+
assert vals == ref_vals
175+
176+
ref = {
177+
"field_cache_size": 18,
178+
"field_create_count": 18,
179+
"handle_cache_size": 1,
180+
"handle_create_count": 18,
181+
"current_handle_count": 0,
182+
# "metadata_cache_hits": 18 * 4,
183+
# "metadata_cache_misses": 18 * 6,
184+
"metadata_cache_size": 18 * 6,
185+
}
186+
_check_diag(ds._diag(), ref)
187+
188+
# order by
189+
ds.order_by(["levelist", "valid_datetime", "paramId", "levtype"])
190+
ref = {
191+
"field_cache_size": 18,
192+
"field_create_count": 18,
193+
"handle_cache_size": 1,
194+
"handle_create_count": 18,
195+
"current_handle_count": 0,
196+
# "metadata_cache_misses": 18 * 6,
197+
"metadata_cache_size": 18 * 6,
198+
}
199+
_check_diag(ds._diag(), ref)
200+
201+
# metadata object is not decoupled from the field object
202+
md = ds[0].metadata()
203+
assert hasattr(md, "_field")
204+
assert ds[0].handle == md._handle
205+
206+
207+
def test_grib_cache_options_1(patch_metadata_cache):
103208
with settings.temporary(
104209
{
105210
"grib-field-policy": "persistent",
@@ -179,7 +284,7 @@ def test_grib_cache_options_1():
179284
_check_diag(ds._diag(), ref)
180285

181286

182-
def test_grib_cache_options_2():
287+
def test_grib_cache_options_2(patch_metadata_cache):
183288
with settings.temporary(
184289
{
185290
"grib-field-policy": "persistent",
@@ -261,7 +366,7 @@ def test_grib_cache_options_2():
261366
_check_diag(ds._diag(), ref)
262367

263368

264-
def test_grib_cache_options_3():
369+
def test_grib_cache_options_3(patch_metadata_cache):
265370
with settings.temporary(
266371
{
267372
"grib-field-policy": "persistent",
@@ -341,7 +446,7 @@ def test_grib_cache_options_3():
341446
_check_diag(ds._diag(), ref)
342447

343448

344-
def test_grib_cache_options_4():
449+
def test_grib_cache_options_4(patch_metadata_cache):
345450
with settings.temporary(
346451
{
347452
"grib-field-policy": "temporary",
@@ -420,6 +525,7 @@ def test_grib_cache_options_4():
420525
_check_diag(
421526
ds[0]._diag(), {"metadata_cache_hits": 0, "metadata_cache_misses": 0, "metadata_cache_size": 0}
422527
)
528+
423529
ref["field_create_count"] += 2
424530
ref["handle_create_count"] += 1
425531
_check_diag(ds._diag(), ref)
@@ -428,12 +534,13 @@ def test_grib_cache_options_4():
428534
_check_diag(
429535
ds[0]._diag(), {"metadata_cache_hits": 0, "metadata_cache_misses": 0, "metadata_cache_size": 0}
430536
)
537+
431538
ref["field_create_count"] += 2
432539
ref["handle_create_count"] += 1
433540
_check_diag(ds._diag(), ref)
434541

435542

436-
def test_grib_cache_options_5():
543+
def test_grib_cache_options_5(patch_metadata_cache):
437544
with settings.temporary(
438545
{
439546
"grib-field-policy": "temporary",
@@ -529,7 +636,7 @@ def test_grib_cache_options_5():
529636
_check_diag(ds._diag(), ref)
530637

531638

532-
def test_grib_cache_options_6():
639+
def test_grib_cache_options_6(patch_metadata_cache):
533640
with settings.temporary(
534641
{
535642
"grib-field-policy": "temporary",

0 commit comments

Comments
 (0)