Skip to content

Commit bf1136a

Browse files
Feature/fix memory leak in metadata cache (#443)
* Fix memory leak in metadata cache
1 parent f41d672 commit bf1136a

File tree

3 files changed

+128
-145
lines changed

3 files changed

+128
-145
lines changed

src/earthkit/data/readers/grib/codes.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -245,18 +245,19 @@ class GribField(Field):
245245

246246
_handle = None
247247

248-
def __init__(self, path, offset, length, backend, manager=None):
248+
def __init__(self, path, offset, length, backend, handle_manager=None, use_metadata_cache=False):
249249
super().__init__(backend)
250250
self.path = path
251251
self._offset = offset
252252
self._length = length
253-
self._manager = manager
253+
self._handle_manager = handle_manager
254+
self._use_metadata_cache = use_metadata_cache
254255

255256
@property
256257
def handle(self):
257258
r""":class:`CodesHandle`: Get an object providing access to the low level GRIB message structure."""
258-
if self._manager is not None:
259-
handle = self._manager.handle(self, self._create_handle)
259+
if self._handle_manager is not None:
260+
handle = self._handle_manager.handle(self, self._create_handle)
260261
if handle is None:
261262
raise RuntimeError(f"Could not get a handle for offset={self.offset} in {self.path}")
262263
return handle
@@ -282,13 +283,14 @@ def offset(self):
282283

283284
@cached_property
284285
def _metadata(self):
285-
cache = False
286-
if self._manager is not None:
287-
cache = self._manager.use_grib_metadata_cache
288-
if cache:
289-
cache = self._manager._make_metadata_cache()
286+
cache = self._use_metadata_cache
287+
if cache:
288+
cache = self._make_metadata_cache()
290289
return GribFieldMetadata(self, cache=cache)
291290

291+
def _make_metadata_cache(self):
292+
return dict()
293+
292294
def __repr__(self):
293295
return "GribField(%s,%s,%s,%s,%s,%s)" % (
294296
self._metadata.get("shortName", None),

src/earthkit/data/readers/grib/index/__init__.py

Lines changed: 92 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -217,126 +217,106 @@ def __init__(self, *args, **kwargs):
217217
FieldList._init_from_multi(self, self)
218218

219219

220-
class GribResourceManager:
221-
def __init__(
222-
self, owner, grib_field_policy, grib_handle_policy, grib_handle_cache_size, use_grib_metadata_cache
223-
):
224-
from earthkit.data.core.settings import SETTINGS
225-
226-
def _get(v, name):
227-
return v if v is not None else SETTINGS.get(name)
220+
class GribFieldManager:
221+
def __init__(self, policy, owner):
222+
self.policy = policy
223+
self.cache = None
228224

229-
self.grib_field_policy = _get(grib_field_policy, "grib-field-policy")
230-
self.grib_handle_policy = _get(grib_handle_policy, "grib-handle-policy")
231-
self.grib_handle_cache_size = _get(grib_handle_cache_size, "grib-handle-cache-size")
232-
self.use_grib_metadata_cache = _get(use_grib_metadata_cache, "use-grib-metadata-cache")
233-
234-
# fields
235-
self.field_cache = None
236-
if self.grib_field_policy == "persistent":
225+
if self.policy == "persistent":
237226
from lru import LRU
238227

239228
# TODO: the number of fields might only be available only later (e.g. fieldlists with
240-
# an SQL index). Consider making _field_cache a cached property.
229+
# an SQL index). Consider making cache a cached property.
241230
n = len(owner)
242231
if n > 0:
243-
self.field_cache = LRU(n)
244-
245-
# handles
246-
self.handle_cache = None
247-
if self.grib_handle_policy == "cache":
248-
if self.grib_handle_cache_size > 0:
249-
from lru import LRU
232+
self.cache = LRU(n)
250233

251-
self.handle_cache = LRU(self.grib_handle_cache_size)
252-
else:
253-
raise ValueError(
254-
'grib_handle_cache_size must be greater than 0 when grib_handle_policy="cache"'
255-
)
256-
257-
self.handle_create_count = 0
258234
self.field_create_count = 0
259235

260236
# check consistency
261-
if self.field_cache is not None:
262-
self.grib_field_policy == "persistent"
263-
else:
264-
self.grib_field_policy == "temporary"
265-
266-
if self.handle_cache is not None:
267-
self.grib_handle_policy == "cache"
268-
else:
269-
self.grib_handle_policy in ["persistent", "temporary"]
237+
if self.cache is not None:
238+
assert self.policy == "persistent"
270239

271240
def field(self, n, create):
272-
if self.grib_field_policy == "persistent":
273-
if n in self.field_cache:
274-
return self.field_cache[n]
241+
if self.cache is not None:
242+
if n in self.cache:
243+
return self.cache[n]
275244
else:
276245
field = create(n)
277246
self._field_created()
278-
self.field_cache[n] = field
247+
self.cache[n] = field
279248
return field
280249
else:
281250
self._field_created()
282251
return create(n)
283252

253+
def _field_created(self):
254+
self.field_create_count += 1
255+
256+
def diag(self):
257+
r = defaultdict(int)
258+
r["grib_field_policy"] = self.policy
259+
if self.cache is not None:
260+
r["field_cache_size"] = len(self.cache)
261+
262+
r["field_create_count"] = self.field_create_count
263+
return r
264+
265+
266+
class GribHandleManager:
267+
def __init__(self, policy, cache_size):
268+
self.policy = policy
269+
self.max_cache_size = cache_size
270+
self.cache = None
271+
272+
if self.policy == "cache":
273+
if self.max_cache_size > 0:
274+
from lru import LRU
275+
276+
self.cache = LRU(self.max_cache_size)
277+
else:
278+
raise ValueError(
279+
'grib_handle_cache_size must be greater than 0 when grib_handle_policy="cache"'
280+
)
281+
282+
self.handle_create_count = 0
283+
284+
# check consistency
285+
if self.cache is not None:
286+
self.policy == "cache"
287+
else:
288+
self.policy in ["persistent", "temporary"]
289+
284290
def handle(self, field, create):
285-
if self.grib_handle_policy == "cache":
291+
if self.policy == "cache":
286292
key = (field.path, field._offset)
287-
if key in self.handle_cache:
288-
return self.handle_cache[key]
293+
if key in self.cache:
294+
return self.cache[key]
289295
else:
290296
handle = create()
291297
self._handle_created()
292-
self.handle_cache[key] = handle
298+
self.cache[key] = handle
293299
return handle
294-
elif self.grib_handle_policy == "persistent":
300+
elif self.policy == "persistent":
295301
if field._handle is None:
296302
field._handle = create()
297303
self._handle_created()
298304
return field._handle
299-
elif self.grib_handle_policy == "temporary":
305+
elif self.policy == "temporary":
300306
self._handle_created()
301307
return create()
302308

303-
def _field_created(self):
304-
self.field_create_count += 1
305-
306309
def _handle_created(self):
307310
self.handle_create_count += 1
308311

309-
def _make_metadata_cache(self):
310-
return dict()
311-
312312
def diag(self):
313313
r = defaultdict(int)
314-
r["grib_field_policy"] = self.grib_field_policy
315-
r["grib_handle_policy"] = self.grib_handle_policy
316-
r["grib_handle_cache_size"] = self.grib_handle_cache_size
317-
318-
if self.field_cache is not None:
319-
r["field_cache_size"] = len(self.field_cache)
320-
321-
r["field_create_count"] = self.field_create_count
322-
323-
if self.handle_cache is not None:
324-
r["handle_cache_size"] = len(self.handle_cache)
314+
r["grib_handle_policy"] = self.policy
315+
r["grib_handle_cache_size"] = self.max_cache_size
316+
if self.cache is not None:
317+
r["handle_cache_size"] = len(self.cache)
325318

326319
r["handle_create_count"] = self.handle_create_count
327-
328-
if self.field_cache is not None:
329-
for f in self.field_cache.values():
330-
if f._handle is not None:
331-
r["current_handle_count"] += 1
332-
333-
try:
334-
md_cache = f._diag()
335-
for k in ["metadata_cache_hits", "metadata_cache_misses", "metadata_cache_size"]:
336-
r[k] += md_cache[k]
337-
except Exception:
338-
pass
339-
340320
return r
341321

342322

@@ -351,18 +331,29 @@ def __init__(
351331
**kwargs,
352332
):
353333
super().__init__(*args, **kwargs)
354-
self._manager = GribResourceManager(
355-
self, grib_field_policy, grib_handle_policy, grib_handle_cache_size, use_grib_metadata_cache
334+
335+
from earthkit.data.core.settings import SETTINGS
336+
337+
def _get_opt(v, name):
338+
return v if v is not None else SETTINGS.get(name)
339+
340+
self._field_manager = GribFieldManager(_get_opt(grib_field_policy, "grib-field-policy"), self)
341+
self._handle_manager = GribHandleManager(
342+
_get_opt(grib_handle_policy, "grib-handle-policy"),
343+
_get_opt(grib_handle_cache_size, "grib-handle-cache-size"),
356344
)
357345

346+
self._use_metadata_cache = _get_opt(use_grib_metadata_cache, "use-grib-metadata-cache")
347+
358348
def _create_field(self, n):
359349
part = self.part(n)
360350
field = GribField(
361351
part.path,
362352
part.offset,
363353
part.length,
364354
self.array_backend,
365-
manager=self._manager,
355+
handle_manager=self._handle_manager,
356+
use_metadata_cache=self._use_metadata_cache,
366357
)
367358
if field is None:
368359
raise RuntimeError(f"Could not get a handle for part={part}")
@@ -376,13 +367,29 @@ def _getitem(self, n):
376367
if n >= len(self):
377368
raise IndexError(f"Index {n} out of range")
378369

379-
return self._manager.field(n, self._create_field)
370+
return self._field_manager.field(n, self._create_field)
380371

381372
def __len__(self):
382373
return self.number_of_parts()
383374

384375
def _diag(self):
385-
return self._manager.diag()
376+
r = defaultdict(int)
377+
r.update(self._field_manager.diag())
378+
r.update(self._handle_manager.diag())
379+
380+
if self._field_manager.cache is not None:
381+
for f in self._field_manager.cache.values():
382+
if f._handle is not None:
383+
r["current_handle_count"] += 1
384+
385+
if self._use_metadata_cache:
386+
try:
387+
md_cache = f._diag()
388+
for k in ["metadata_cache_hits", "metadata_cache_misses", "metadata_cache_size"]:
389+
r[k] += md_cache[k]
390+
except Exception:
391+
pass
392+
return r
386393

387394
@abstractmethod
388395
def part(self, n):

0 commit comments

Comments
 (0)