diff --git a/prometheus_client/errors.py b/prometheus_client/errors.py
new file mode 100644
index 00000000..d9906584
--- /dev/null
+++ b/prometheus_client/errors.py
@@ -0,0 +1,3 @@
+
+class PrometheusClientRuntimeError(RuntimeError):
+    pass
diff --git a/prometheus_client/registry.py b/prometheus_client/registry.py
index 694e4bd8..7163cb67 100644
--- a/prometheus_client/registry.py
+++ b/prometheus_client/registry.py
@@ -1,9 +1,9 @@
 from abc import ABC, abstractmethod
 import copy
-from threading import Lock
 from typing import Dict, Iterable, List, Optional
 
 from .metrics_core import Metric
+from .utils import WarnLock
 
 
 # Ideally this would be a Protocol, but Protocols are only available in Python >= 3.8.
@@ -30,7 +30,7 @@ def __init__(self, auto_describe: bool = False, target_info: Optional[Dict[str,
         self._collector_to_names: Dict[Collector, List[str]] = {}
         self._names_to_collectors: Dict[str, Collector] = {}
         self._auto_describe = auto_describe
-        self._lock = Lock()
+        self._lock = WarnLock()
         self._target_info: Optional[Dict[str, str]] = {}
         self.set_target_info(target_info)
 
diff --git a/prometheus_client/utils.py b/prometheus_client/utils.py
index 0d2b0948..d913415b 100644
--- a/prometheus_client/utils.py
+++ b/prometheus_client/utils.py
@@ -1,4 +1,7 @@
 import math
+from threading import Lock, RLock
+
+from .errors import PrometheusClientRuntimeError
 
 INF = float("inf")
 MINUS_INF = float("-inf")
@@ -22,3 +25,43 @@ def floatToGoString(d):
             mantissa = f'{s[0]}.{s[1:dot]}{s[dot + 1:]}'.rstrip('0.')
             return f'{mantissa}e+0{dot - 1}'
         return s
+
+
+class WarnLock:
+    """A wrapper around RLock and Lock that prevents deadlocks.
+
+    Raises PrometheusClientRuntimeError on attempts to re-enter the critical
+    section from a single thread. Intended to be used as a context manager.
+    """
+    error_msg = (
+        'Attempt to enter a non reentrant context from a single thread.'
+        ' It is possible that the client code is trying to register or update'
+        ' metrics from within metric registration code or from a signal handler'
+        ' while metrics are being registered or updated.'
+        ' This is unsafe and cannot be allowed. It would result in a deadlock'
+        ' if this exception was not raised.'
+    )
+
+    def __init__(self):
+        self._rlock = RLock()
+        self._lock = Lock()
+
+    def __enter__(self):
+        # The RLock serialises different threads; the owning thread passes
+        # straight through it, so a failed non-blocking acquire of the inner
+        # Lock can only mean same-thread re-entry.
+        self._rlock.acquire()
+        if not self._lock.acquire(blocking=False):
+            self._rlock.release()
+            raise PrometheusClientRuntimeError(self.error_msg)
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self._lock.release()
+        self._rlock.release()
+
+    def _locked(self):
+        # For use in tests.
+        # Query the inner non-reentrant lock: probing the RLock with a
+        # non-blocking acquire succeeds for the owning thread, so it would
+        # report "unlocked" from inside the critical section.
+        return self._lock.locked()
diff --git a/prometheus_client/values.py b/prometheus_client/values.py
index 6ff85e3b..ed4e75cd 100644
--- a/prometheus_client/values.py
+++ b/prometheus_client/values.py
@@ -47,8 +47,7 @@ def MultiProcessValue(process_identifier=os.getpid):
     files = {}
     values = []
     pid = {'value': process_identifier()}
-    # Use a single global lock when in multi-processing mode
-    # as we presume this means there is no threading going on.
+    # Use a single global lock when in multi-processing mode.
     # This avoids the need to also have mutexes in __MmapDict.
     lock = Lock()
 
diff --git a/tests/test_core.py b/tests/test_core.py
index f28c9abc..5b12e1c6 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -13,6 +13,7 @@
     StateSetMetricFamily, Summary, SummaryMetricFamily, UntypedMetricFamily,
 )
 from prometheus_client.decorator import getargspec
+from prometheus_client.errors import PrometheusClientRuntimeError
 from prometheus_client.metrics import _get_use_created
 from prometheus_client.validation import (
     disable_legacy_validation, enable_legacy_validation,
@@ -1004,7 +1005,20 @@ def test_restricted_registry_does_not_yield_while_locked(self):
         m = Metric('target', 'Target metadata', 'info')
         m.samples = [Sample('target_info', {'foo': 'bar'}, 1)]
         for _ in registry.restricted_registry(['target_info', 's_sum']).collect():
-            self.assertFalse(registry._lock.locked())
+            self.assertFalse(registry._lock._locked())
+
+    def test_registry_deadlock_detection(self):
+        registry = CollectorRegistry(auto_describe=True)
+
+        class RecursiveCollector:
+            def collect(self):
+                Counter('x', 'help', registry=registry)
+                return [CounterMetricFamily('c_total', 'help', value=1)]
+
+        expected_msg = 'Attempt to enter a non reentrant context from a single thread.'
+        self.assertRaisesRegex(
+            PrometheusClientRuntimeError, expected_msg, registry.register, RecursiveCollector()
+        )
 
 
 if __name__ == '__main__':