Skip to content

Commit 1e46679

Browse files
authored
Reapply "fix(span-buffer): Do not use fork (#93026)" (#93218)
The original PR was broken, but the error could only be reproduced if `sentry.conf.py` was located at a path other than `~/.sentry/sentry.conf.py`. Otherwise, the entire system still somehow worked due to hardcoded defaults in `sentry.runner.importer`.
1 parent 5e4739c commit 1e46679

File tree

2 files changed

+49
-20
lines changed

2 files changed

+49
-20
lines changed

src/sentry/spans/consumers/process/flusher.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import logging
22
import multiprocessing
3+
import multiprocessing.context
34
import threading
45
import time
56
from collections.abc import Callable
7+
from functools import partial
68

79
import orjson
810
import sentry_sdk
@@ -15,6 +17,7 @@
1517
from sentry.conf.types.kafka_definition import Topic
1618
from sentry.spans.buffer import SpansBuffer
1719
from sentry.utils import metrics
20+
from sentry.utils.arroyo import run_with_initialized_sentry
1821
from sentry.utils.kafka_config import get_kafka_producer_cluster_options, get_topic_definition
1922

2023
MAX_PROCESS_RESTARTS = 10
@@ -44,41 +47,45 @@ def __init__(
4447
self.buffer = buffer
4548
self.next_step = next_step
4649

47-
self.stopped = multiprocessing.Value("i", 0)
50+
self.mp_context = mp_context = multiprocessing.get_context("spawn")
51+
self.stopped = mp_context.Value("i", 0)
4852
self.redis_was_full = False
49-
self.current_drift = multiprocessing.Value("i", 0)
50-
self.backpressure_since = multiprocessing.Value("i", 0)
51-
self.healthy_since = multiprocessing.Value("i", 0)
53+
self.current_drift = mp_context.Value("i", 0)
54+
self.backpressure_since = mp_context.Value("i", 0)
55+
self.healthy_since = mp_context.Value("i", 0)
5256
self.process_restarts = 0
5357
self.produce_to_pipe = produce_to_pipe
5458

5559
self._create_process()
5660

5761
def _create_process(self):
58-
from sentry.utils.arroyo import _get_arroyo_subprocess_initializer
59-
6062
# Optimistically reset healthy_since to avoid a race between the
6163
# starting process and the next flush cycle. Keep back pressure across
6264
# the restart, however.
6365
self.healthy_since.value = int(time.time())
6466

65-
make_process: Callable[..., multiprocessing.Process | threading.Thread]
67+
make_process: Callable[..., multiprocessing.context.SpawnProcess | threading.Thread]
6668
if self.produce_to_pipe is None:
67-
initializer = _get_arroyo_subprocess_initializer(None)
68-
make_process = multiprocessing.Process
69+
target = run_with_initialized_sentry(
70+
SpanFlusher.main,
71+
# unpickling buffer will import sentry, so it needs to be
72+
# pickled separately. at the same time, pickling
73+
# synchronization primitives like multiprocessing.Value can
74+
# only be done by the Process
75+
self.buffer,
76+
)
77+
make_process = self.mp_context.Process
6978
else:
70-
initializer = None
79+
target = partial(SpanFlusher.main, self.buffer)
7180
make_process = threading.Thread
7281

7382
self.process = make_process(
74-
target=SpanFlusher.main,
83+
target=target,
7584
args=(
76-
initializer,
7785
self.stopped,
7886
self.current_drift,
7987
self.backpressure_since,
8088
self.healthy_since,
81-
self.buffer,
8289
self.produce_to_pipe,
8390
),
8491
daemon=True,
@@ -88,17 +95,13 @@ def _create_process(self):
8895

8996
@staticmethod
9097
def main(
91-
initializer: Callable | None,
98+
buffer: SpansBuffer,
9299
stopped,
93100
current_drift,
94101
backpressure_since,
95102
healthy_since,
96-
buffer: SpansBuffer,
97103
produce_to_pipe: Callable[[KafkaPayload], None] | None,
98104
) -> None:
99-
if initializer:
100-
initializer()
101-
102105
sentry_sdk.set_tag("sentry_spans_buffer_component", "flusher")
103106

104107
try:

src/sentry/utils/arroyo.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pickle
44
from collections.abc import Callable, Mapping
55
from functools import partial
6-
from typing import Any
6+
from typing import TYPE_CHECKING, Any
77

88
from arroyo.processing.strategies.run_task import RunTask
99
from arroyo.processing.strategies.run_task_with_multiprocessing import (
@@ -17,7 +17,9 @@
1717
from arroyo.utils.metrics import Metrics
1818
from django.conf import settings
1919

20-
from sentry.metrics.base import MetricsBackend
20+
if TYPE_CHECKING:
21+
from sentry.metrics.base import MetricsBackend
22+
2123

2224
Tags = Mapping[str, str]
2325

@@ -191,3 +193,27 @@ def run_task_with_multiprocessing(
191193
assert pool.pool is not None
192194

193195
return ArroyoRunTaskWithMultiprocessing(pool=pool.pool, function=function, **kwargs)
196+
197+
198+
def _import_and_run(
199+
initializer: Callable[[], None],
200+
main_fn_pickle: bytes,
201+
args_pickle: bytes,
202+
*additional_args: Any,
203+
) -> None:
204+
initializer()
205+
206+
# explicitly use pickle so that we can be sure arguments get unpickled
207+
# after sentry gets initialized
208+
main_fn = pickle.loads(main_fn_pickle)
209+
args = pickle.loads(args_pickle)
210+
211+
main_fn(*args, *additional_args)
212+
213+
214+
def run_with_initialized_sentry(main_fn: Callable[..., None], *args: Any) -> Callable[..., None]:
    """Build a callable that initializes first and then runs ``main_fn``.

    ``main_fn`` and ``args`` are pickled immediately. The returned partial
    wraps ``_import_and_run``, which runs the initializer obtained from
    ``_get_arroyo_subprocess_initializer(None)`` before unpickling them and
    calling ``main_fn``. Any positional arguments given to the returned
    callable are passed to ``main_fn`` after ``args``.

    Args:
        main_fn: Picklable callable to run.
        *args: Picklable positional arguments to pass to ``main_fn`` first.

    Returns:
        A ``functools.partial`` around ``_import_and_run``.
    """
    # Serialize eagerly, function first and arguments second, before the
    # initializer is looked up.
    pickled_payload = (pickle.dumps(main_fn), pickle.dumps(args))
    return partial(
        _import_and_run,
        _get_arroyo_subprocess_initializer(None),
        *pickled_payload,
    )

0 commit comments

Comments
 (0)