Skip to content

Commit de38201

Browse files
authored
ref(db): Refactor EnvironmentProject to reduce rollbacks (#89265)
Similar to how it was done in: #88885 Refactor of `add_project` method of `Environment` model, preparing it for gradual rollout and to decrease the number of rollbacks it does since almost every attempt of insertions for this model results in a rollback. In this method `EnvironmentProject` was doing overly optimistic inserts leading to us having almost 100 rollbacks/second coming just from this model ## Why are we doing this Currently we are doing around 300 rollbacks per second, mostly caused by overly optimistic writes - almost all of the writes result in the rollback because the data already exists in the table, and for those occasions `get_or_create` is more suitable since SELECT statement is more performant than ROLLBACK when they happen most of the times. [Datadog notebok with investagtion](https://app.datadoghq.com/notebook/12067672/postgres-rollback-investigation?range=604800000&start=1743184480708&live=true), where 3 problematic models where detected: - ~GroupRelease~ - `Commit` - `EnvironmentProject` Models will be refactored one at the time, and the refactor will be rolled out gradually: 10% - 50% - 100%
1 parent 3e37d4c commit de38201

File tree

3 files changed

+50
-9
lines changed

3 files changed

+50
-9
lines changed

src/sentry/models/environment.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
sane_repr,
1616
)
1717
from sentry.db.models.manager.base import BaseManager
18+
from sentry.options.rollout import in_random_rollout
1819
from sentry.utils import metrics
1920
from sentry.utils.cache import cache
2021
from sentry.utils.hashlib import md5_text
@@ -111,16 +112,23 @@ def add_project(self, project, is_hidden=None):
111112
cache_key = f"envproj:c:{self.id}:{project.id}"
112113

113114
if cache.get(cache_key) is None:
114-
try:
115-
with transaction.atomic(router.db_for_write(EnvironmentProject)):
116-
EnvironmentProject.objects.create(
117-
project=project, environment=self, is_hidden=is_hidden
118-
)
119-
cache.set(cache_key, 1, 3600)
120-
except IntegrityError:
121-
incr_rollback_metrics(EnvironmentProject)
122-
# We've already created the object, should still cache the action.
115+
if in_random_rollout("environmentproject.new_add_project.rollout"):
116+
EnvironmentProject.objects.get_or_create(
117+
project=project, environment=self, defaults={"is_hidden": is_hidden}
118+
)
119+
# The object already exists, we cache the action to reduce the load on the database.
123120
cache.set(cache_key, 1, 3600)
121+
else:
122+
try:
123+
with transaction.atomic(router.db_for_write(EnvironmentProject)):
124+
EnvironmentProject.objects.create(
125+
project=project, environment=self, is_hidden=is_hidden
126+
)
127+
cache.set(cache_key, 1, 3600)
128+
except IntegrityError:
129+
incr_rollback_metrics(EnvironmentProject)
130+
# We've already created the object, should still cache the action.
131+
cache.set(cache_key, 1, 3600)
124132

125133
@staticmethod
126134
def get_name_from_path_segment(segment):

src/sentry/options/defaults.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2809,6 +2809,14 @@
28092809
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
28102810
)
28112811

2812+
# Killswitch for EnvironmentProject.get_or_create refactor
2813+
register(
2814+
"environmentproject.new_add_project.rollout",
2815+
default=0.0,
2816+
type=Float,
2817+
flags=FLAG_AUTOMATOR_MODIFIABLE,
2818+
)
2819+
28122820
# Killswitch for Postgres query timeout error handling
28132821
register(
28142822
"api.postgres-query-timeout-error-handling.enabled",

tests/sentry/models/test_environment.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from sentry.models.environment import Environment
44
from sentry.testutils.cases import TestCase
5+
from sentry.testutils.helpers import override_options
56

67

78
class GetOrCreateTest(TestCase):
@@ -23,6 +24,30 @@ def test_simple(self):
2324
with self.assertNumQueries(0):
2425
assert Environment.get_for_organization_id(project.organization_id, "prod").id == env.id
2526

27+
@override_options({"environmentproject.new_add_project.rollout": 1.0})
28+
def test_simple_with_new_option(self):
29+
"""
30+
Same test as test_simple, but with the new option enabled which has refactored
31+
the get_or_create method.
32+
Note (vgrozdanic): this test will be removed after the rollout is complete.
33+
"""
34+
project = self.create_project()
35+
36+
with pytest.raises(Environment.DoesNotExist):
37+
Environment.get_for_organization_id(project.organization_id, "prod")
38+
39+
env = Environment.get_or_create(project=project, name="prod")
40+
41+
assert env.name == "prod"
42+
assert env.projects.first().id == project.id
43+
44+
env2 = Environment.get_or_create(project=project, name="prod")
45+
46+
assert env2.id == env.id
47+
48+
with self.assertNumQueries(0):
49+
assert Environment.get_for_organization_id(project.organization_id, "prod").id == env.id
50+
2651

2752
@pytest.mark.parametrize(
2853
"val,expected",

0 commit comments

Comments
 (0)