Skip to content

Commit 9fdec02

Browse files
committed
STASH add platform to metadata and metrics
1 parent 4f9a337 commit 9fdec02

File tree

5 files changed

+53
-5
lines changed

5 files changed

+53
-5
lines changed

src/sentry/grouping/ingest/grouphash_metadata.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def create_or_update_grouphash_metadata_if_needed(
119119
latest_grouping_config=grouping_config,
120120
hash_basis=hash_basis,
121121
hashing_metadata=hashing_metadata,
122+
platform=event.platform,
122123
)
123124
elif grouphash.metadata and grouphash.metadata.latest_grouping_config != grouping_config:
124125
# Keep track of the most recent config which computed this hash, so that once a
@@ -203,7 +204,9 @@ def get_hash_basis_and_metadata(
203204
return hash_basis, hashing_metadata
204205

205206

206-
def record_grouphash_metadata_metrics(grouphash_metadata: GroupHashMetadata) -> None:
207+
def record_grouphash_metadata_metrics(
208+
grouphash_metadata: GroupHashMetadata, platform: str | None
209+
) -> None:
207210
# TODO: Once https://peps.python.org/pep-0728 is a thing (still in draft but theoretically on
208211
# track for 3.14), we can mark the various hashing metadata types as closed and that should
209212
# narrow the types for the tag values such that we can stop stringifying everything
@@ -222,7 +225,7 @@ def is_stacktrace_hashing(
222225
hashing_metadata = grouphash_metadata.hashing_metadata
223226

224227
if hash_basis:
225-
hash_basis_tags: dict[str, str] = {"hash_basis": hash_basis}
228+
hash_basis_tags: dict[str, str] = {"hash_basis": hash_basis, "platform": platform}
226229
if hashing_metadata:
227230
hash_basis_tags["is_hybrid_fingerprint"] = str(
228231
hashing_metadata.get("is_hybrid_fingerprint", False)
@@ -244,7 +247,11 @@ def is_stacktrace_hashing(
244247
metrics.incr(
245248
f"grouping.grouphashmetadata.event_hashing_metadata.{hash_basis}",
246249
sample_rate=1.0,
247-
tags=hashing_metadata_tags,
250+
tags={
251+
**hashing_metadata_tags,
252+
# Add this in at the end so it's not the reason we log the metric
253+
"platform": platform,
254+
},
248255
)
249256

250257

src/sentry/grouping/ingest/hashing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def get_or_create_grouphashes(
241241
sentry_sdk.capture_exception(exc)
242242

243243
if grouphash.metadata:
244-
record_grouphash_metadata_metrics(grouphash.metadata)
244+
record_grouphash_metadata_metrics(grouphash.metadata, event.platform)
245245
else:
246246
# Collect a temporary metric to get a sense of how often we would be adding metadata to an
247247
# existing hash. (Yes, this is an overestimate, because this will fire every time we see a given
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Generated by Django 5.1.5 on 2025-02-07 05:09
2+
3+
from django.db import migrations, models
4+
5+
from sentry.new_migrations.migrations import CheckedMigration
6+
7+
8+
class Migration(CheckedMigration):
9+
# This flag is used to mark that a migration shouldn't be automatically run in production.
10+
# This should only be used for operations where it's safe to run the migration after your
11+
# code has deployed. So this should not be used for most operations that alter the schema
12+
# of a table.
13+
# Here are some things that make sense to mark as post deployment:
14+
# - Large data migrations. Typically we want these to be run manually so that they can be
15+
# monitored and not block the deploy for a long period of time while they run.
16+
# - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
17+
# run this outside deployments so that we don't block them. Note that while adding an index
18+
# is a schema change, it's completely safe to run the operation after the code has deployed.
19+
# Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
20+
21+
is_post_deployment = False
22+
23+
dependencies = [
24+
("sentry", "0826_make_sentryapp_uuid_unique"),
25+
]
26+
27+
operations = [
28+
migrations.AddField(
29+
model_name="grouphashmetadata",
30+
name="platform",
31+
field=models.CharField(null=True),
32+
),
33+
]

src/sentry/models/grouphashmetadata.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ class GroupHashMetadata(Model):
5151
"sentry.GroupHash", related_name="_metadata", on_delete=models.CASCADE
5252
)
5353
date_added = models.DateTimeField(default=timezone.now)
54+
# The platform of the event that generated the metadata. Likely different from the project
55+
# platform, as event platforms are normalized to a handful of known values, whereas project
56+
# platforms are all over the place.
57+
platform = models.CharField(null=True)
5458

5559
# HASHING
5660

tests/sentry/grouping/test_grouphash_metadata.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,15 @@ def _assert_and_snapshot_results(
143143

144144
with patch("sentry.grouping.ingest.grouphash_metadata.metrics.incr") as mock_metrics_incr:
145145
record_grouphash_metadata_metrics(
146-
GroupHashMetadata(hash_basis=hash_basis, hashing_metadata=hashing_metadata)
146+
GroupHashMetadata(hash_basis=hash_basis, hashing_metadata=hashing_metadata),
147+
event.platform,
147148
)
148149

149150
metric_names = [call.args[0] for call in mock_metrics_incr.mock_calls]
150151
tags = [call.kwargs["tags"] for call in mock_metrics_incr.mock_calls]
152+
# Filter out all the `platform` tags, because many of the inputs don't have a `platform`
153+
# value, so we're going to get a lot of Nones
154+
[t.pop("platform", None) for t in tags]
151155
metrics_data = dict(zip(metric_names, tags))
152156

153157
expected_metric_names = ["grouping.grouphashmetadata.event_hash_basis"]

0 commit comments

Comments
 (0)