Skip to content

feat(replays): switch to replay_id column from tag #68950

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/sentry/models/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ class EventOrdering(Enum):
LATEST = ["-timestamp", "-event_id"]
OLDEST = ["timestamp", "event_id"]
MOST_HELPFUL = [
"-replayId",
"-replay.id",
"-profile.id",
"num_processing_errors",
"-trace.sampled",
Expand Down
9 changes: 0 additions & 9 deletions src/sentry/replays/endpoints/organization_replay_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,6 @@
from sentry.snuba.dataset import Dataset
from sentry.types.ratelimit import RateLimit, RateLimitCategory

MAX_REPLAY_COUNT = 51
MAX_VALS_PROVIDED = {
"issue.id": 25,
"transaction": 25,
"replay_id": 100,
}

FILTER_HAS_A_REPLAY = "AND !replayId:''"


class ReplayDataSourceValidator(serializers.Serializer):
data_source = serializers.ChoiceField(
Expand Down
31 changes: 16 additions & 15 deletions src/sentry/replays/usecases/replay_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from sentry.api.event_search import ParenExpression, SearchFilter, parse_search_query
from sentry.models.group import Group
from sentry.replays.query import query_replays_count
from sentry.search.events.builder import QueryBuilder
from sentry.search.events.types import QueryBuilderConfig, SnubaParams
from sentry.search.events.types import SnubaParams
from sentry.snuba import discover, issue_platform
from sentry.snuba.dataset import Dataset

MAX_REPLAY_COUNT = 51
Expand All @@ -20,7 +20,7 @@
"replay_id": 100,
}

FILTER_HAS_A_REPLAY = " AND !replayId:''"
FILTER_HAS_A_REPLAY = ' AND !replay.id:""'


def get_replay_counts(snuba_params: SnubaParams, query, return_ids, data_source) -> dict[str, Any]:
Expand All @@ -30,6 +30,7 @@ def get_replay_counts(snuba_params: SnubaParams, query, return_ids, data_source)
- If the identifier is 'replay_id', the returned count is always 1. Use this to check the existence of replay_ids
- Set the flag 'return_ids' to get the replay_ids (32 char hex strings) for each identifier
"""

if snuba_params.start is None or snuba_params.end is None or snuba_params.organization is None:
raise ValueError("Must provide start and end")

Expand Down Expand Up @@ -62,6 +63,12 @@ def _get_replay_id_mappings(
If select_column is replay_id, return an identity map of replay_id -> [replay_id].
The keys of the returned dict are UUIDs, represented as 32 char hex strings (all '-'s stripped)
"""

if data_source == Dataset.Discover:
search_query_func = discover.query
elif data_source == Dataset.IssuePlatform:
search_query_func = issue_platform.query # type: ignore[assignment]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking — do we actually write to this column from the issue platform?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok - so as long as it's in the context for the event we pass to snuba it should work...

Are replays associated to transactions? If so, then perf issues from transactions should work automatically. Other issue platform types that make a "synthetic" event would need to explicitly include it there

Copy link
Member Author

@JoshFerge JoshFerge Apr 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, replays are associated to transactions. It's done via the dynamic sampling context, which then adds the replay context in Relay. And for the others:

rage click issue:

contexts = {"replay": {"replay_id": replay_id}}

feedback issue:

ret_event["contexts"]["replay"] = {

Crons
-- not applicable

that should be everything


select_column, column_value = _get_select_column(query)
query = query + FILTER_HAS_A_REPLAY if data_source == Dataset.Discover else query

Expand Down Expand Up @@ -95,27 +102,21 @@ def _get_replay_id_mappings(
projects=[group.project for group in groups],
)

builder = QueryBuilder(
dataset=data_source,
results = search_query_func(
params={},
snuba_params=snuba_params,
selected_columns=["group_uniq_array(100,replayId)", select_column],
selected_columns=["group_uniq_array(100,replay.id)", select_column],
query=query,
limit=25,
offset=0,
config=QueryBuilderConfig(
functions_acl=["group_uniq_array"],
),
)

discover_results = builder.run_query(
referrer="api.organization-issue-replay-count", use_cache=True
functions_acl=["group_uniq_array"],
referrer="api.organization-issue-replay-count",
)

replay_id_to_issue_map = defaultdict(list)

for row in discover_results["data"]:
for replay_id in row["group_uniq_array_100_replayId"]:
for row in results["data"]:
for replay_id in row["group_uniq_array_100_replay_id"]:
# When no replay exists these strings are provided in their empty
# state rather than null. This can cause downstream problems so
# we filter them out.
Expand Down
7 changes: 3 additions & 4 deletions src/sentry/search/events/builder/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,13 +307,11 @@ def resolve_column_name(self, col: str) -> str:
# TODO when utils/snuba.py becomes typed don't need this extra annotation
column_resolver: Callable[[str], str] = resolve_column(self.dataset)
column_name = column_resolver(col)

# If the original column was passed in as tag[X], then there won't be a conflict
# and there's no need to prefix the tag
if not col.startswith("tags[") and column_name.startswith("tags["):
self.prefixed_to_tag_map[f"tags_{col}"] = col
self.tag_to_prefixed_map[col] = f"tags_{col}"

return column_name

def resolve_query(
Expand Down Expand Up @@ -1091,7 +1089,6 @@ def aliased_column(self, name: str) -> SelectType:
:param name: The unresolved sentry name.
:param alias: The expected alias in the result.
"""

# TODO: This method should use an aliased column from the SDK once
# that is available to skip these hacks that we currently have to
# do aliasing.
Expand Down Expand Up @@ -1578,14 +1575,16 @@ def default_filter_converter(
raise InvalidSearchQuery(INVALID_SPAN_ID.format(name))

# Validate event ids, trace ids, and profile ids are uuids
if name in {"id", "trace", "profile.id"}:
if name in {"id", "trace", "profile.id", "replay.id"}:
if search_filter.value.is_wildcard():
raise InvalidSearchQuery(WILDCARD_NOT_ALLOWED.format(name))
elif not search_filter.value.is_event_id():
if name == "trace":
label = "Filter Trace ID"
elif name == "profile.id":
label = "Filter Profile ID"
elif name == "replay.id":
label = "Filter Replay ID"
else:
label = "Filter ID"
raise InvalidSearchQuery(INVALID_ID_DETAILS.format(label))
Expand Down
16 changes: 14 additions & 2 deletions src/sentry/snuba/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,20 @@ class Columns(Enum):
REPLAY_ID = Column(
group_name=None,
event_name="replay_id",
transaction_name=None,
discover_name=None,
transaction_name="replay_id",
discover_name="replay_id",
issue_platform_name="replay_id",
alias="replay.id",
)
# We used to set the replay_id as a tag on error events as
# replayId. We allow this query for backwards compatibility,
# but in the future shouldn't be displayed in the UI anywhere
# as a suggested column.
REPLAY_ID_DEPRECATED = Column(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this used?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this was exposed to discover/dashboards, you'll want to make sure it's not being used by customers if you were to remove it.

group_name=None,
event_name="replay_id",
transaction_name="replay_id",
discover_name="replay_id",
issue_platform_name="replay_id",
alias="replayId",
)
Expand Down
5 changes: 3 additions & 2 deletions src/sentry/utils/snuba.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,13 @@ def log_snuba_info(content):
"category": "sentry_tags[category]",
"span.category": "sentry_tags[category]",
"span.status_code": "sentry_tags[status_code]",
"replay_id": "sentry_tags[replay_id]",
"replay.id": "sentry_tags[replay_id]",
"resource.render_blocking_status": "sentry_tags[resource.render_blocking_status]",
"http.response_content_length": "sentry_tags[http.response_content_length]",
"http.decoded_response_content_length": "sentry_tags[http.decoded_response_content_length]",
"http.response_transfer_size": "sentry_tags[http.response_transfer_size]",
"app_start_type": "sentry_tags[app_start_type]",
"replay.id": "sentry_tags[replay_id]",
"browser.name": "sentry_tags[browser.name]",
"origin.transaction": "sentry_tags[transaction]",
"is_transaction": "is_segment",
Expand Down Expand Up @@ -1225,6 +1226,7 @@ def _resolve_column(col):

# Some dataset specific logic:
if dataset == Dataset.Discover:

if isinstance(col, (list, tuple)) or col in ("project_id", "group_id"):
return col
elif (
Expand All @@ -1249,7 +1251,6 @@ def _resolve_column(col):
span_op_breakdown_name = get_span_op_breakdown_name(col)
if "span_op_breakdowns_key" in DATASETS[dataset] and span_op_breakdown_name:
return f"span_op_breakdowns[{span_op_breakdown_name}]"

return f"tags[{col}]"

return _resolve_column
Expand Down
66 changes: 19 additions & 47 deletions tests/sentry/replays/test_organization_replay_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ class OrganizationReplayCountEndpointTest(
def setUp(self):
super().setUp()
self.project.update(flags=F("flags").bitor(Project.flags.has_replays))
self.min_ago = before_now(minutes=1)
self.min_ago = before_now(minutes=2)
self.login_as(user=self.user)
self.url = reverse(
"sentry-api-0-organization-replay-count",
kwargs={"organization_slug": self.project.organization.slug},
)
self.features = {"organizations:session-replay": True}

def test_simple(self):
def test_simple_b(self):
event_id_a = "a" * 32
event_id_b = "b" * 32
replay1_id = uuid.uuid4().hex
Expand Down Expand Up @@ -70,7 +70,7 @@ def test_simple(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -79,7 +79,7 @@ def test_simple(self):
data={
"event_id": uuid.uuid4().hex,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay2_id},
"contexts": {"replay": {"replay_id": replay2_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -88,7 +88,9 @@ def test_simple(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": "z" * 32}, # a replay id that doesn't exist
"contexts": {
"replay": {"replay_id": uuid.uuid4().hex}
}, # a replay id that doesn't exist
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -97,14 +99,15 @@ def test_simple(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay3_id},
"contexts": {"replay": {"replay_id": replay3_id}},
"fingerprint": ["group-2"],
},
project_id=self.project.id,
)

query = {"query": f"(issue.id:[{event_a.group.id}, {event_c.group.id}] or abc)"}
with self.feature(self.features):

response = self.client.get(self.url, query, format="json")

expected = {
Expand Down Expand Up @@ -146,7 +149,7 @@ def test_simple_return_ids(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -155,7 +158,7 @@ def test_simple_return_ids(self):
data={
"event_id": uuid.uuid4().hex,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay2_id},
"contexts": {"replay": {"replay_id": replay2_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -164,7 +167,7 @@ def test_simple_return_ids(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": "z" * 32}, # a replay id that doesn't exist
"contexts": {"replay": {"replay_id": uuid.uuid4().hex}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -173,7 +176,7 @@ def test_simple_return_ids(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay3_id},
"contexts": {"replay": {"replay_id": replay3_id}},
"fingerprint": ["group-2"],
},
project_id=self.project.id,
Expand Down Expand Up @@ -315,7 +318,7 @@ def test_one_replay_multiple_issues(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -324,7 +327,7 @@ def test_one_replay_multiple_issues(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-2"],
},
project_id=self.project.id,
Expand Down Expand Up @@ -357,7 +360,7 @@ def test_one_replay_same_issue_twice(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -366,7 +369,7 @@ def test_one_replay_same_issue_twice(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand Down Expand Up @@ -397,7 +400,7 @@ def test_simple_transaction(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"transaction": "t-1",
},
project_id=self.project.id,
Expand All @@ -413,36 +416,6 @@ def test_simple_transaction(self):
assert response.status_code == 200, response.content
assert response.data == expected

def test_max_51(self):
replay_ids = [uuid.uuid4().hex for _ in range(100)]
for replay_id in replay_ids:
self.store_replays(
mock_replay(
datetime.datetime.now() - datetime.timedelta(seconds=22),
self.project.id,
replay_id,
)
)
event_a = self.store_event(
data={
"event_id": uuid.uuid4().hex,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay_id},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
)

query = {"query": f"issue.id:[{event_a.group.id}]"}
with self.feature(self.features):
response = self.client.get(self.url, query, format="json")

expected = {
event_a.group.id: 51,
}
assert response.status_code == 200, response.content
assert response.data == expected

def test_invalid_params_need_one_issue_id(self):
query = {"query": ""}
with self.feature(self.features):
Expand Down Expand Up @@ -580,7 +553,7 @@ def test_cross_organization_lookups(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
Expand All @@ -607,7 +580,6 @@ def test_cross_organization_lookups(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay2_id},
"fingerprint": ["group-2"],
},
project_id=project.id,
Expand Down
Loading