diff --git a/src/sentry/replays/endpoints/organization_replay_details.py b/src/sentry/replays/endpoints/organization_replay_details.py
index 5f280fefefa00e..4cae311c740200
--- a/src/sentry/replays/endpoints/organization_replay_details.py
+++ b/src/sentry/replays/endpoints/organization_replay_details.py
@@ -54,13 +54,15 @@ def get(self, request: Request, organization: Organization, replay_id: str) -> R
         try:
             filter_params = self.get_filter_params(
-                request, organization, project_ids=ALL_ACCESS_PROJECTS
+                request,
+                organization,
+                project_ids=ALL_ACCESS_PROJECTS,
+                date_filter_optional=False,
             )
         except NoProjects:
             return Response(status=404)
-
-        if not filter_params["start"] or not filter_params["end"]:
-            return Response(status=404)
+        # "start" and "end" keys are expected to exist due to date_filter_optional=False.
+        # The function returns defaults if the filters aren't in the request.
 
         try:
             replay_id = str(uuid.UUID(replay_id))
diff --git a/src/sentry/replays/endpoints/organization_replay_index.py b/src/sentry/replays/endpoints/organization_replay_index.py
index 18692184936e3f..e9d516b240c68d
--- a/src/sentry/replays/endpoints/organization_replay_index.py
+++ b/src/sentry/replays/endpoints/organization_replay_index.py
@@ -18,7 +18,7 @@
 from sentry.exceptions import InvalidSearchQuery
 from sentry.models.organization import Organization
 from sentry.replays.post_process import ReplayDetailsResponse, process_raw_response
-from sentry.replays.query import query_replays_collection_raw, replay_url_parser_config
+from sentry.replays.query import query_replays_collection_paginated, replay_url_parser_config
 from sentry.replays.usecases.errors import handled_snuba_exceptions
 from sentry.replays.validators import ReplayValidator
 from sentry.utils.cursors import Cursor, CursorResult
@@ -83,7 +83,7 @@ def data_fn(offset: int, limit: int):
                 # to do this for completeness sake.
                 return Response({"detail": "Missing start or end period."}, status=400)
 
-            return query_replays_collection_raw(
+            return query_replays_collection_paginated(
                 project_ids=filter_params["project_id"],
                 start=start,
                 end=end,
diff --git a/src/sentry/replays/endpoints/project_replay_viewed_by.py b/src/sentry/replays/endpoints/project_replay_viewed_by.py
index d91ca065f74b35..206a79fbac1055
--- a/src/sentry/replays/endpoints/project_replay_viewed_by.py
+++ b/src/sentry/replays/endpoints/project_replay_viewed_by.py
@@ -10,6 +10,7 @@
 from sentry.api.api_owners import ApiOwner
 from sentry.api.api_publish_status import ApiPublishStatus
 from sentry.api.base import region_silo_endpoint
+from sentry.api.bases.organization import NoProjects
 from sentry.api.bases.project import ProjectEndpoint, ProjectEventPermission
 from sentry.apidocs.constants import RESPONSE_BAD_REQUEST, RESPONSE_FORBIDDEN, RESPONSE_NOT_FOUND
 from sentry.apidocs.examples.replay_examples import ReplayExamples
@@ -66,7 +67,12 @@ def get(self, request: Request, project: Project, replay_id: str) -> Response:
             return Response(status=404)
 
         # query for user ids who viewed the replay
-        filter_params = self.get_filter_params(request, project, date_filter_optional=False)
+        try:
+            filter_params = self.get_filter_params(request, project, date_filter_optional=False)
+        except NoProjects:
+            return Response(status=404)
+        # "start" and "end" keys are expected to exist due to date_filter_optional=False.
+        # The function returns defaults if the filters aren't in the request.
 
         # If no rows were found then the replay does not exist and a 404 is returned.
         viewed_by_ids_response: list[dict[str, Any]] = query_replay_viewed_by_ids(
diff --git a/src/sentry/replays/post_process.py b/src/sentry/replays/post_process.py
index 7b9b731c8d1dda..ffba5a277f9bad
--- a/src/sentry/replays/post_process.py
+++ b/src/sentry/replays/post_process.py
@@ -11,10 +11,10 @@
 
 
 class DeviceResponseType(TypedDict, total=False):
-    name: str | None
     brand: str | None
-    model: str | None
     family: str | None
+    model: str | None
+    name: str | None
 
 
 class SDKResponseType(TypedDict, total=False):
@@ -33,47 +33,47 @@ class BrowserResponseType(TypedDict, total=False):
 
 
 class UserResponseType(TypedDict, total=False):
-    id: str | None
-    username: str | None
+    display_name: str | None
     email: str | None
+    id: str | None
     ip: str | None
-    display_name: str | None
+    username: str | None
 
 
 @extend_schema_serializer(exclude_fields=["info_ids", "warning_ids"])
 class ReplayDetailsResponse(TypedDict, total=False):
-    id: str
-    project_id: str
-    trace_ids: list[str]
-    error_ids: list[str]
-    environment: str | None
-    tags: dict[str, list[str]] | list
-    user: UserResponseType
-    sdk: SDKResponseType
-    os: OSResponseType
+    activity: int | None
     browser: BrowserResponseType
-    device: DeviceResponseType
-    is_archived: bool | None
-    urls: list[str] | None
     clicks: list[dict[str, Any]]
     count_dead_clicks: int | None
-    count_rage_clicks: int | None
     count_errors: int | None
+    count_infos: int | None
+    count_rage_clicks: int | None
+    count_segments: int | None
+    count_urls: int | None
+    count_warnings: int | None
+    device: DeviceResponseType
+    dist: str | None
     duration: int | None
+    environment: str | None
+    error_ids: list[str]
     finished_at: str | None
-    started_at: str | None
-    activity: int | None
-    count_urls: int | None
-    replay_type: str
-    count_segments: int | None
+    has_viewed: bool
+    id: str
+    info_ids: list[str] | None
+    is_archived: bool | None
+    os: OSResponseType
     platform: str | None
+    project_id: str
     releases: list[str]
-    dist: str | None
+    replay_type: str
+    sdk: SDKResponseType
+    started_at: str | None
+    tags: dict[str, list[str]] | list
+    trace_ids: list[str]
+    urls: list[str] | None
+    user: UserResponseType
     warning_ids: list[str] | None
-    info_ids: list[str] | None
-    count_warnings: int | None
-    count_infos: int | None
-    has_viewed: bool
 
 
 def process_raw_response(
@@ -95,38 +95,65 @@ def generate_restricted_fieldset(
     yield from response
 
 
-def _strip_dashes(field: str) -> str:
+def _strip_dashes(field: str | None) -> str:
     if field:
         return field.replace("-", "")
-    return field
+    return ""
 
 
 def generate_normalized_output(
     response: list[dict[str, Any]]
 ) -> Generator[ReplayDetailsResponse, None, None]:
-    """For each payload in the response strip "agg_" prefixes."""
+    """Skip archives, strip "agg_" prefixes, coerce correct types, and compute/nest new fields"""
+
     for item in response:
         ret_item: ReplayDetailsResponse = {}
-        if item["isArchived"]:
+        if item.get("isArchived"):
             yield _archived_row(item["replay_id"], item["project_id"])  # type: ignore[misc]
             continue
 
-        ret_item["id"] = _strip_dashes(item.pop("replay_id", None))
+        # required fields
         ret_item["project_id"] = str(item["project_id"])
-        ret_item["trace_ids"] = item.pop("traceIds", [])
-        ret_item["error_ids"] = item.pop("errorIds", [])
-        ret_item["environment"] = item.pop("agg_environment", None)
+
+        # modified + renamed fields
+        ret_item["environment"] = item.get("agg_environment", None)
+        # Returns a UInt8 of either 0 or 1. We coerce to a bool.
+        ret_item["has_viewed"] = bool(item.get("has_viewed", 0))
+        ret_item["id"] = _strip_dashes(item.get("replay_id", None))
+        ret_item["releases"] = list(filter(bool, item.get("releases", [])))
+
+        # computed fields
+        ret_item["browser"] = {
+            "name": item.get("browser_name", None),
+            "version": item.get("browser_version", None),
+        }
+        ret_item["clicks"] = extract_click_fields(item)
+        ret_item["device"] = {
+            "name": item.get("device_name", None),
+            "brand": item.get("device_brand", None),
+            "model": item.get("device_model", None),
+            "family": item.get("device_family", None),
+        }
+        ret_item["os"] = {
+            "name": item.get("os_name", None),
+            "version": item.get("os_version", None),
+        }
+        ret_item["sdk"] = {
+            "name": item.get("sdk_name", None),
+            "version": item.get("sdk_version", None),
+        }
         ret_item["tags"] = dict_unique_list(
             zip(
-                item.pop("tk", None) or [],
-                item.pop("tv", None) or [],
+                item.get("tk", None) or [],
+                item.get("tv", None) or [],
             )
         )
         ret_item["user"] = {
-            "id": item.pop("user_id", None),
-            "username": item.pop("user_username", None),
-            "email": item.pop("user_email", None),
-            "ip": item.pop("user_ip", None),
+            "id": item.get("user_id", None),
+            "username": item.get("user_username", None),
+            "email": item.get("user_email", None),
+            "ip": item.get("user_ip", None),
         }
         ret_item["user"]["display_name"] = (
             ret_item["user"]["username"]
@@ -134,56 +161,32 @@
             or ret_item["user"]["id"]
             or ret_item["user"]["ip"]
         )
-        ret_item["sdk"] = {
-            "name": item.pop("sdk_name", None),
-            "version": item.pop("sdk_version", None),
-        }
-        ret_item["os"] = {
-            "name": item.pop("os_name", None),
-            "version": item.pop("os_version", None),
-        }
-        ret_item["browser"] = {
-            "name": item.pop("browser_name", None),
-            "version": item.pop("browser_version", None),
-        }
-        ret_item["device"] = {
-            "name": item.pop("device_name", None),
-            "brand": item.pop("device_brand", None),
-            "model": item.pop("device_model", None),
-            "family": item.pop("device_family", None),
-        }
-
-        item.pop("agg_urls", None)
-        ret_item["urls"] = item.pop("urls_sorted", None)
-
-        ret_item["is_archived"] = bool(item.pop("isArchived", 0))
-        item.pop("clickClass", None)
-        item.pop("click_selector", None)
-        ret_item["activity"] = item.pop("activity", None)
-        # don't need clickClass or click_selector
-        # for the click field, as they are only used for searching.
+        # optional fields
+        ret_item["activity"] = item.get("activity", None)
+        ret_item["count_dead_clicks"] = item.get("count_dead_clicks", None)
+        ret_item["count_errors"] = item.get("count_errors", None)
+        ret_item["count_infos"] = item.get("count_infos", None)
+        ret_item["count_rage_clicks"] = item.get("count_rage_clicks", None)
+        ret_item["count_segments"] = item.get("count_segments", None)
+        ret_item["count_urls"] = item.get("count_urls", None)
+        ret_item["count_warnings"] = item.get("count_warnings", None)
+        ret_item["dist"] = item.get("dist", None)
+        ret_item["duration"] = item.get("duration", None)
+        ret_item["error_ids"] = item.get("errorIds", [])
+        ret_item["finished_at"] = item.get("finished_at", None)
+        ret_item["info_ids"] = item.get("info_ids", None)
+        ret_item["is_archived"] = item.get("isArchived", None)
+        ret_item["platform"] = item.get("platform", None)
+        ret_item["replay_type"] = item.get("replay_type", "session")
+        ret_item["started_at"] = item.get("started_at", None)
+        ret_item["trace_ids"] = item.get("traceIds", [])
+        ret_item["urls"] = item.get("urls_sorted", None)
+        ret_item["warning_ids"] = item.get("warning_ids", None)
+
+        # excluded fields: agg_urls, clickClass, click_selector
+        # Don't need clickClass and click_selector for the click field, as they are only used for searching.
         # (click.classes contains the full list of classes for a click)
-        ret_item["clicks"] = extract_click_fields(item)
-        ret_item["count_dead_clicks"] = item.pop("count_dead_clicks", None)
-        ret_item["count_errors"] = item.pop("count_errors", None)
-        ret_item["count_rage_clicks"] = item.pop("count_rage_clicks", None)
-        ret_item["count_segments"] = item.pop("count_segments", None)
-        ret_item["count_urls"] = item.pop("count_urls", None)
-        ret_item["dist"] = item.pop("dist", None)
-        ret_item["duration"] = item.pop("duration", None)
-        ret_item["finished_at"] = item.pop("finished_at", None)
-        ret_item["platform"] = item.pop("platform", None)
-        ret_item["releases"] = list(filter(bool, item.pop("releases", [])))
-        ret_item["replay_type"] = item.pop("replay_type", "session")
-        ret_item["started_at"] = item.pop("started_at", None)
-
-        ret_item["warning_ids"] = item.pop("warning_ids", None)
-        ret_item["info_ids"] = item.pop("info_ids", None)
-        ret_item["count_infos"] = item.pop("count_infos", None)
-        ret_item["count_warnings"] = item.pop("count_warnings", None)
-        # Returns a UInt8 of either 0 or 1. We coerce to a bool.
-        ret_item["has_viewed"] = bool(item.get("has_viewed", 0))
         yield ret_item
 
 
@@ -209,32 +212,16 @@ def dict_unique_list(items: Iterable[tuple[str, str]]) -> dict[str, list[str]]:
 
 def _archived_row(replay_id: str, project_id: int) -> dict[str, Any]:
     archived_replay_response = {
+        "browser": {"name": None, "version": None},
+        "device": {"name": None, "brand": None, "model": None, "family": None},
+        "error_ids": [],
         "id": _strip_dashes(replay_id),
+        "os": {"name": None, "version": None},
         "project_id": str(project_id),
-        "trace_ids": [],
-        "error_ids": [],
-        "environment": None,
+        "sdk": {"name": None, "version": None},
         "tags": [],
+        "trace_ids": [],
         "user": {"id": "Archived Replay", "display_name": "Archived Replay"},
-        "sdk": {"name": None, "version": None},
-        "os": {"name": None, "version": None},
-        "browser": {"name": None, "version": None},
-        "device": {"name": None, "brand": None, "model": None, "family": None},
-        "urls": None,
-        "activity": None,
-        "count_dead_clicks": None,
-        "count_rage_clicks": None,
-        "count_errors": None,
-        "duration": None,
-        "finished_at": None,
-        "started_at": None,
-        "is_archived": True,
-        "count_segments": None,
-        "count_urls": None,
-        "dist": None,
-        "platform": None,
-        "releases": None,
-        "clicks": None,
     }
     for field in VALID_FIELD_SET:
         if field not in archived_replay_response:
diff --git a/src/sentry/replays/query.py b/src/sentry/replays/query.py
index 4c9ba625333ec0..f26d0d143c562e
--- a/src/sentry/replays/query.py
+++ b/src/sentry/replays/query.py
@@ -39,12 +39,7 @@
 ELIGIBLE_SUBQUERY_SORTS = {"started_at", "browser.name", "os.name"}
 
 
-# Compatibility function for getsentry code.
-def query_replays_collection(*args, **kwargs):
-    return query_replays_collection_raw(*args, **kwargs)[0]
-
-
-def query_replays_collection_raw(
+def query_replays_collection_paginated(
     project_ids: list[int],
     start: datetime,
     end: datetime,
@@ -56,8 +51,8 @@ def query_replays_collection_raw(
     search_filters: Sequence[SearchFilter],
     organization: Organization | None = None,
     actor: Any | None = None,
-):
-    """Query aggregated replay collection."""
+) -> tuple[list[dict[str, Any]], bool]:
+    """Query aggregated replay collection. Returns (response, has_more)"""
 
     paginators = Paginators(limit, offset)
 
     return query_using_optimized_search(
diff --git a/src/sentry/replays/scripts/delete_replays.py b/src/sentry/replays/scripts/delete_replays.py
index 2227f5f54dbd7b..9ef66fa08225e6
--- a/src/sentry/replays/scripts/delete_replays.py
+++ b/src/sentry/replays/scripts/delete_replays.py
@@ -9,7 +9,7 @@
 from sentry.models.organization import Organization
 from sentry.replays.lib.kafka import initialize_replays_publisher
 from sentry.replays.post_process import generate_normalized_output
-from sentry.replays.query import query_replays_collection, replay_url_parser_config
+from sentry.replays.query import query_replays_collection_paginated, replay_url_parser_config
 from sentry.replays.tasks import archive_replay, delete_replay_recording_async
 
 logger = logging.getLogger()
@@ -31,7 +31,7 @@ def delete_replays(
     while True:
         replays = list(
             generate_normalized_output(
-                query_replays_collection(
+                query_replays_collection_paginated(
                    project_ids=[project_id],
                    start=start_utc,
                    end=end_utc,
@@ -42,7 +42,7 @@ def delete_replays(
                    search_filters=search_filters,
                    sort="started_at",
                    organization=Organization.objects.filter(project__id=project_id).get(),
-                )
+                )[0]
             )
         )
 
diff --git a/src/sentry/replays/usecases/query/__init__.py b/src/sentry/replays/usecases/query/__init__.py
index 880ffe806d5f50..e92923b8259763
--- a/src/sentry/replays/usecases/query/__init__.py
+++ b/src/sentry/replays/usecases/query/__init__.py
@@ -15,7 +15,7 @@
 
 from __future__ import annotations
 
-from collections.abc import Mapping, Sequence
+from collections.abc import Iterable, Mapping, Sequence
 from datetime import datetime, timedelta
 from typing import Any, cast
 
@@ -152,7 +152,7 @@ def query_using_optimized_search(
     period_start: datetime,
     period_stop: datetime,
     request_user_id: int | None = None,
-):
+) -> tuple[list, bool]:
     tenant_id = _make_tenant_id(organization)
 
     # Environments is provided to us outside of the ?query= url parameter. It's stil filtered like
@@ -358,15 +358,15 @@ def _make_tenant_id(organization: Organization | None) -> dict[str, int]:
         return {"organization_id": organization.id}
 
 
-def _make_ordered(replay_ids: list[str], results: Any) -> list[Any]:
-    if not replay_ids:
+def _make_ordered(replay_id_ordering: list[str], results: Iterable[Any]) -> list[Any]:
+    if not replay_id_ordering:
         return []
     elif not results:
         return []
 
     replay_id_to_index = {}
     i = 0
-    for replay_id in replay_ids:
+    for replay_id in replay_id_ordering:
         if replay_id not in replay_id_to_index:
             replay_id_to_index[replay_id] = i
             i += 1
diff --git a/src/sentry/replays/validators.py b/src/sentry/replays/validators.py
index d89cd80d9970df..c29f2ee5d17493
--- a/src/sentry/replays/validators.py
+++ b/src/sentry/replays/validators.py
@@ -3,18 +3,23 @@
 VALID_FIELD_SET = (
     "activity",
     "browser",
+    "clicks",
     "count_dead_clicks",
     "count_errors",
+    "count_infos",
     "count_rage_clicks",
     "count_segments",
     "count_urls",
+    "count_warnings",
     "device",
     "dist",
     "duration",
     "environment",
     "error_ids",
     "finished_at",
+    "has_viewed",
     "id",
+    "info_ids",
     "is_archived",
     "os",
     "platform",
@@ -26,12 +31,7 @@
     "trace_ids",
     "urls",
     "user",
-    "clicks",
-    "info_ids",
     "warning_ids",
-    "count_warnings",
-    "count_infos",
-    "has_viewed",
 )
diff --git a/tests/sentry/replays/test_organization_replay_index.py b/tests/sentry/replays/test_organization_replay_index.py
index a60d3e22f23736..407fb525ad4e11
--- a/tests/sentry/replays/test_organization_replay_index.py
+++ b/tests/sentry/replays/test_organization_replay_index.py
@@ -1387,7 +1387,7 @@ def test_get_replays_memory_error(self):
         with self.feature(REPLAYS_FEATURES):
             # Invalid field-names error regardless of ordering.
             with mock.patch(
-                "sentry.replays.endpoints.organization_replay_index.query_replays_collection_raw",
+                "sentry.replays.endpoints.organization_replay_index.query_replays_collection_paginated",
                 side_effect=QueryMemoryLimitExceeded("mocked error"),
             ):
                 response = self.client.get(self.url)
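
Reviewer note: below is a minimal, self-contained sketch (not Sentry code; fetch_page and collect_all are hypothetical stand-ins) of the (results, has_more) contract that query_replays_collection_paginated now returns, and of the two caller patterns visible in this diff: taking element [0] as delete_replays does, or paging on the boolean.

from typing import Any


def fetch_page(offset: int, limit: int) -> tuple[list[dict[str, Any]], bool]:
    # Hypothetical stand-in for query_replays_collection_paginated: returns the
    # requested page plus a has_more flag indicating whether another page exists.
    data = [{"replay_id": str(i)} for i in range(100)]
    page = data[offset : offset + limit]
    return page, offset + limit < len(data)


def collect_all(limit: int = 25) -> list[dict[str, Any]]:
    # Page through results until has_more is False, one way a caller can
    # consume the new (results, has_more) tuple instead of a bare list.
    results: list[dict[str, Any]] = []
    offset = 0
    while True:
        page, has_more = fetch_page(offset, limit)
        results.extend(page)
        if not has_more:
            return results
        offset += limit


first_page = fetch_page(0, 25)[0]  # the "[0]" pattern used by delete_replays
assert len(first_page) == 25
assert len(collect_all()) == 100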