Skip to content

feat(similarity-embedding): Add endpoint to call backfill script #68751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 47 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
3af8c26
feat(similarity-embedding): Create backfill script for inserting records
jangjodi Apr 8, 2024
5f9e6ca
fix: Fix query
jangjodi Apr 9, 2024
0d5c986
Merge branch 'master' into jodi/similarity-embeddings-backfill
jangjodi Apr 9, 2024
95a76e7
ref: Add seer timeout, remove script
jangjodi Apr 10, 2024
fb42e6b
ref: Move script back and change to use hash
jangjodi Apr 11, 2024
f19acd1
Merge branch 'master' into jodi/similarity-embeddings-backfill
jangjodi Apr 11, 2024
f33b4d3
fix: Typing and typo
jangjodi Apr 11, 2024
0a2ccb8
feat(similarity-embedding): Add endpoint to call backfill script
jangjodi Apr 11, 2024
c70b217
fix: Typo
jangjodi Apr 11, 2024
b3f5bad
:hammer_and_wrench: apply pre-commit fixes
getsantry[bot] Apr 11, 2024
812cf9e
ref: Rename files, add superuser check
jangjodi Apr 11, 2024
2fbc24c
fix: Typo
jangjodi Apr 12, 2024
799dddd
ref: Change snuba cross tenant id to cross org
jangjodi Apr 12, 2024
168b5d9
fix: Remove unused import
jangjodi Apr 15, 2024
18ab9af
fix: Remove line
jangjodi Apr 15, 2024
c1c5095
:hammer_and_wrench: apply pre-commit fixes
getsantry[bot] Apr 15, 2024
a2fe821
ref: Use metrics wraps and change metadata structure
jangjodi Apr 15, 2024
51851f6
Merge branch 'master' into jodi/similarity-embeddings-backfill
jangjodi May 3, 2024
81558a9
:hammer_and_wrench: apply pre-commit fixes
getsantry[bot] May 3, 2024
00c74e3
ref: Change import
jangjodi May 3, 2024
b02f0dd
fix: Fix call
jangjodi May 3, 2024
81efe15
:hammer_and_wrench: apply pre-commit fixes
getsantry[bot] May 3, 2024
1579982
fix: Merge conflict
jangjodi May 6, 2024
e5234d3
ref: Add seer sleep and seer delete table feature flag
jangjodi May 6, 2024
fed1b77
ref: Make seperate async calls, remove option
jangjodi May 7, 2024
cb09dbf
Merge branch 'master' into jodi/similarity-embeddings-backfill
jangjodi May 7, 2024
3e4ad4d
:hammer_and_wrench: apply pre-commit fixes
getsantry[bot] May 7, 2024
a158726
fix: Remove task
jangjodi May 8, 2024
97bf841
Merge branch 'jodi/similarity-embeddings-backfill' into jodi/record-b…
jangjodi May 8, 2024
f2ed707
fix: Typo
jangjodi May 8, 2024
49a06a0
fix: Delete renamed files
jangjodi May 9, 2024
ec25516
ref: Spawn tasks with delay linearly
jangjodi May 9, 2024
3efc188
ref: Add time limits on task
jangjodi May 9, 2024
ec9a5aa
fix: Typing
jangjodi May 9, 2024
01fbead
Merge branch 'master' into jodi/similarity-embeddings-backfill
jangjodi May 10, 2024
0e5f265
:hammer_and_wrench: apply pre-commit fixes
getsantry[bot] May 10, 2024
ffa4917
fix: Merge and early return
jangjodi May 10, 2024
ad4bb8a
Merge branch 'jodi/similarity-embeddings-backfill' into jodi/record-b…
jangjodi May 10, 2024
de5a5a3
fix: Merge
jangjodi May 10, 2024
5f62163
Merge branch 'jodi/similarity-embeddings-backfill' into jodi/record-b…
jangjodi May 10, 2024
4e90a0b
ref: Change post request to match backfill
jangjodi May 10, 2024
7dda2b6
Merge branch 'master' into jodi/similarity-embeddings-backfill
jangjodi May 14, 2024
2f7885f
ref: Add new feature flag
jangjodi May 14, 2024
e2adbe0
Merge branch 'jodi/similarity-embeddings-backfill' into jodi/record-b…
jangjodi May 14, 2024
116a4d8
Merge branch 'master' into jodi/record-backfill-script-endpoint
jangjodi May 14, 2024
0a35cf6
fix: Feature flag
jangjodi May 14, 2024
24eb106
fix: Merge
jangjodi May 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/sentry/api/endpoints/event_grouping_info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Any

from django.http import HttpRequest, HttpResponse

Expand All @@ -8,17 +9,20 @@
from sentry.api.base import region_silo_endpoint
from sentry.api.bases.project import ProjectEndpoint
from sentry.api.exceptions import ResourceDoesNotExist
from sentry.eventstore.models import Event
from sentry.grouping.api import GroupingConfigNotFound
from sentry.grouping.variants import PerformanceProblemVariant
from sentry.grouping.variants import BaseVariant, PerformanceProblemVariant
from sentry.models.project import Project
from sentry.utils import json, metrics
from sentry.utils.performance_issues.performance_detection import EventPerformanceProblem

logger = logging.getLogger(__name__)


def get_grouping_info(config_name: str | None, project: Project, event_id: str):
event = eventstore.backend.get_event_by_id(project.id, event_id)
def get_grouping_info(
config_name: str | None, project: Project, event_id: str, event: Event | None = None
):
event = event if event else eventstore.backend.get_event_by_id(project.id, event_id)
if event is None:
raise ResourceDoesNotExist

Expand All @@ -40,7 +44,7 @@ def get_grouping_info(config_name: str | None, project: Project, event_id: str):
# Create a variant for every problem associated with the event
# TODO: Generate more unique keys, in case this event has more than
# one problem of a given type
variants = {
variants: dict[str, BaseVariant] | Any = {
problem.problem.type.slug: PerformanceProblemVariant(problem)
for problem in problems
if problem
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import features
from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import region_silo_endpoint
from sentry.api.bases.project import ProjectEndpoint
from sentry.auth.superuser import is_active_superuser
from sentry.tasks.backfill_seer_grouping_records import backfill_seer_grouping_records

region_silo_endpoint
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
region_silo_endpoint

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why this keeps re-appearing - it doesn't show up in my code editor locally either



@region_silo_endpoint
class ProjectBackfillSimilarIssuesEmbeddingsRecords(ProjectEndpoint):
    """Private POST endpoint that schedules the seer grouping-records backfill task."""

    owner = ApiOwner.ISSUES
    publish_status = {"POST": ApiPublishStatus.PRIVATE}

    def post(self, request: Request, project) -> Response:
        """Schedule ``backfill_seer_grouping_records`` for ``project``.

        Responds with 404 unless the ``projects:similarity-embeddings-grouping``
        feature is enabled for the project and the request is made by an active
        superuser. Otherwise the task is dispatched via ``.delay`` and an empty
        204 response is returned.
        """
        # The feature flag is evaluated first; the superuser check only runs
        # when the flag is on.
        allowed = features.has(
            "projects:similarity-embeddings-grouping", project
        ) and is_active_superuser(request)
        if not allowed:
            return Response(status=404)

        backfill_seer_grouping_records.delay(project)
        return Response(status=204)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import features
from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import region_silo_endpoint
from sentry.api.bases.project import ProjectEndpoint
from sentry.tasks.backfill_seer_grouping_records import backfill_seer_grouping_records


@region_silo_endpoint
class ProjectSimilarIssuesEmbeddingsRecords(ProjectEndpoint):
    """Private POST endpoint that schedules the seer grouping-records backfill task."""

    owner = ApiOwner.ISSUES
    publish_status = {"POST": ApiPublishStatus.PRIVATE}

    def post(self, request: Request, project) -> Response:
        """Schedule ``backfill_seer_grouping_records`` for ``project``.

        Responds with 404 when the ``projects:similarity-embeddings-grouping``
        feature is not enabled for the project; otherwise dispatches the task
        via ``.delay`` and returns an empty 204 response.
        """
        # NOTE(review): unlike ProjectBackfillSimilarIssuesEmbeddingsRecords,
        # no superuser check is performed here -- confirm this is intended.
        feature_enabled = features.has("projects:similarity-embeddings-grouping", project)
        if feature_enabled:
            backfill_seer_grouping_records.delay(project)
            return Response(status=204)
        return Response(status=404)
8 changes: 8 additions & 0 deletions src/sentry/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
OrganizationUnsubscribeIssue,
OrganizationUnsubscribeProject,
)
from sentry.api.endpoints.project_backfill_similar_issues_embeddings_records import (
ProjectBackfillSimilarIssuesEmbeddingsRecords,
)
from sentry.api.endpoints.project_stacktrace_coverage import ProjectStacktraceCoverageEndpoint
from sentry.api.endpoints.project_statistical_detectors import ProjectStatisticalDetectors
from sentry.api.endpoints.release_thresholds.release_threshold import ReleaseThresholdEndpoint
Expand Down Expand Up @@ -2481,6 +2484,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
ProjectRuleTaskDetailsEndpoint.as_view(),
name="sentry-api-0-project-rule-task-details",
),
re_path(
r"^(?P<organization_slug>[^\/]+)/(?P<project_slug>[^\/]+)/backfill-similar-embeddings-records/$",
ProjectBackfillSimilarIssuesEmbeddingsRecords.as_view(),
name="sentry-api-0-project-backfill-similar-embeddings-records",
),
re_path(
r"^(?P<organization_slug>[^\/]+)/(?P<project_slug>[^\/]+)/stats/$",
ProjectStatsEndpoint.as_view(),
Expand Down
1 change: 1 addition & 0 deletions src/sentry/conf/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
"sentry.tasks.auto_remove_inbox",
"sentry.tasks.auto_resolve_issues",
"sentry.tasks.backfill_outboxes",
"sentry.tasks.backfill_seer_grouping_records",
"sentry.tasks.beacon",
"sentry.tasks.check_auth",
"sentry.tasks.clear_expired_snoozes",
Expand Down
60 changes: 54 additions & 6 deletions src/sentry/seer/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import logging
from typing import TypedDict

import sentry_sdk
from django.conf import settings
from urllib3 import Retry
from urllib3.exceptions import ReadTimeoutError

from sentry.net.http import connection_from_url
from sentry.utils import json

# Timeout handed to `urlopen(..., timeout=...)` by post_bulk_grouping_records.
# NOTE(review): the pool is presumably a urllib3 connection pool, and urllib3
# interprets timeouts in seconds -- confirm 10000 (not e.g. 10) is intended.
POST_BULK_GROUPING_RECORDS_TIMEOUT = 10000

logger = logging.getLogger(__name__)


class SeerException(Exception):
pass
Expand Down Expand Up @@ -42,10 +48,6 @@ class BreakpointResponse(TypedDict):

seer_staging_connection_pool = connection_from_url(
settings.SEER_AUTOFIX_URL,
retries=Retry(
total=5,
status_forcelist=[408, 429, 502, 503, 504],
),
timeout=settings.ANOMALY_DETECTION_TIMEOUT,
)

Expand Down Expand Up @@ -117,5 +119,51 @@ def get_similar_issues_embeddings(
try:
return json.loads(response.data.decode("utf-8"))
except AttributeError:
empty_response: SimilarIssuesEmbeddingsResponse = {"responses": []}
return empty_response
return SimilarIssuesEmbeddingsResponse(responses=[])


class CreateGroupingRecordData(TypedDict):
    """Shape of one entry in the ``data`` list of ``CreateGroupingRecordsRequest``."""

    hash: str
    # Read from the first entry to build the logging context in
    # post_bulk_grouping_records.
    project_id: int
    message: str


class CreateGroupingRecordsRequest(TypedDict):
    """Payload that post_bulk_grouping_records JSON-encodes and POSTs to seer."""

    group_id_list: list[int]
    data: list[CreateGroupingRecordData]
    # NOTE(review): presumably index-aligned with `group_id_list` and `data`
    # -- confirm against the caller that builds this request.
    stacktrace_list: list[str]


class BulkCreateGroupingRecordsResponse(TypedDict):
    """Result of post_bulk_grouping_records.

    ``success`` is set to False locally on a read timeout or a non-2xx status;
    on a 2xx status the decoded seer response body is returned instead.
    """

    success: bool


def post_bulk_grouping_records(
    grouping_records_request: CreateGroupingRecordsRequest,
) -> BulkCreateGroupingRecordsResponse:
    """Call /v0/issues/similar-issues/grouping-record endpoint from seer.

    The request is JSON-encoded and POSTed through ``seer_staging_connection_pool``
    with ``POST_BULK_GROUPING_RECORDS_TIMEOUT`` as the timeout.

    :param grouping_records_request: group ids, per-group record data and
        stacktraces to send to seer.
    :returns: the decoded JSON body when seer answers with a 2xx status;
        ``{"success": False}`` on a read timeout or any other status.

    Each outcome is logged under ``seer.post_bulk_grouping_records.*`` with the
    group ids and project id as context.
    """
    records = grouping_records_request["data"]
    extra = {
        "group_ids": json.dumps(grouping_records_request["group_id_list"]),
        # An empty batch has no record to take the project id from; building
        # the logging context must not raise IndexError in that case.
        "project_id": records[0]["project_id"] if records else None,
    }

    try:
        response = seer_staging_connection_pool.urlopen(
            "POST",
            "/v0/issues/similar-issues/grouping-record",
            body=json.dumps(grouping_records_request),
            headers={"Content-Type": "application/json;charset=utf-8"},
            timeout=POST_BULK_GROUPING_RECORDS_TIMEOUT,
        )
    except ReadTimeoutError:
        extra.update({"reason": "ReadTimeoutError", "timeout": POST_BULK_GROUPING_RECORDS_TIMEOUT})
        logger.info("seer.post_bulk_grouping_records.failure", extra=extra)
        return {"success": False}

    if 200 <= response.status < 300:
        logger.info("seer.post_bulk_grouping_records.success", extra=extra)
        return json.loads(response.data.decode("utf-8"))

    # Any non-2xx status counts as a failure; record seer's reason phrase.
    extra.update({"reason": response.reason})
    logger.info("seer.post_bulk_grouping_records.failure", extra=extra)
    return {"success": False}
1 change: 1 addition & 0 deletions src/sentry/snuba/referrer.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ class Referrer(Enum):
GROUP_FILTER_BY_EVENT_ID = "group.filter_by_event_id"
GROUP_GET_LATEST = "Group.get_latest"
GROUP_UNHANDLED_FLAG = "group.unhandled-flag"
GROUPING_RECORDS_BACKFILL_REFERRER = "getsentry.tasks.backfill_grouping_records"
INCIDENTS_GET_INCIDENT_AGGREGATES_PRIMARY = "incidents.get_incident_aggregates.primary"
INCIDENTS_GET_INCIDENT_AGGREGATES = "incidents.get_incident_aggregates"
IS_ESCALATING_GROUP = "sentry.issues.escalating.is_escalating"
Expand Down
Loading