116
116
from sentry .utils import json , metrics
117
117
from sentry .utils .cache import cache_key_for_event
118
118
from sentry .utils .canonical import CanonicalKeyDict
119
+ from sentry .utils .circuit_breaker import (
120
+ ERROR_COUNT_CACHE_KEY ,
121
+ CircuitBreakerPassthrough ,
122
+ circuit_breaker_activated ,
123
+ )
119
124
from sentry .utils .dates import to_datetime
120
125
from sentry .utils .event import has_event_minified_stack_trace , has_stacktrace , is_handled
121
126
from sentry .utils .eventuser import EventUser
141
146
142
147
HIGH_SEVERITY_THRESHOLD = 0.1
143
148
149
+ SEER_GLOBAL_RATE_LIMIT_DEFAULT = 20 # fallback for the "issues.severity.seer-global-rate-limit" option: 20 requests per 1-second window, shared by all projects
150
+ SEER_PROJECT_RATE_LIMIT_DEFAULT = 5 # fallback for the "issues.severity.seer-project-rate-limit" option: 5 requests per 1-second window, per project
151
# Cache key under which update_severity_error_count() keeps the running count of severity-scoring failures.
# NOTE(review): this key is built from "sentry.seer.severity-failures", whereas the circuit-breaker check in
# _get_severity_metadata_for_group is called with "sentry.seer.severity" -- confirm both resolve to the same counter.
+ SEER_ERROR_COUNT_KEY = ERROR_COUNT_CACHE_KEY ("sentry.seer.severity-failures" )
152
+
144
153
145
154
@dataclass
146
155
class GroupInfo :
@@ -1829,9 +1838,16 @@ def _create_group(project: Project, event: Event, **group_creation_kwargs: Any)
1829
1838
group_data .setdefault ("metadata" , {}).update (sdk_metadata_from_event (event ))
1830
1839
1831
1840
# add severity to metadata for alert filtering
1832
- group_type = group_creation_kwargs .get ("type" , None )
1833
- severity = _get_severity_metadata_for_group (event , project .id , group_type )
1834
- group_data ["metadata" ].update (severity )
1841
+ try :
1842
+ group_type = group_creation_kwargs .get ("type" , None )
1843
+ severity = _get_severity_metadata_for_group (event , project .id , group_type )
1844
+ group_data ["metadata" ].update (severity )
1845
+ except Exception as e :
1846
+ logger .exception (
1847
+ "Failed to get severity metadata for group" ,
1848
+ repr (e ),
1849
+ extra = {"event_id" : event .event_id },
1850
+ )
1835
1851
1836
1852
if features .has ("projects:issue-priority" , project , actor = None ):
1837
1853
# the kwargs only include priority for non-error issue platform events, which takes precedence.
@@ -2163,7 +2179,10 @@ def _get_severity_metadata_for_group(
2163
2179
from sentry .receivers .rules import PLATFORMS_WITH_PRIORITY_ALERTS
2164
2180
2165
2181
if killswitch_matches_context ("issues.skip-seer-requests" , {"project_id" : event .project_id }):
2166
- logger .warning ("get_severity_metadata_for_group.seer_killswitch_enabled" )
2182
+ logger .warning (
2183
+ "get_severity_metadata_for_group.seer_killswitch_enabled" ,
2184
+ extra = {"event_id" : event .event_id , "project_id" : project_id },
2185
+ )
2167
2186
metrics .incr ("issues.severity.seer_killswitch_enabled" )
2168
2187
return {}
2169
2188
@@ -2180,25 +2199,32 @@ def _get_severity_metadata_for_group(
2180
2199
if not is_supported_platform or not is_error_group :
2181
2200
return {}
2182
2201
2202
+ passthrough_data = options .get (
2203
+ "issues.severity.seer-circuit-breaker-passthrough-limit" ,
2204
+ CircuitBreakerPassthrough (limit = 1 , window = 10 ),
2205
+ )
2206
+ if circuit_breaker_activated ("sentry.seer.severity" , passthrough_data = passthrough_data ):
2207
+ logger .warning (
2208
+ "get_severity_metadata_for_group.circuit_breaker_activated" ,
2209
+ extra = {"event_id" : event .event_id , "project_id" : project_id },
2210
+ )
2211
+ return {}
2212
+
2183
2213
from sentry import ratelimits as ratelimiter
2184
2214
2185
- limit = options .get ("issues.severity.seer-global-rate-limit" , 20 )
2215
+ limit = options .get ("issues.severity.seer-global-rate-limit" , SEER_GLOBAL_RATE_LIMIT_DEFAULT )
2186
2216
if ratelimiter .backend .is_limited (
2187
- "seer:severity-calculation:global-limit" ,
2188
- limit = limit ,
2189
- window = 1 , # starting this out 20 requests per second
2217
+ "seer:severity-calculation:global-limit" , limit = limit , window = 1
2190
2218
):
2191
2219
logger .warning (
2192
2220
"get_severity_metadata_for_group.rate_limited_globally" ,
2193
2221
extra = {"event_id" : event .event_id , "project_id" : project_id },
2194
2222
)
2195
2223
metrics .incr ("issues.severity.rate_limited_globally" )
2196
2224
2197
- limit = options .get ("issues.severity.seer-project-rate-limit" , 5 )
2225
+ limit = options .get ("issues.severity.seer-project-rate-limit" , SEER_PROJECT_RATE_LIMIT_DEFAULT )
2198
2226
if ratelimiter .backend .is_limited (
2199
- f"seer:severity-calculation:{ project_id } " ,
2200
- limit = limit ,
2201
- window = 1 , # starting this out 5 requests per second
2227
+ f"seer:severity-calculation:{ project_id } " , limit = limit , window = 1
2202
2228
):
2203
2229
logger .warning (
2204
2230
"get_severity_metadata_for_group.rate_limited_for_project" ,
@@ -2215,7 +2241,7 @@ def _get_severity_metadata_for_group(
2215
2241
}
2216
2242
except Exception as e :
2217
2243
logger .warning ("Failed to calculate severity score for group" , repr (e ))
2218
-
2244
+ update_severity_error_count ()
2219
2245
return {}
2220
2246
2221
2247
@@ -2259,6 +2285,19 @@ def _get_priority_for_group(severity: Mapping[str, Any], kwargs: Mapping[str, An
2259
2285
return PriorityLevel .MEDIUM
2260
2286
2261
2287
2288
def update_severity_error_count(reset=False) -> None:
    """Record a severity-scoring failure, or clear the failure counter.

    The counter is stored in the cache under ``SEER_ERROR_COUNT_KEY`` and is
    always (re)written with a one-hour expiry.

    Args:
        reset: When true, overwrite the counter with 0 instead of
            incrementing it.

    A ``ValueError`` raised while incrementing or refreshing the counter is
    treated as "no usable counter yet" and the counter is initialised to 1.
    (Presumably this is the cache backend's signal for a missing key --
    confirm against the cache implementation in use.)
    """
    one_hour = 60 * 60  # expiry, in seconds, applied on every write

    if reset:
        fresh_value = 0
    else:
        try:
            cache.incr(SEER_ERROR_COUNT_KEY)
            # Push the expiry out so the count survives for another hour
            # after the most recent failure.
            cache.touch(SEER_ERROR_COUNT_KEY, timeout=one_hour)
        except ValueError:
            fresh_value = 1
        else:
            # Existing counter was bumped in place; nothing left to write.
            return

    cache.set(SEER_ERROR_COUNT_KEY, fresh_value, timeout=one_hour)
2299
+
2300
+
2262
2301
def _get_severity_score (event : Event ) -> tuple [float , str ]:
2263
2302
# Short circuit the severity value if we know the event is fatal or info/debug
2264
2303
level = str (event .data .get ("level" , "error" ))
@@ -2344,20 +2383,23 @@ def _get_severity_score(event: Event) -> tuple[float, str]:
2344
2383
extra = logger_data ,
2345
2384
)
2346
2385
reason = "microservice_max_retry"
2386
+ update_severity_error_count ()
2347
2387
except Exception as e :
2348
2388
logger .warning (
2349
2389
"Unable to get severity score from microservice. Got: %s." ,
2350
2390
repr (e ),
2351
2391
extra = logger_data ,
2352
2392
)
2353
2393
reason = "microservice_error"
2394
+ update_severity_error_count ()
2354
2395
else :
2355
2396
logger .info (
2356
2397
"Got severity score of %s for event %s" ,
2357
2398
severity ,
2358
2399
event .data ["event_id" ],
2359
2400
extra = logger_data ,
2360
2401
)
2402
+ update_severity_error_count (reset = True )
2361
2403
2362
2404
return severity , reason
2363
2405
0 commit comments