[dbm] add optional execution_indicator to rule out false positive query metrics move (#20037)

lu-zhengda · web-flow · commit be13318e266c · 2025-04-16T16:02:06.000Z
* add optional execution_indicator to rule out false positive query metrics move

* add changelog

* update comment

* update changelog

* remove oracle and db2 from comments
diff --git a/datadog_checks_base/changelog.d/20037.added b/datadog_checks_base/changelog.d/20037.added
@@ -0,0 +1 @@
+Added execution indicators to StatementMetrics to filter out false positives caused by normalized queries being evicted and re-inserted with the same call count and a slight duration change.
diff --git a/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py b/datadog_checks_base/datadog_checks/base/utils/db/statement_metrics.py
@@ -13,9 +13,7 @@ class StatementMetrics:
 
         - Postgres: pg_stat_statements
         - MySQL: performance_schema.events_statements_summary_by_digest
-        - Oracle: V$SQLAREA
         - SQL Server: sys.dm_exec_query_stats
-        - DB2: mon_db_summary
 
     These tables are monotonically increasing, so the metrics are computed from the difference
     in values between check runs.
@@ -24,7 +22,7 @@ class StatementMetrics:
     def __init__(self):
         self._previous_statements = {}
 
-    def compute_derivative_rows(self, rows, metrics, key):
+    def compute_derivative_rows(self, rows, metrics, key, execution_indicators=None):
         """
         Compute the first derivative of column-based metrics for a given set of rows. This function
         takes the difference of the previous check run's values and the current check run's values
@@ -41,10 +39,20 @@ def compute_derivative_rows(self, rows, metrics, key):
         :params rows (_List[dict]_): rows from current check run
         :params metrics (_List[str]_): the metrics to compute for each row
         :params key (_callable_): function for an ID which uniquely identifies a row across runs
+        :params execution_indicators (_List[str]_): optional metrics, at least one of which must increase for a
+            query to be considered executed. These typically increment only when a query actually executes, e.g.:
+            - PostgreSQL: 'calls' from pg_stat_statements
+            - MySQL: 'exec_count' from performance_schema.events_statements_summary_by_digest
+            - SQL Server: 'execution_count' from sys.dm_exec_query_stats
+            This helps filter out cases where a normalized query was evicted and then re-inserted with the same
+            call count (usually 1) and a slight duration change. In this case, the new normalized query entry
+            should be treated as the baseline for future diffs.
         :return (_List[dict]_): a list of rows with the first derivative of the metrics
         """
         result = []
         metrics = set(metrics)
+        if execution_indicators:
+            execution_indicators = set(execution_indicators)
 
         merged_rows, dropped_metrics = _merge_duplicate_rows(rows, metrics, key)
         if dropped_metrics:
@@ -69,6 +77,12 @@ def compute_derivative_rows(self, rows, metrics, key):
             # 2. No changes since the previous run: There is no need to store metrics of 0, since that is implied by
             #    the absence of metrics. On any given check run, most rows will have no difference so this optimization
             #    avoids having to send a lot of unnecessary metrics.
+            #
+            # 3. Execution indicators: If execution_indicators is specified, only consider a query as changed if at
+            #    least one of the execution indicator metrics has increased. This helps filter out cases where an
+            #    old or less frequently executed normalized query was evicted because the stats table was full and
+            #    then re-inserted into the stats table with a small call count and a slight duration change. In this
+            #    case, the new normalized query entry should be treated as the baseline for future diffs.
 
             diffed_row = {k: row[k] - prev[k] if k in metric_columns else row[k] for k in row.keys()}
 
@@ -79,6 +93,12 @@ def compute_derivative_rows(self, rows, metrics, key):
                 # of potentially including truncated rows that exceed previous run counts.
                 continue
 
+            # If execution_indicators is specified, skip rows where none of the execution indicator metrics increased
+            if execution_indicators:
+                indicator_columns = execution_indicators & metric_columns
+                if not any(diffed_row[k] > 0 for k in indicator_columns):
+                    continue
+
             # No changes to the query; no metric needed
             if all(diffed_row[k] == 0 for k in metric_columns):
                 continue
diff --git a/datadog_checks_base/tests/base/utils/db/test_db_statements.py b/datadog_checks_base/tests/base/utils/db/test_db_statements.py
@@ -303,3 +303,124 @@ def test_compute_derivative_rows_mem_usage(self):
     def test_compute_derivative_rows_benchmark(self, benchmark):
         sm = StatementMetrics()
         benchmark(self.__run_compute_derivative_rows, sm)
+
+    def test_compute_derivative_rows_with_execution_indicators(self):
+        sm = StatementMetrics()
+
+        def key(row):
+            return (row['query'], row['db'], row['user'])
+
+        metrics = ['calls', 'total_time', 'rows']
+        execution_indicators = ['calls']
+
+        # Initial state
+        rows1 = [
+            {'calls': 10, 'total_time': 1000, 'rows': 50, 'query': 'SELECT 1', 'db': 'test', 'user': 'user1'},
+            {'calls': 5, 'total_time': 500, 'rows': 25, 'query': 'SELECT 2', 'db': 'test', 'user': 'user1'},
+        ]
+
+        sm.compute_derivative_rows(rows1, metrics, key=key, execution_indicators=execution_indicators)
+
+        # Second run - only duration changes (should be ignored)
+        rows2 = [
+            {'calls': 10, 'total_time': 1001, 'rows': 50, 'query': 'SELECT 1', 'db': 'test', 'user': 'user1'},
+            {'calls': 5, 'total_time': 501, 'rows': 25, 'query': 'SELECT 2', 'db': 'test', 'user': 'user1'},
+        ]
+        assert [] == sm.compute_derivative_rows(rows2, metrics, key=key, execution_indicators=execution_indicators)
+
+        # Third run - calls change (should be included)
+        rows3 = [
+            {'calls': 11, 'total_time': 1002, 'rows': 51, 'query': 'SELECT 1', 'db': 'test', 'user': 'user1'},
+            {'calls': 5, 'total_time': 502, 'rows': 25, 'query': 'SELECT 2', 'db': 'test', 'user': 'user1'},
+        ]
+        result = sm.compute_derivative_rows(rows3, metrics, key=key, execution_indicators=execution_indicators)
+        assert len(result) == 1
+        assert result[0]['calls'] == 1
+        assert result[0]['total_time'] == 1
+        assert result[0]['rows'] == 1
+
+    def test_compute_derivative_rows_with_multiple_execution_indicators(self):
+        sm = StatementMetrics()
+
+        def key(row):
+            return (row['query'], row['db'], row['user'])
+
+        metrics = ['calls', 'executions', 'total_time', 'rows']
+        execution_indicators = ['calls', 'executions']
+
+        # Initial state
+        rows1 = [
+            {
+                'calls': 10,
+                'executions': 10,
+                'total_time': 1000,
+                'rows': 50,
+                'query': 'SELECT 1',
+                'db': 'test',
+                'user': 'user1',
+            },
+        ]
+
+        sm.compute_derivative_rows(rows1, metrics, key=key, execution_indicators=execution_indicators)
+
+        # Second run - only one execution indicator changes
+        rows2 = [
+            {
+                'calls': 11,
+                'executions': 10,
+                'total_time': 1001,
+                'rows': 50,
+                'query': 'SELECT 1',
+                'db': 'test',
+                'user': 'user1',
+            },
+        ]
+        result = sm.compute_derivative_rows(rows2, metrics, key=key, execution_indicators=execution_indicators)
+        assert len(result) == 1
+        assert result[0]['calls'] == 1
+        assert result[0]['executions'] == 0
+        assert result[0]['total_time'] == 1
+        assert result[0]['rows'] == 0
+
+        # Third run - both execution indicators change
+        rows3 = [
+            {
+                'calls': 12,
+                'executions': 11,
+                'total_time': 1002,
+                'rows': 51,
+                'query': 'SELECT 1',
+                'db': 'test',
+                'user': 'user1',
+            },
+        ]
+        result = sm.compute_derivative_rows(rows3, metrics, key=key, execution_indicators=execution_indicators)
+        assert len(result) == 1
+        assert result[0]['calls'] == 1
+        assert result[0]['executions'] == 1
+        assert result[0]['total_time'] == 1
+        assert result[0]['rows'] == 1
+
+    def test_compute_derivative_rows_with_invalid_execution_indicators(self):
+        sm = StatementMetrics()
+
+        def key(row):
+            return (row['query'], row['db'], row['user'])
+
+        metrics = ['calls', 'total_time', 'rows']
+
+        # Test with empty execution indicators
+        rows1 = [
+            {'calls': 10, 'total_time': 1000, 'rows': 50, 'query': 'SELECT 1', 'db': 'test', 'user': 'user1'},
+        ]
+        rows2 = [
+            {'calls': 11, 'total_time': 1001, 'rows': 51, 'query': 'SELECT 1', 'db': 'test', 'user': 'user1'},
+        ]
+
+        # Empty execution indicators should behave like no execution indicators specified
+        _ = sm.compute_derivative_rows(rows1, metrics, key=key, execution_indicators=[])
+        result = sm.compute_derivative_rows(rows2, metrics, key=key, execution_indicators=[])
+        assert len(result) == 1
+        assert result[0]['calls'] == 1
+        assert result[0]['total_time'] == 1
+        assert result[0]['rows'] == 1

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Added execution indicators to StatementMetrics to filter out false positives from normalized queries being evicted and re-inserted with same call count and slight duration change.`