Skip to content

Commit 9b43811

Browse files
authored
fix: allow EXPLAIN on multi-statement SQL beginning with SET (#20106)
* feat: allow EXPLAIN on multi-statement SQL beginning with SET SQL strings containing multiple statements cause problems with the ex- isting explain logic. This change adds a regex to strip an arbitrary number of `SET` commands from the head of all incoming SQL, such that the eventual `EXPLAIN` will be performed on just the trailing SQL. This doesn't fully address all multi-statement SQL strings: in part- icular clients might send e.g. both a SELECT and an UPDATE in the same string. As before, this would be passed as-is to the EXPLAIN machinery. Refs: DBMON-2626 * Add changelog entry for new EXPLAIN behavior * Simplified regex * Address code review feedback * This is more of a fix than feature * Add integration tests, fix logic * Refactor * Formatting fix * Handle leading comment
1 parent 15c5db2 commit 9b43811

File tree

5 files changed

+114
-5
lines changed

5 files changed

+114
-5
lines changed

postgres/changelog.d/20106.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow EXPLAIN on multi-statement SQL where one or more SET commands appear before another supported statement type

postgres/datadog_checks/postgres/statement_samples.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from datadog_checks.base.utils.tracking import tracked_method
3333
from datadog_checks.postgres.explain_parameterized_queries import ExplainParameterizedQueries
3434

35-
from .util import DatabaseConfigurationError, DBExplainError, warning_with_tags
35+
from .util import DatabaseConfigurationError, DBExplainError, trim_leading_set_stmts, warning_with_tags
3636
from .version_utils import V9_6, V10
3737

3838
# according to https://unicodebook.readthedocs.io/unicode_encodings.html, the max supported size of a UTF-8 encoded
@@ -749,15 +749,22 @@ def _run_and_track_explain(self, dbname, statement, obfuscated_statement, query_
749749
@tracked_method(agent_check_getter=agent_check_getter)
750750
def _run_explain_safe(self, dbname, statement, obfuscated_statement, query_signature):
751751
# type: (str, str, str, str) -> Tuple[Optional[Dict], Optional[DBExplainError], Optional[str]]
752+
753+
orig_statement = statement
754+
755+
# remove leading SET statements from our SQL
756+
if obfuscated_statement[:3].lower() == "set":
757+
statement = trim_leading_set_stmts(statement)
758+
obfuscated_statement = trim_leading_set_stmts(obfuscated_statement)
759+
752760
if not self._can_explain_statement(obfuscated_statement):
753761
return None, DBExplainError.no_plans_possible, None
754762

755763
track_activity_query_size = self._get_track_activity_query_size()
756764

757-
if (
758-
self._get_truncation_state(track_activity_query_size, statement, query_signature)
759-
== StatementTruncationState.truncated
760-
):
765+
# truncation check is on the original query, not the trimmed version
766+
stmt_trunc = self._get_truncation_state(track_activity_query_size, orig_statement, query_signature)
767+
if stmt_trunc == StatementTruncationState.truncated:
761768
return (
762769
None,
763770
DBExplainError.query_truncated,

postgres/datadog_checks/postgres/util.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# (C) Datadog, Inc. 2019-present
22
# All rights reserved
33
# Licensed under Simplified BSD License (see LICENSE)
4+
import re
45
import string
56
from enum import Enum
67
from typing import Any, List, Tuple # noqa: F401
@@ -130,6 +131,43 @@ def get_list_chunks(lst, n):
130131
yield lst[i : i + n]
131132

132133

134+
SET_TRIM_PATTERN = re.compile(
135+
r"""
136+
^(?:
137+
# match one leading comment
138+
(?:
139+
\s*
140+
/\*
141+
.*?
142+
\*/
143+
)?
144+
# match leading SET commands
145+
\s*SET\b
146+
(?:
147+
[^';]*? | # keywords, integer literals, etc.
148+
(?:'[^']*?')* # single-quoted strings
149+
)+
150+
;
151+
)+
152+
\s*(.+?)$ # actual non-SET cmds
153+
""",
154+
flags=(re.I | re.X),
155+
)
156+
157+
158+
# Expects one or more SQL statements in a string. If the string
159+
# begins with any SET statements, they are removed and the rest
160+
# of the string is returned. Otherwise, the string is returned
161+
# as it was received.
162+
def trim_leading_set_stmts(sql):
163+
match = SET_TRIM_PATTERN.match(sql)
164+
165+
if match:
166+
return match.group(1)
167+
else:
168+
return sql
169+
170+
133171
fmt = PartialFormatter()
134172

135173
AWS_RDS_HOSTNAME_SUFFIX = ".rds.amazonaws.com"

postgres/tests/test_statements.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,44 @@ def test_get_db_explain_setup_state(integration_check, dbm_instance, dbname, exp
583583
failed_explain_test_repeat_count = 5
584584

585585

586+
@pytest.mark.parametrize(
587+
"query",
588+
[
589+
"SELECT * FROM pg_class",
590+
"SET LOCAL datestyle TO postgres; SELECT * FROM pg_class",
591+
],
592+
)
593+
def test_successful_explain(
594+
integration_check,
595+
dbm_instance,
596+
aggregator,
597+
query,
598+
):
599+
dbname = "datadog_test"
600+
# Don't need metrics for this one
601+
dbm_instance['query_metrics']['enabled'] = False
602+
dbm_instance['query_samples']['explain_parameterized_queries'] = False
603+
check = integration_check(dbm_instance)
604+
check._connect()
605+
606+
# run check so all internal state is correctly initialized
607+
run_one_check(check)
608+
609+
# clear out contents of aggregator so we measure only the metrics generated during this specific part of the test
610+
aggregator.reset()
611+
612+
db_explain_error, err = check.statement_samples._get_db_explain_setup_state(dbname)
613+
assert db_explain_error is None
614+
assert err is None
615+
616+
plan, *rest = check.statement_samples._run_and_track_explain(dbname, query, query, query)
617+
assert plan is not None
618+
619+
plan = plan['Plan']
620+
assert plan['Node Type'] == 'Seq Scan'
621+
assert plan['Relation Name'] == 'pg_class'
622+
623+
586624
@pytest.mark.parametrize(
587625
"query,expected_error_tag,explain_function_override,expected_fail_count,skip_on_versions",
588626
[

postgres/tests/test_unit.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,28 @@ def test_database_identifier(pg_instance, template, expected, tags):
201201
pg_instance['tags'] = tags
202202
check = PostgreSql('postgres', {}, [pg_instance])
203203
assert check.database_identifier == expected
204+
205+
206+
@pytest.mark.unit
207+
@pytest.mark.parametrize(
208+
"query,expected_trimmed_query",
209+
[
210+
("SELECT * FROM pg_settings WHERE name = $1", "SELECT * FROM pg_settings WHERE name = $1"),
211+
("SELECT * FROM pg_settings; DELETE FROM pg_settings;", "SELECT * FROM pg_settings; DELETE FROM pg_settings;"),
212+
("SET search_path TO 'my_schema', public; SELECT * FROM pg_settings", "SELECT * FROM pg_settings"),
213+
("SET TIME ZONE 'Europe/Rome'; SELECT * FROM pg_settings", "SELECT * FROM pg_settings"),
214+
(
215+
"SET LOCAL request_id = 1234; SET LOCAL hostname TO 'Bob''s Laptop'; SELECT * FROM pg_settings",
216+
"SELECT * FROM pg_settings",
217+
),
218+
("SET LONG;" * 1024 + "SELECT *;", "SELECT *;"),
219+
("SET " + "'quotable'" * 1024 + "; SELECT *;", "SELECT *;"),
220+
("SET 'l" + "o" * 1024 + "ng'; SELECT *;", "SELECT *;"),
221+
(" /** pl/pgsql **/ SET 'comment'; SELECT *;", "SELECT *;"),
222+
("this isn't SQL", "this isn't SQL"),
223+
("", ""),
224+
],
225+
)
226+
def test_trim_set_stmts(query, expected_trimmed_query):
227+
trimmed_query = util.trim_leading_set_stmts(query)
228+
assert trimmed_query == expected_trimmed_query

0 commit comments

Comments
 (0)