Skip to content

Commit 3515759

Browse files
authored
migrate index usage and index fragmentation metrics to database metrics (#17374)
* migrate index usage and index fragmentation metrics to database metrics
* add changelog
* migrate database backup metrics
* fix lint
* fix lint
1 parent 45f0ac4 commit 3515759

14 files changed

+928
-206
lines changed

sqlserver/changelog.d/17374.added

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Migrate `SQLServer index usage metrics`, `SQLServer database index fragmentation metrics` and `SQLServer database backup metrics` to `database_metrics`.
2+
Increase `SQLServer database index fragmentation metrics` and `SQLServer database backup metrics` default collection interval to 5 mins.

sqlserver/datadog_checks/sqlserver/connection.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from datadog_checks.base import AgentCheck, ConfigurationError
1111
from datadog_checks.base.log import get_check_logger
1212
from datadog_checks.sqlserver.cursor import CommenterCursorWrapper
13+
from datadog_checks.sqlserver.utils import construct_use_statement
1314

1415
try:
1516
import adodbapi
@@ -689,3 +690,29 @@ def test_network_connectivity(self):
689690
return "ERROR: {}".format(e.strerror if hasattr(e, 'strerror') else repr(e))
690691

691692
return None
693+
694+
def _get_current_database_context(self):
    """Return the name of the database this connection is currently scoped to.

    Runs ``select DB_NAME()`` on a managed cursor and returns the single
    scalar result.
    """
    with self.get_managed_cursor() as cursor:
        cursor.execute('select DB_NAME()')
        rows = cursor.fetchall()
        return rows[0][0]
702+
703+
@contextmanager
def restore_current_database_context(self):
    """Context manager that restores the original database context on exit.

    Captures the current database before the wrapped code executes any USE
    statements, then switches back to it afterwards — even if the wrapped
    code raises. A failure to switch back is logged rather than raised, so
    cleanup never masks the original error.
    """
    original_db = self._get_current_database_context()
    try:
        yield
    finally:
        if original_db:
            try:
                self.log.debug("Restoring the original database context %s", original_db)
                with self.get_managed_cursor() as cursor:
                    cursor.execute(construct_use_statement(original_db))
            except Exception as e:
                self.log.error("Failed to switch back to the original database context %s: %s", original_db, e)

sqlserver/datadog_checks/sqlserver/const.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
ENGINE_EDITION_AZURE_SQL_EDGE = 9
2222
ENGINE_EDITION_AZURE_SYNAPSE_SERVERLESS_POOL = 11
2323

24-
DEFAULT_INDEX_USAGE_STATS_INTERVAL = 5 * 60 # 5 minutes
25-
2624
# Keys of the static info cache, used to cache server info which does not change
2725
STATIC_INFO_VERSION = 'version'
2826
STATIC_INFO_MAJOR_VERSION = 'major_version'
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# (C) Datadog, Inc. 2024-present
2+
# All rights reserved
3+
# Licensed under a 3-clause BSD style license (see LICENSE)
4+
from .database_backup_metrics import SqlserverDatabaseBackupMetrics
5+
from .db_fragmentation_metrics import SqlserverDBFragmentationMetrics
6+
from .index_usage_metrics import SqlserverIndexUsageMetrics
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# (C) Datadog, Inc. 2024-present
2+
# All rights reserved
3+
# Licensed under a 3-clause BSD style license (see LICENSE)
4+
5+
6+
from typing import Callable, List, Optional
7+
8+
from datadog_checks.base.log import get_check_logger
9+
from datadog_checks.base.utils.db.core import QueryExecutor
10+
from datadog_checks.sqlserver.const import STATIC_INFO_ENGINE_EDITION, STATIC_INFO_MAJOR_VERSION
11+
12+
13+
class SqlserverDatabaseMetricsBase:
    """Base class for SQL Server database-level metric collectors.

    Subclasses must implement ``enabled`` (does this collector apply to the
    current server/configuration?) and ``queries`` (the query definitions to
    run). This base class lazily builds and caches QueryExecutor objects for
    those queries and runs them via ``execute()``.
    """

    def __init__(
        self,
        instance_config,
        new_query_executor,
        server_static_info,
        execute_query_handler,
        track_operation_time=False,
        databases=None,
    ):
        # Raw instance configuration; subclasses read their own settings
        # (feature flags, collection intervals) out of this dict.
        self.instance_config: dict = instance_config
        # Cached server metadata (version, engine edition, ...).
        self.server_static_info: dict = server_static_info
        # Factory that builds a QueryExecutor for a list of query definitions.
        self.new_query_executor: Callable[
            [List[dict], Callable, Optional[List[str]], Optional[bool]], QueryExecutor
        ] = new_query_executor
        # Callable the executors use to actually run SQL against the server.
        self.execute_query_handler: Callable[[str, Optional[str]], List[tuple]] = execute_query_handler
        self.track_operation_time: bool = track_operation_time
        self._databases: Optional[List[str]] = databases
        # Lazily-populated executor cache; see ``query_executors``.
        self._query_executors: List[QueryExecutor] = []
        self.log = get_check_logger()

    @property
    def major_version(self) -> Optional[int]:
        # May be None if the static info cache has not been populated yet.
        return self.server_static_info.get(STATIC_INFO_MAJOR_VERSION)

    @property
    def engine_edition(self) -> Optional[int]:
        # May be None if the static info cache has not been populated yet.
        return self.server_static_info.get(STATIC_INFO_ENGINE_EDITION)

    @property
    def enabled(self) -> bool:
        # Subclasses decide whether the collector should run.
        raise NotImplementedError

    @property
    def queries(self) -> List[dict]:
        # Subclasses provide the query definitions to execute.
        raise NotImplementedError

    @property
    def databases(self) -> Optional[List[str]]:
        return self._databases

    @property
    def query_executors(self) -> List[QueryExecutor]:
        '''
        Returns the QueryExecutor objects for the database metrics,
        building and caching them on first access.
        '''
        if not self._query_executors:
            self._query_executors = self._build_query_executors()
        return self._query_executors

    def _build_query_executors(self) -> List[QueryExecutor]:
        '''
        Builds a single compiled QueryExecutor covering all of ``queries``.
        Subclasses may override to build one executor per database.
        '''
        executor = self.new_query_executor(
            self.queries, executor=self.execute_query_handler, track_operation_time=self.track_operation_time
        )
        executor.compile_queries()
        return [executor]

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"enabled={self.enabled}, "
            f"major_version={self.major_version}, "
            f"engine_edition={self.engine_edition})"
        )

    def metric_names(self) -> List[str]:
        '''
        Returns one list of fully-qualified metric names per query (a list of lists).
        Note: This method is used for testing purposes in order to verify that the correct metrics are being collected.
        '''
        all_names = []
        for query in self.queries:
            per_query = []
            for column in query["columns"]:
                # Tag and source columns do not produce metrics.
                if not column["type"].startswith("tag") and column["type"] != "source":
                    per_query.append("sqlserver." + column["name"])
            for extra in query.get("extras") or []:
                # Only extras that are submitted (submit_type/type) become metrics.
                if extra.get("submit_type") or extra.get("type"):
                    per_query.append("sqlserver." + extra["name"])
            all_names.append(per_query)
        return all_names

    def tag_names(self) -> List[str]:
        '''
        Returns one list of tag column names per query (a list of lists).
        Note: This method is used for testing purposes in order to verify that the correct tags are being collected.
        '''
        return [
            [column["name"] for column in query["columns"] if column["type"].startswith("tag")]
            for query in self.queries
        ]

    def execute(self) -> None:
        # Skip entirely when the collector is disabled for this instance/server.
        if not self.enabled:
            self.log.debug("%s: not enabled, skipping execution", str(self))
            return
        for executor in self.query_executors:
            executor.execute()
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# (C) Datadog, Inc. 2024-present
2+
# All rights reserved
3+
# Licensed under a 3-clause BSD style license (see LICENSE)
4+
5+
6+
from datadog_checks.sqlserver.utils import is_azure_sql_database
7+
8+
from .base import SqlserverDatabaseMetricsBase
9+
10+
# Counts backup sets per database; databases with no backups still appear
# (right outer join) with a count of 0.
DATABASE_BACKUP_METRICS_QUERY = {
    "name": "msdb.dbo.backupset",
    "query": """SELECT
        sys.databases.name as db,
        sys.databases.name as database_name,
        count(backup_set_id) as backup_set_id_count
        from msdb.dbo.backupset right outer join sys.databases
        on sys.databases.name = msdb.dbo.backupset.database_name
        group by sys.databases.name
    """,
    "columns": [
        {"name": "db", "type": "tag"},
        {"name": "database", "type": "tag"},
        {"name": "database.backup_count", "type": "gauge"},
    ],
}


class SqlserverDatabaseBackupMetrics(SqlserverDatabaseMetricsBase):
    """Collects database backup-count metrics from msdb.dbo.backupset.

    msdb.dbo.backupset contains a row for each backup set. A backup set
    contains the backup from a single, successful backup operation.
    https://docs.microsoft.com/en-us/sql/relational-databases/system-tables/backupset-transact-sql?view=sql-server-ver15
    """

    @property
    def enabled(self):
        # These metrics are not collected when running against Azure SQL Database.
        return not is_azure_sql_database(self.engine_edition)

    @property
    def _default_collection_interval(self) -> int:
        '''
        Default interval, in seconds, at which to collect database backup metrics.
        '''
        return 5 * 60  # 5 minutes

    @property
    def collection_interval(self) -> int:
        '''
        Interval, in seconds, at which to collect database backup metrics.
        Note: The database backup metrics query can be expensive, so it is recommended to set a higher interval.
        '''
        return int(self.instance_config.get('database_backup_metrics_interval', self._default_collection_interval))

    @property
    def queries(self):
        # Shallow-copy the shared definition so each instance can carry its
        # own collection interval without mutating the module-level constant.
        query = DATABASE_BACKUP_METRICS_QUERY.copy()
        query['collection_interval'] = self.collection_interval
        return [query]

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"enabled={self.enabled}, "
            f"engine_edition={self.engine_edition}, "
            f"collection_interval={self.collection_interval})"
        )
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# (C) Datadog, Inc. 2024-present
2+
# All rights reserved
3+
# Licensed under a 3-clause BSD style license (see LICENSE)
4+
5+
import copy
6+
import functools
7+
8+
from datadog_checks.base.config import is_affirmative
9+
from datadog_checks.base.errors import ConfigurationError
10+
11+
from .base import SqlserverDatabaseMetricsBase
12+
13+
# Index size/fragmentation statistics; `{db}` is substituted per database
# before compilation (see _build_query_executors).
DB_FRAGMENTATION_QUERY = {
    "name": "sys.dm_db_index_physical_stats",
    "query": """SELECT
        DB_NAME(DDIPS.database_id) as database_name,
        OBJECT_NAME(DDIPS.object_id, DDIPS.database_id) as object_name,
        DDIPS.index_id as index_id,
        I.name as index_name,
        DDIPS.fragment_count as fragment_count,
        DDIPS.avg_fragment_size_in_pages as avg_fragment_size_in_pages,
        DDIPS.page_count as page_count,
        DDIPS.avg_fragmentation_in_percent as avg_fragmentation_in_percent
        FROM sys.dm_db_index_physical_stats (DB_ID('{db}'),null,null,null,null) as DDIPS
        INNER JOIN sys.indexes as I WITH (NOLOCK) ON I.object_id = DDIPS.object_id
        AND DDIPS.index_id = I.index_id
        WHERE DDIPS.fragment_count is not null
    """,
    "columns": [
        {"name": "database_name", "type": "tag"},
        {"name": "object_name", "type": "tag"},
        {"name": "index_id", "type": "tag"},
        {"name": "index_name", "type": "tag"},
        {"name": "database.fragment_count", "type": "gauge"},
        {"name": "database.avg_fragment_size_in_pages", "type": "gauge"},
        {"name": "database.index_page_count", "type": "gauge"},
        {"name": "database.avg_fragmentation_in_percent", "type": "gauge"},
    ],
}


class SqlserverDBFragmentationMetrics(SqlserverDatabaseMetricsBase):
    """Collects index size and fragmentation metrics per configured database.

    sys.dm_db_index_physical_stats returns size and fragmentation information
    for the data and indexes of the specified table or view in SQL Server.

    There are reports of this query being very slow for large datasets,
    so debug query timing are included to help monitor it.
    https://dba.stackexchange.com/q/76374

    https://docs.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-db-index-physical-stats-transact-sql?view=sql-server-ver15
    """

    @property
    def include_db_fragmentation_metrics(self):
        '''Whether index fragmentation metric collection is enabled (default: False).'''
        return is_affirmative(self.instance_config.get('include_db_fragmentation_metrics', False))

    @property
    def include_db_fragmentation_metrics_tempdb(self):
        '''Whether tempdb should also be included in collection (default: False).'''
        return is_affirmative(self.instance_config.get('include_db_fragmentation_metrics_tempdb', False))

    @property
    def db_fragmentation_object_names(self):
        '''Optional list of object names to restrict the fragmentation query to.'''
        return self.instance_config.get('db_fragmentation_object_names', [])

    @property
    def enabled(self):
        # Opt-in: the underlying DMV query can be expensive (see class docstring).
        if not self.include_db_fragmentation_metrics:
            return False
        return True

    @property
    def _default_collection_interval(self) -> int:
        '''
        Default interval, in seconds, at which to collect database index fragmentation metrics.
        '''
        return 5 * 60  # 5 minutes

    @property
    def collection_interval(self) -> int:
        '''
        Interval, in seconds, at which to collect database index fragmentation metrics.
        Note: The index fragmentation metrics query can be expensive, so it is recommended to set a higher interval.
        '''
        return int(self.instance_config.get('db_fragmentation_metrics_interval', self._default_collection_interval))

    @property
    def databases(self):
        '''
        Returns the list of databases to collect index fragmentation metrics for.
        By default, tempdb is excluded.

        :raises ConfigurationError: if no databases were provided.
        '''
        if not self._databases:
            # Fixed copy-paste in the message: this is the fragmentation
            # collector, not index usage.
            raise ConfigurationError("No databases configured for database index fragmentation metrics")
        if self.include_db_fragmentation_metrics_tempdb:
            return self._databases
        # Build a filtered copy instead of calling list.remove() so we never
        # mutate the shared database list owned by the caller.
        return [database for database in self._databases if database != 'tempdb']

    @property
    def queries(self):
        # make a copy of the query to avoid modifying the original
        # in case different instances have different collection intervals
        query = DB_FRAGMENTATION_QUERY.copy()
        query['collection_interval'] = self.collection_interval
        return [query]

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"enabled={self.enabled}, "
            f"include_db_fragmentation_metrics={self.include_db_fragmentation_metrics}, "
            f"include_db_fragmentation_metrics_tempdb={self.include_db_fragmentation_metrics_tempdb}, "
            f"db_fragmentation_object_names={self.db_fragmentation_object_names}, "
            f"collection_interval={self.collection_interval})"
        )

    def _build_query_executors(self):
        '''
        Builds one compiled QueryExecutor per target database.

        Each executor runs the fragmentation query bound to its database
        (via functools.partial on the query handler) and tags all results
        with ``db:<name>``.
        '''
        executors = []
        for database in self.databases:
            # Deep-copy so the per-database formatting below does not leak
            # into the shared query definition returned by `queries`.
            queries = copy.deepcopy(self.queries)
            for query in queries:
                query['query'] = query['query'].format(db=database)
                if self.db_fragmentation_object_names:
                    # NOTE: object names come from the integration config (not
                    # untrusted input) and are inlined into the SQL here.
                    query['query'] += " AND OBJECT_NAME(DDIPS.object_id, DDIPS.database_id) IN ({})".format(
                        ','.join(["'{}'".format(name) for name in self.db_fragmentation_object_names])
                    )
            executor = self.new_query_executor(
                queries,
                executor=functools.partial(self.execute_query_handler, db=database),
                extra_tags=['db:{}'.format(database)],
                track_operation_time=self.track_operation_time,
            )
            executor.compile_queries()
            executors.append(executor)
        return executors

0 commit comments

Comments
 (0)