Skip to content

Commit 88af1c4

Browse files
authored
Add metadata check on PG metrics and fix mismatching metadata type (#20324)
Some metrics were sent as rate but declared as gauge in metadata.csv; this fixes the metadata to match how they are submitted. It also adds an assertion to the common tests to check that the metadata matches the reported metrics.
1 parent 35b5437 commit 88af1c4

File tree

4 files changed

+25
-13
lines changed

4 files changed

+25
-13
lines changed

postgres/metadata.csv

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ postgresql.analyze.ext_stats_total,gauge,,,,"Number of extended statistics. This
1111
postgresql.analyze.sample_blks_scanned,gauge,,block,,"Number of heap blocks scanned. This metric is tagged with db, table, child_relation, phase.",0,postgres,postgres analyze blks scanned,
1212
postgresql.analyze.sample_blks_total,gauge,,block,,"Total number of heap blocks that will be sampled. This metric is tagged with db, table, child_relation, phase.",0,postgres,postgres analyze blks total,
1313
postgresql.analyzed,count,,,,"Enabled with `relations`. The number of times this table has been manually analyzed. This metric is tagged with db, schema, table.",0,postgres,analyze,
14+
postgresql.archiver.archived_count,count,,,,"Number of WAL files that have been successfully archived.",0,postgres,archived cnt,
15+
postgresql.archiver.failed_count,count,,,,"Number of failed attempts for archiving WAL files.",-1,postgres,failed archived cnt,
1416
postgresql.autoanalyzed,count,,,,"Enabled with `relations`. The number of times this table has been analyzed by the autovacuum daemon. This metric is tagged with db, schema, table.",0,postgres,auto analyze,
1517
postgresql.autovacuumed,count,,,,"Enabled with `relations`. The number of times this table has been vacuumed by the autovacuum daemon. This metric is tagged with db, schema, table.",0,postgres,auto vac,
1618
postgresql.before_xid_wraparound,gauge,,transaction,,The number of transactions that can occur until a transaction wraparound. This metric is tagged with db.,0,postgres,tx before xid wraparound,
@@ -26,7 +28,7 @@ postgresql.bgwriter.sync_time,count,,millisecond,,The total amount of checkpoint
2628
postgresql.bgwriter.write_time,count,,millisecond,,The total amount of checkpoint processing time spent writing files to disk.,0,postgres,bgw wrt time,
2729
postgresql.blk_read_time,count,,millisecond,,Time spent reading data file blocks by backends in this database if track_io_timing is enabled. This metric is tagged with db.,0,postgres,db blk read,
2830
postgresql.blk_write_time,count,,millisecond,,Time spent writing data file blocks by backends in this database if track_io_timing is enabled. This metric is tagged with db.,0,postgres,db blk write,
29-
postgresql.buffer_hit,gauge,,hit,second,"The number of times disk blocks were found in the buffer cache, preventing the need to read from the database. This metric is tagged with db.",1,postgres,buff hit,
31+
postgresql.buffer_hit,rate,,hit,second,"The number of times disk blocks were found in the buffer cache, preventing the need to read from the database. This metric is tagged with db.",1,postgres,buff hit,
3032
postgresql.buffercache.dirty_buffers,gauge,,buffer,,"Number of dirty shared buffers. pg_buffercache extension needs to be installed. This metric is tagged by db, schema and relation.",0,postgres,buffercache dirty buffers,
3133
postgresql.buffercache.pinning_backends,gauge,,,,"Number of backends pinning shared buffers. pg_buffercache extension needs to be installed. This metric is tagged by db, schema and relation.",0,postgres,buffercache pinning backends,
3234
postgresql.buffercache.unused_buffers,gauge,,buffer,,"Number of unused shared buffers. pg_buffercache extension needs to be installed.",0,postgres,buffercache unused buffers,
@@ -39,7 +41,7 @@ postgresql.cluster_vacuum.heap_blks_total,gauge,,block,,"Total number of heap bl
3941
postgresql.cluster_vacuum.heap_tuples_scanned,gauge,,,,"Number of heap tuples scanned. This counter only advances when the phase is seq scanning heap, index scanning heap or writing new heap. Only available with PostgreSQL 12 and newer. This metric is tagged with db, table, command, phase, index.",0,postgres,postgres cluster tuple_scanned,
4042
postgresql.cluster_vacuum.heap_tuples_written,gauge,,,,"Number of heap tuples written. This counter only advances when the phase is seq scanning heap, index scanning heap or writing new heap. Only available with PostgreSQL 12 and newer. This metric is tagged with db, table, command, phase, index.",0,postgres,postgres cluster tuple_written,
4143
postgresql.cluster_vacuum.index_rebuild_count,gauge,,,,"Number of indexes rebuilt. This counter only advances when the phase is rebuilding index. Only available with PostgreSQL 12 and newer. This metric is tagged with db, table, command, phase, index.",0,postgres,postgres cluster rebuild_count,
42-
postgresql.commits,gauge,,transaction,second,The number of transactions that have been committed in this database. This metric is tagged with db.,0,postgres,commits,
44+
postgresql.commits,rate,,transaction,second,The number of transactions that have been committed in this database. This metric is tagged with db.,0,postgres,commits,
4345
postgresql.conflicts.bufferpin,count,,query,,Number of queries in this database that have been canceled due to pinned buffers. Buffer pin conflicts will occur when the walreceiver process tries to apply a buffer cleanup like HOT chain pruning. This requires a complete lock of the buffer and any query pinning the buffer will conflict with the cleaning. This metric is tagged with db.,-1,postgres,cfl bufferpin,
4446
postgresql.conflicts.deadlock,count,,query,,"Number of queries in this database that have been canceled due to deadlocks. Deadlock conflicts will happen when the walreceiver tries to apply a buffer cleanup like HOT chain pruning. If the conflict takes more than deadlock_timeout seconds, a deadlock check will be triggered and conflicting queries will be canceled until the buffer is unpinned. This metric is tagged with db.",-1,postgres,cfl deadlock,
4547
postgresql.conflicts.lock,count,,query,,Number of queries in this database that have been canceled due to lock timeouts. This will occur when the walreceiver process tries to apply a change requiring an ACCESS EXCLUSIVE lock while a query on the replica is reading the table. The conflicting query will be killed after waiting up to max_standby_streaming_delay seconds. This metric is tagged with db.,-1,postgres,cfl lock,
@@ -62,9 +64,9 @@ postgresql.create_index.tuples_total,gauge,,,,"Total number of tuples to be proc
6264
postgresql.database_size,gauge,,byte,,The disk space used by this database. This metric is tagged with db.,0,postgres,db size,
6365
postgresql.db.count,gauge,,item,,The number of available databases.,0,postgres,db cnt,
6466
postgresql.dead_rows,gauge,,row,,"Enabled with `relations`. The estimated number of dead rows. This metric is tagged with db, schema, table.",0,postgres,dead rows,
65-
postgresql.deadlocks,gauge,,lock,second,The rate of deadlocks detected in this database. This metric is tagged with db.,-1,postgres,deadlocks,
67+
postgresql.deadlocks,rate,,lock,second,The rate of deadlocks detected in this database. This metric is tagged with db.,-1,postgres,deadlocks,
6668
postgresql.deadlocks.count,count,,lock,,The number of deadlocks detected in this database. This metric is tagged with db.,-1,postgres,deadlocks.count,
67-
postgresql.disk_read,gauge,,block,second,The number of disk blocks read in this database. This metric is tagged with db.,0,postgres,disk read,
69+
postgresql.disk_read,rate,,block,second,The number of disk blocks read in this database. This metric is tagged with db.,0,postgres,disk read,
6870
postgresql.function.calls,rate,,,,"Enabled with `collect_function_metrics`. The number of calls made to a function. This metric is tagged with db, schema, function.",0,postgres,postgres_function_calls,
6971
postgresql.function.self_time,rate,,,,"Enabled with `collect_function_metrics`. Total time spent in this function itself, not including other functions called by it. This metric is tagged with db, schema, function.",0,postgres,postgres_function_self_time,
7072
postgresql.function.total_time,rate,,,,"Enabled with `collect_function_metrics`. Total time spent in this function and all other functions called by it. This metric is tagged with db, schema, function.",0,postgres,postgres_function_total_time,
@@ -139,13 +141,13 @@ postgresql.replication_slot.stream_txns,count,,transaction,,"Number of in-progre
139141
postgresql.replication_slot.total_bytes,count,,byte,,"Amount of transaction data decoded for sending transactions to the decoding output plugin while decoding changes from WAL for this slot. Extracted from pg_stat_replication_slots. Only available with PostgreSQL 14 and newer. This metric is tagged with slot_name, slot_type, slot_state.",0,postgres,repslot total_bytes,
140142
postgresql.replication_slot.total_txns,count,,transaction,,"Number of decoded transactions sent to the decoding output plugin for this slot. Extracted from pg_stat_replication_slots. Only available with PostgreSQL 14 and newer. This metric is tagged with slot_name, slot_type, slot_state.",0,postgres,repslot total_txn,
141143
postgresql.replication_slot.xmin_age,gauge,,transaction,,"The age of the oldest transaction that this slot needs the database to retain. Only physical replication slots will have an xmin. An orphaned replication slot (no consumer or consumer is not connected) will prevent the xmin horizon from progressing. This metric is tagged with slot_name, slot_type, slot_persistence, slot_state.",-1,postgres,repslot xmin,
142-
postgresql.rollbacks,gauge,,transaction,second,The number of transactions that have been rolled back in this database. This metric is tagged with db.,-1,postgres,rollbacks,
143-
postgresql.rows_deleted,gauge,,row,second,Enabled with `relations`. The number of rows deleted by queries in this database. This metric is tagged with db.,0,postgres,rows del,
144-
postgresql.rows_fetched,gauge,,row,second,The number of rows fetched by queries in this database. This metric is tagged with db.,0,postgres,rows fetch,
145-
postgresql.rows_hot_updated,gauge,,row,second,"Enabled with `relations`. The number of rows HOT updated, meaning no separate index update was needed. This metric is tagged with db, schema, table.",0,postgres,rows hot updated,
146-
postgresql.rows_inserted,gauge,,row,second,Enabled with `relations`. The number of rows inserted by queries in this database. This metric is tagged with db.,0,postgres,rows insrt,
147-
postgresql.rows_returned,gauge,,row,second,The number of rows returned by queries in this database. This metric is tagged with db.,0,postgres,rows ret,
148-
postgresql.rows_updated,gauge,,row,second,Enabled with `relations`. The number of rows updated by queries in this database. This metric is tagged with db.,0,postgres,rows updt,
144+
postgresql.rollbacks,rate,,transaction,second,The number of transactions that have been rolled back in this database. This metric is tagged with db.,-1,postgres,rollbacks,
145+
postgresql.rows_deleted,rate,,row,second,Enabled with `relations`. The number of rows deleted by queries in this database. This metric is tagged with db.,0,postgres,rows del,
146+
postgresql.rows_fetched,rate,,row,second,The number of rows fetched by queries in this database. This metric is tagged with db.,0,postgres,rows fetch,
147+
postgresql.rows_hot_updated,rate,,row,second,"Enabled with `relations`. The number of rows HOT updated, meaning no separate index update was needed. This metric is tagged with db, schema, table.",0,postgres,rows hot updated,
148+
postgresql.rows_inserted,rate,,row,second,Enabled with `relations`. The number of rows inserted by queries in this database. This metric is tagged with db.,0,postgres,rows insrt,
149+
postgresql.rows_returned,rate,,row,second,The number of rows returned by queries in this database. This metric is tagged with db.,0,postgres,rows ret,
150+
postgresql.rows_updated,rate,,row,second,Enabled with `relations`. The number of rows updated by queries in this database. This metric is tagged with db.,0,postgres,rows updt,
149151
postgresql.running,gauge,,,,The number of instances running.,0,postgres,running,
150152
postgresql.seq_rows_read,gauge,,row,second,"Enabled with `relations`. The number of live rows fetched by sequential scans. This metric is tagged with db, schema, table.",0,postgres,seq rows rd,
151153
postgresql.seq_scans,gauge,,scan,second,"Enabled with `relations`. The number of sequential scans initiated on this table. This metric is tagged with db, schema, table.",0,postgres,seq scans,
@@ -175,8 +177,8 @@ postgresql.subscription.sync_error,count,,,,Number of errors that occurred durin
175177
postgresql.table.count,gauge,,table,,"The number of user tables in this database. This metric is tagged with db, schema.",0,postgres,tbl count,
176178
postgresql.table_bloat,gauge,,percent,,"Enabled with `collect_bloat_metrics`. The estimated percentage of table bloat. This metric is tagged with db, schema, table.",0,postgres,tbloat,
177179
postgresql.table_size,gauge,,byte,,"Enabled with `relations`. The disk space used by the specified table with TOAST data. Free space map and visibility map are not included. This metric is tagged with db, schema, table.",0,postgres,tbl size,
178-
postgresql.temp_bytes,gauge,,byte,second,The amount of data written to temporary files by queries in this database. This metric is tagged with db.,0,postgres,temp bytes,
179-
postgresql.temp_files,gauge,,file,second,The number of temporary files created by queries in this database. This metric is tagged with db.,0,postgres,temp files,
180+
postgresql.temp_bytes,rate,,byte,second,The amount of data written to temporary files by queries in this database. This metric is tagged with db.,0,postgres,temp bytes,
181+
postgresql.temp_files,rate,,file,second,The number of temporary files created by queries in this database. This metric is tagged with db.,0,postgres,temp files,
180182
postgresql.toast.autovacuumed,count,,,,"Enabled with `relations`. The number of times the toast table of a relation has been autovacuumed. This metric is tagged with db, schema, table.",0,postgres,toast autovacuumed,
181183
postgresql.toast.dead_rows,gauge,,,,"Enabled with `relations`. The number of dead rows on the toast table of a relation. This metric is tagged with db, schema, table.",0,postgres,toast dead rows,
182184
postgresql.toast.index_scans,count,,,,"Enabled with `relations`. The number of index scans done on the toast table of a relation. This metric is tagged with db, schema, table.",0,postgres,toast idx scans,

postgres/tests/common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from datadog_checks.base.stubs.aggregator import normalize_tags
1010
from datadog_checks.dev import get_docker_hostname
1111
from datadog_checks.dev.docker import get_container_ip
12+
from datadog_checks.dev.utils import get_metadata_metrics
1213
from datadog_checks.postgres.util import (
1314
CHECKSUM_METRICS,
1415
NEWER_14_METRICS,
@@ -500,3 +501,8 @@ def check_stat_io_metrics(aggregator, expected_tags, count=1):
500501
]
501502
for metric_name in _iterate_metric_name(STAT_IO_METRICS):
502503
aggregator.assert_metric(metric_name, count=count, tags=expected_stat_io_tags)
504+
505+
506+
def check_metrics_metadata(aggregator):
507+
exclude = ['dd.postgres.operation.time']
508+
aggregator.assert_metrics_using_metadata(get_metadata_metrics(), exclude=exclude)

postgres/tests/test_pg_integration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
check_db_count,
3535
check_file_wal_metrics,
3636
check_logical_replication_slots,
37+
check_metrics_metadata,
3738
check_performance_metrics,
3839
check_physical_replication_slots,
3940
check_slru_metrics,
@@ -97,6 +98,7 @@ def test_common_metrics(aggregator, integration_check, pg_instance, is_aurora):
9798
check_performance_metrics(aggregator, expected_tags=check.debug_stats_kwargs()['tags'], is_aurora=is_aurora)
9899

99100
aggregator.assert_all_metrics_covered()
101+
check_metrics_metadata(aggregator)
100102

101103

102104
def _increase_txid(cur):

postgres/tests/test_pg_replication.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
check_control_metrics,
1818
check_db_count,
1919
check_file_wal_metrics,
20+
check_metrics_metadata,
2021
check_performance_metrics,
2122
check_replication_delay,
2223
check_slru_metrics,
@@ -56,6 +57,7 @@ def test_common_replica_metrics(aggregator, integration_check, metrics_cache_rep
5657
check_performance_metrics(aggregator, expected_tags=check.debug_stats_kwargs()['tags'])
5758

5859
aggregator.assert_all_metrics_covered()
60+
check_metrics_metadata(aggregator)
5961

6062

6163
@requires_over_10

0 commit comments

Comments
 (0)