From bb6ffc0001ea76d5443dde6806ebc10ceb16c5a9 Mon Sep 17 00:00:00 2001 From: heniek Date: Thu, 2 Nov 2023 14:59:36 +0100 Subject: [PATCH] On branch PMM-12650-vacuum-monitoring Changes to be committed: modified: cmd/postgres_exporter/postgres_exporter.go modified: cmd/postgres_exporter/queries.go Adding vacuum monitoring and adjusting pg_stat_activity for better troubleshooting. --- cmd/postgres_exporter/postgres_exporter.go | 54 ++++++++++++++ cmd/postgres_exporter/queries.go | 84 +++++++++++++++++++++- 2 files changed, 136 insertions(+), 2 deletions(-) diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index fa9468925..a5e55aa79 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -182,6 +182,59 @@ var builtinMetricMaps = map[string]intermediateMetricMap{ true, 0, }, + "pg_autovacuum_workers": { + map[string]ColumnMapping{ + "duration": {GAUGE, "Duration in seconds that autovacuum is running for one table", nil, nil}, + "mode": {LABEL, "Type of vacuum", nil, nil}, + "database": {LABEL, "Name of database", nil, nil}, + "relation": {LABEL, "Vacuumed relation", nil, nil}, + "phase": {LABEL, "Vacuum phase", nil, nil}, + "table_size": {GAUGE, "Table size", nil, nil}, + "total_size": {GAUGE, "Total relation size", nil, nil}, + "scanned": {GAUGE, "Bytes scanned by vacuum", nil, nil}, + "vacuumed": {GAUGE, "Bytes vacuumed by vacuum", nil, nil}, + "scanned_pct": {GAUGE, "Percentage scanned by vacuum", nil, nil}, + "vacuumed_pct": {GAUGE, "Percentage vacuumed by vacuum", nil, nil}, + "index_vacuum_count": {GAUGE, "Count of vacuumed indexes", nil, nil}, + }, + true, + 0, + }, + "pg_table_size": { + map[string]ColumnMapping{ + "table_name": {LABEL, "Table name", nil, nil}, + "bytes": {GAUGE, "Number of dead rows", nil, nil}, + "xid_age": {GAUGE, "Relation xid age", nil, nil}, + }, + true, + 0, + }, + "pg_wraparound": { + map[string]ColumnMapping{ + "oldest_current_xid": {GAUGE, "Oldest txid", nil, nil}, + "percent_towards_wraparound": {GAUGE, "Percentage towards wraparound", nil, nil}, + "percent_towards_emergency_autovacuum": {GAUGE, "Percentage towards emergency autovacuum", nil, nil}, + }, + true, + 0, + }, + "pg_autovacuum_disabled": { + map[string]ColumnMapping{ + "relname": {LABEL, "Table name", nil, nil}, + "xid_age": {GAUGE, "Relation age", nil, nil}, + }, + true, + 0, + }, + "pg_index_size": { + map[string]ColumnMapping{ + "schema": {LABEL, "Table schema", nil, nil}, + "index_name": {LABEL, "Table name", nil, nil}, + "bytes": {GAUGE, "Number of dead rows", nil, nil}, + }, + true, + 0, + }, "pg_stat_replication": { map[string]ColumnMapping{ "procpid": {DISCARD, "Process ID of a WAL sender process", nil, semver.MustParseRange("<9.2.0")}, @@ -254,6 +307,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{ }, "pg_stat_activity": { map[string]ColumnMapping{ + "pid": {LABEL, "Process ID", nil, nil}, "datname": {LABEL, "Name of this database", nil, nil}, "state": {LABEL, "connection state", nil, semver.MustParseRange(">=9.2.0")}, "usename": {LABEL, "Name of the user logged into this backend", nil, nil}, diff --git a/cmd/postgres_exporter/queries.go b/cmd/postgres_exporter/queries.go index 8b9300dc7..bc53cdce0 100644 --- a/cmd/postgres_exporter/queries.go +++ b/cmd/postgres_exporter/queries.go @@ -88,7 +88,85 @@ var queryOverrides = map[string][]OverrideQuery{ `, }, }, - + "pg_autovacuum_workers": { + { + semver.MustParseRange(">=11.0.0"), + ` + SELECT + EXTRACT(EPOCH FROM (now() - a.xact_start))::int AS duration, + CASE WHEN a.query ~*'^autovacuum.*to prevent wraparound' THEN 'wraparound' WHEN a.query ~*'^vacuum' THEN 'user' ELSE 'regular' END AS mode, + p.datname AS database, p.relid::regclass AS relation,p.phase, + p.heap_blks_total * current_setting('block_size')::int AS table_size, + pg_total_relation_size(relid) AS total_size, + p.heap_blks_scanned * current_setting('block_size')::int AS scanned, + p.heap_blks_vacuumed * current_setting('block_size')::int AS vacuumed, + round(100.0 * p.heap_blks_scanned / p.heap_blks_total, 0) AS scanned_pct, + round(100.0 * p.heap_blks_vacuumed / p.heap_blks_total, 0) AS vacuumed_pct, + p.index_vacuum_count + FROM pg_stat_progress_vacuum p JOIN pg_stat_activity a using (pid) + `, + }, + }, + "pg_wraparound": { + { + semver.MustParseRange(">=9.4.0"), + ` + WITH max_age AS ( + SELECT 2000000000 as max_old_xid + , setting AS autovacuum_freeze_max_age + FROM pg_catalog.pg_settings + WHERE name = 'autovacuum_freeze_max_age' ) + , per_database_stats AS ( + SELECT datname + , m.max_old_xid::int + , m.autovacuum_freeze_max_age::int + , age(d.datfrozenxid) AS oldest_current_xid + FROM pg_catalog.pg_database d + JOIN max_age m ON (true) + WHERE d.datallowconn ) + SELECT max(oldest_current_xid) AS oldest_current_xid + , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound + , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovacuum + FROM per_database_stats + `, + }, + }, + "pg_table_size": { + { + semver.MustParseRange(">=9.4.0"), + ` + SELECT nspname||'.'||relname AS table_name,pg_relation_size (C.oid) AS bytes,age(C.relfrozenxid) as xid_age + FROM pg_class C + LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) + WHERE nspname NOT IN ('pg_catalog','information_schema') + AND C.relkind not in ('i','v') + AND nspname !~ '^pg_toast' + `, + }, + }, + "pg_autovacuum_disabled": { + { + semver.MustParseRange(">=9.4.0"), + ` + select n.nspname||'.'||c.relname,age(c.relfrozenxid) as xid_age from pg_class c + LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) + where c.relkind = 'r' and 'autovacuum_enabled=off'=ANY(c.reloptions) + `, + }, + }, + "pg_index_size": { + { + semver.MustParseRange(">=9.4.0"), + ` + SELECT relnamespace as schema,relname AS index_name,pg_relation_size (C .oid) AS bytes + FROM pg_class C + LEFT JOIN pg_namespace N ON (N.oid = C .relnamespace) + WHERE nspname NOT IN ('pg_catalog','information_schema') + AND C .relkind = 'i' + AND nspname !~ '^pg_toast' + `, + }, + }, "pg_replication_slots": { { semver.MustParseRange(">=9.4.0 <10.0.0"), @@ -125,6 +203,7 @@ var queryOverrides = map[string][]OverrideQuery{ SELECT pg_database.datname, tmp.state, + tmp2.pid, tmp2.usename, tmp2.application_name, COALESCE(count,0) as count, @@ -142,12 +221,13 @@ var queryOverrides = map[string][]OverrideQuery{ ( SELECT datname, + pid, state, usename, application_name, count(*) AS count, MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration - FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2 + FROM pg_stat_activity GROUP BY datname,state,usename,application_name,pid) AS tmp2 ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname `, },