Skip to content

Commit f7d4f6c

Browse files
committed
Update compression stats when merging chunks
Merge compression chunk size stats when merging chunks. The merged chunk's stats are simply the sum of the stats of all compressed chunks that are merged. This ensures that the aggregate stats do not change due to the merge. Still, the stats might not accurately represent the merged chunk: when both compressed and non-compressed chunks are merged, some data is left uncompressed, and that data is not reflected in the stats of the merged chunk.
1 parent e99a034 commit f7d4f6c

File tree

6 files changed

+280
-2
lines changed

6 files changed

+280
-2
lines changed

.unreleased/pg_7909

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixes: #7909 Update compression stats when merging chunks

src/ts_catalog/compression_chunk_size.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
* LICENSE-APACHE for a copy of the license.
55
*/
66
#include <postgres.h>
7+
#include <access/htup_details.h>
8+
#include <executor/tuptable.h>
79

10+
#include "export.h"
811
#include "scan_iterator.h"
912
#include "scanner.h"
1013
#include "ts_catalog/catalog.h"
@@ -43,3 +46,75 @@ ts_compression_chunk_size_delete(int32 uncompressed_chunk_id)
4346

4447
return count;
4548
}
49+
50+
TSDLLEXPORT bool
51+
ts_compression_chunk_size_get(int32 chunk_id, Form_compression_chunk_size form)
52+
{
53+
ScanIterator iterator =
54+
ts_scan_iterator_create(COMPRESSION_CHUNK_SIZE, AccessExclusiveLock, CurrentMemoryContext);
55+
bool found = false;
56+
57+
Assert(form != NULL);
58+
59+
init_scan_by_uncompressed_chunk_id(&iterator, chunk_id);
60+
ts_scanner_foreach(&iterator)
61+
{
62+
TupleInfo *ti = ts_scan_iterator_tuple_info(&iterator);
63+
bool should_free;
64+
HeapTuple tuple = ts_scanner_fetch_heap_tuple(ti, false, &should_free);
65+
memcpy(form, GETSTRUCT(tuple), sizeof(*form));
66+
found = true;
67+
Assert(form->chunk_id == chunk_id);
68+
69+
if (should_free)
70+
heap_freetuple(tuple);
71+
72+
break;
73+
}
74+
75+
ts_scan_iterator_close(&iterator);
76+
77+
return found;
78+
}
79+
80+
TSDLLEXPORT bool
81+
ts_compression_chunk_size_update(int32 chunk_id, Form_compression_chunk_size form)
82+
{
83+
ScanIterator iterator =
84+
ts_scan_iterator_create(COMPRESSION_CHUNK_SIZE, RowExclusiveLock, CurrentMemoryContext);
85+
bool found = false;
86+
CatalogSecurityContext sec_ctx;
87+
88+
Assert(form != NULL);
89+
90+
init_scan_by_uncompressed_chunk_id(&iterator, chunk_id);
91+
ts_scanner_foreach(&iterator)
92+
{
93+
TupleInfo *ti = ts_scan_iterator_tuple_info(&iterator);
94+
bool should_free;
95+
HeapTuple tuple = ts_scanner_fetch_heap_tuple(ti, false, &should_free);
96+
HeapTuple copy = heap_copytuple(tuple);
97+
Form_compression_chunk_size tupform = (Form_compression_chunk_size) GETSTRUCT(copy);
98+
99+
/* Don't update chunk IDs so copy from existing tuple */
100+
form->chunk_id = tupform->chunk_id;
101+
form->compressed_chunk_id = tupform->compressed_chunk_id;
102+
103+
memcpy(tupform, form, sizeof(FormData_compression_chunk_size));
104+
ts_catalog_database_info_become_owner(ts_catalog_database_info_get(), &sec_ctx);
105+
ts_catalog_update_tid_only(ti->scanrel, ts_scanner_get_tuple_tid(ti), copy);
106+
ts_catalog_restore_user(&sec_ctx);
107+
found = true;
108+
109+
heap_freetuple(copy);
110+
111+
if (should_free)
112+
heap_freetuple(tuple);
113+
114+
break;
115+
}
116+
117+
ts_scan_iterator_close(&iterator);
118+
119+
return found;
120+
}

src/ts_catalog/compression_chunk_size.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,10 @@
88
#include <compat/compat.h>
99
#include <postgres.h>
1010

11+
#include <ts_catalog/catalog.h>
12+
1113
extern TSDLLEXPORT int ts_compression_chunk_size_delete(int32 uncompressed_chunk_id);
14+
extern TSDLLEXPORT bool ts_compression_chunk_size_get(int32 chunk_id,
15+
Form_compression_chunk_size form);
16+
extern TSDLLEXPORT bool ts_compression_chunk_size_update(int32 chunk_id,
17+
Form_compression_chunk_size form);

tsl/src/chunk.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
#include "hypercube.h"
6767
#include "hypertable.h"
6868
#include "hypertable_cache.h"
69+
#include "ts_catalog/catalog.h"
70+
#include "ts_catalog/compression_chunk_size.h"
6971
#include "utils.h"
7072

7173
/* Data in a frozen chunk cannot be modified. So any operation
@@ -216,6 +218,7 @@ typedef struct RelationMergeInfo
216218
{
217219
Oid relid;
218220
struct VacuumCutoffs cutoffs;
221+
FormData_compression_chunk_size ccs;
219222
Chunk *chunk;
220223
Relation rel;
221224
char relpersistence;
@@ -827,6 +830,9 @@ merge_relinfos(RelationMergeInfo *relinfos, int nrelids, int mergeindex)
827830
ExclusiveLock);
828831
Relation new_rel = table_open(new_relid, AccessExclusiveLock);
829832
double total_num_tuples = 0.0;
833+
FormData_compression_chunk_size merged_ccs;
834+
835+
memset(&merged_ccs, 0, sizeof(FormData_compression_chunk_size));
830836

831837
pg17_workaround_init(new_rel, relinfos, nrelids);
832838

@@ -843,6 +849,26 @@ merge_relinfos(RelationMergeInfo *relinfos, int nrelids, int mergeindex)
843849
total_num_tuples += num_tuples;
844850
relinfo->rel = NULL;
845851
}
852+
853+
/*
854+
* Merge compression chunk size stats.
855+
*
856+
* Simply sum up the stats for all compressed relations that are
857+
* merged. Note that we don't add anything for non-compressed
858+
* relations that are merged because they don't have stats. This is a
859+
* bit weird because the data from uncompressed relations will not be
860+
* reflected in the stats of the merged chunk although the data is
861+
* part of the chunk.
862+
*/
863+
merged_ccs.compressed_heap_size += relinfo->ccs.compressed_heap_size;
864+
merged_ccs.compressed_toast_size += relinfo->ccs.compressed_toast_size;
865+
merged_ccs.compressed_index_size += relinfo->ccs.compressed_index_size;
866+
merged_ccs.uncompressed_heap_size += relinfo->ccs.uncompressed_heap_size;
867+
merged_ccs.uncompressed_toast_size += relinfo->ccs.uncompressed_toast_size;
868+
merged_ccs.uncompressed_index_size += relinfo->ccs.uncompressed_index_size;
869+
merged_ccs.numrows_post_compression += relinfo->ccs.numrows_post_compression;
870+
merged_ccs.numrows_pre_compression += relinfo->ccs.numrows_pre_compression;
871+
merged_ccs.numrows_frozen_immediately += relinfo->ccs.numrows_frozen_immediately;
846872
}
847873

848874
pg17_workaround_cleanup(new_rel);
@@ -853,6 +879,22 @@ merge_relinfos(RelationMergeInfo *relinfos, int nrelids, int mergeindex)
853879
table_close(new_rel, NoLock);
854880
table_close(relRelation, RowExclusiveLock);
855881

882+
/*
883+
* Update compression chunk size stats, but only if at least one of the
884+
* merged chunks was compressed. In that case the merged metadata should
885+
* be non-zero.
886+
*/
887+
if (merged_ccs.compressed_heap_size > 0)
888+
{
889+
/*
890+
* The result relation should always be compressed because we pick the
891+
* first compressed one, if one exists.
892+
*/
893+
894+
Assert(result_minfo->ccs.compressed_heap_size > 0);
895+
ts_compression_chunk_size_update(result_minfo->chunk->fd.id, &merged_ccs);
896+
}
897+
856898
return new_relid;
857899
}
858900

@@ -1048,6 +1090,14 @@ chunk_merge_chunks(PG_FUNCTION_ARGS)
10481090

10491091
if (mergeindex == -1)
10501092
mergeindex = i;
1093+
1094+
/* Read compression chunk size stats */
1095+
bool found = ts_compression_chunk_size_get(chunk->fd.id, &relinfo->ccs);
1096+
1097+
if (!found)
1098+
elog(WARNING,
1099+
"missing compression chunk size stats for compressed chunk \"%s\"",
1100+
NameStr(chunk->fd.table_name));
10511101
}
10521102

10531103
if (ts_chunk_is_frozen(chunk))

tsl/test/expected/merge_chunks.out

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,20 @@ select * from chunk_info;
283283
_hyper_1_5_chunk | heap | (("time" >= 'Wed Jan 03 16:00:00 2024 PST'::timestamp with time zone) AND ("time" < 'Thu Jan 04 16:00:00 2024 PST'::timestamp with time zone))
284284
(10 rows)
285285

286+
select * from _timescaledb_catalog.compression_chunk_size order by chunk_id;
287+
chunk_id | compressed_chunk_id | uncompressed_heap_size | uncompressed_toast_size | uncompressed_index_size | compressed_heap_size | compressed_toast_size | compressed_index_size | numrows_pre_compression | numrows_post_compression | numrows_frozen_immediately
288+
----------+---------------------+------------------------+-------------------------+-------------------------+----------------------+-----------------------+-----------------------+-------------------------+--------------------------+----------------------------
289+
1 | 6 | 8192 | 0 | 32768 | 16384 | 8192 | 16384 | 1 | 1 | 1
290+
3 | 7 | 8192 | 0 | 32768 | 16384 | 8192 | 16384 | 1 | 1 | 1
291+
(2 rows)
292+
286293
call merge_chunks('{_timescaledb_internal._hyper_1_1_chunk, _timescaledb_internal._hyper_1_2_chunk, _timescaledb_internal._hyper_1_3_chunk}');
294+
select * from _timescaledb_catalog.compression_chunk_size order by chunk_id;
295+
chunk_id | compressed_chunk_id | uncompressed_heap_size | uncompressed_toast_size | uncompressed_index_size | compressed_heap_size | compressed_toast_size | compressed_index_size | numrows_pre_compression | numrows_post_compression | numrows_frozen_immediately
296+
----------+---------------------+------------------------+-------------------------+-------------------------+----------------------+-----------------------+-----------------------+-------------------------+--------------------------+----------------------------
297+
1 | 6 | 16384 | 0 | 65536 | 32768 | 16384 | 32768 | 2 | 2 | 2
298+
(1 row)
299+
287300
select * from chunk_info;
288301
chunk | tam | checkconstraint
289302
------------------+------+------------------------------------------------------------------------------------------------------------------------------------------------
@@ -627,7 +640,65 @@ select * from partitions;
627640
_hyper_1_14_chunk | device | 1431655764 | 9223372036854775807
628641
(24 rows)
629642

630-
-- Merge all chunks until only 1 remains
643+
-- Show which chunks are compressed. Their compression_chunk_size
644+
-- metadata should be merged.
645+
select chunk_name from timescaledb_information.chunks
646+
where is_compressed=true order by chunk_name;
647+
chunk_name
648+
------------------
649+
_hyper_1_1_chunk
650+
_hyper_1_2_chunk
651+
(2 rows)
652+
653+
--
654+
-- Check that compression_chunk_size stats are also merged when we
655+
-- merge compressed chunks.
656+
--
657+
-- Use a view to compare merged stats against the total sum of that
658+
-- stats for all chunks. There are only two compressed chunks, 1 and
659+
-- 2. Show each chunks stats as the fraction of the total size. This
660+
-- is to make the test work across different architectures that show
661+
-- slightly different absolute disk sizes.
662+
---
663+
select
664+
sum(ccs.uncompressed_heap_size) as total_uncompressed_heap_size,
665+
sum(ccs.uncompressed_toast_size) as total_uncompressed_toast_size,
666+
sum(ccs.uncompressed_index_size) as total_uncompressed_index_size,
667+
sum(ccs.compressed_heap_size) as total_compressed_heap_size,
668+
sum(ccs.compressed_toast_size) as total_compressed_toast_size,
669+
sum(ccs.compressed_index_size) as total_compressed_index_size,
670+
sum(ccs.numrows_pre_compression) as total_numrows_pre_compression,
671+
sum(ccs.numrows_post_compression) as total_numrows_post_compression,
672+
sum(ccs.numrows_frozen_immediately) as total_numrows_frozen_immediately
673+
from _timescaledb_catalog.compression_chunk_size ccs \gset
674+
-- View to show current chunk compression size stats as a fraction of
675+
-- the totals.
676+
create view compression_size_fraction as
677+
select
678+
ccs.chunk_id,
679+
ccs.compressed_chunk_id,
680+
round(ccs.uncompressed_heap_size::numeric / :total_uncompressed_heap_size, 1) as uncompressed_heap_size_fraction,
681+
ccs.uncompressed_toast_size::numeric as uncompressed_toast_size_fraction,
682+
round(ccs.uncompressed_index_size::numeric / :total_uncompressed_index_size, 1) as uncompressed_index_size_fraction,
683+
round(ccs.compressed_heap_size::numeric / :total_compressed_heap_size, 1) as compressed_heap_size_fraction,
684+
round(ccs.compressed_toast_size::numeric / :total_compressed_toast_size, 1) as compressed_toast_size_fraction,
685+
round(ccs.compressed_index_size::numeric / :total_compressed_index_size, 1) as compressed_index_size_fraction,
686+
round(ccs.numrows_pre_compression ::numeric/ :total_numrows_pre_compression, 1) as numrows_pre_compression_fraction,
687+
round(ccs.numrows_post_compression::numeric / :total_numrows_post_compression, 1) as numrows_post_compression_fraction,
688+
round(ccs.numrows_frozen_immediately::numeric / :total_numrows_frozen_immediately, 1) as numrows_frozen_immediately_fraction
689+
from _timescaledb_catalog.compression_chunk_size ccs
690+
order by chunk_id;
691+
--
692+
-- Merge all chunks until only 1 remains. Also check that metadata is
693+
-- merged.
694+
---
695+
select * from compression_size_fraction;
696+
chunk_id | compressed_chunk_id | uncompressed_heap_size_fraction | uncompressed_toast_size_fraction | uncompressed_index_size_fraction | compressed_heap_size_fraction | compressed_toast_size_fraction | compressed_index_size_fraction | numrows_pre_compression_fraction | numrows_post_compression_fraction | numrows_frozen_immediately_fraction
697+
----------+---------------------+---------------------------------+----------------------------------+----------------------------------+-------------------------------+--------------------------------+--------------------------------+----------------------------------+-----------------------------------+-------------------------------------
698+
1 | 17 | 0.5 | 0 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5
699+
2 | 18 | 0.5 | 0 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5
700+
(2 rows)
701+
631702
select count(*), sum(device), round(sum(temp)::numeric, 4) from mergeme;
632703
count | sum | round
633704
--------+---------+---------------
@@ -689,6 +760,15 @@ select * from partitions;
689760
(12 rows)
690761

691762
call merge_chunks(ARRAY['_timescaledb_internal._hyper_1_3_chunk', '_timescaledb_internal._hyper_1_11_chunk','_timescaledb_internal._hyper_1_14_chunk', '_timescaledb_internal._hyper_1_16_chunk']);
763+
-- Final merge, involving the two compressed chunks 1 and 2. The stats
764+
-- should also be merged.
765+
select * from compression_size_fraction;
766+
chunk_id | compressed_chunk_id | uncompressed_heap_size_fraction | uncompressed_toast_size_fraction | uncompressed_index_size_fraction | compressed_heap_size_fraction | compressed_toast_size_fraction | compressed_index_size_fraction | numrows_pre_compression_fraction | numrows_post_compression_fraction | numrows_frozen_immediately_fraction
767+
----------+---------------------+---------------------------------+----------------------------------+----------------------------------+-------------------------------+--------------------------------+--------------------------------+----------------------------------+-----------------------------------+-------------------------------------
768+
1 | 17 | 0.5 | 0 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5
769+
2 | 18 | 0.5 | 0 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5
770+
(2 rows)
771+
692772
select count(*), sum(device), round(sum(temp)::numeric, 4) from mergeme;
693773
count | sum | round
694774
--------+---------+---------------
@@ -707,6 +787,12 @@ select * from partitions;
707787
(6 rows)
708788

709789
call merge_chunks(ARRAY['_timescaledb_internal._hyper_1_3_chunk', '_timescaledb_internal._hyper_1_1_chunk','_timescaledb_internal._hyper_1_2_chunk']);
790+
select * from compression_size_fraction;
791+
chunk_id | compressed_chunk_id | uncompressed_heap_size_fraction | uncompressed_toast_size_fraction | uncompressed_index_size_fraction | compressed_heap_size_fraction | compressed_toast_size_fraction | compressed_index_size_fraction | numrows_pre_compression_fraction | numrows_post_compression_fraction | numrows_frozen_immediately_fraction
792+
----------+---------------------+---------------------------------+----------------------------------+----------------------------------+-------------------------------+--------------------------------+--------------------------------+----------------------------------+-----------------------------------+-------------------------------------
793+
1 | 17 | 1.0 | 0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0
794+
(1 row)
795+
710796
select count(*), sum(device), round(sum(temp)::numeric, 4) from mergeme;
711797
count | sum | round
712798
--------+---------+---------------

0 commit comments

Comments
 (0)