Skip to content

Commit d31700e

Browse files
committed
Collect relation-level stats during compression
During compression, column min/max stats are collected on a per-segment basis for orderby columns and for columns that have indexes. This change uses the same mechanism to collect relation-level min/max stats for use by chunk skipping. In the worst case, this avoids an extra full table scan to gather these chunk column stats. For simplicity, stats gathering is enabled for every column that can support it, even if a column uses neither segment-level stats nor relation-level (chunk column) stats. The overhead of collecting min/max values should be negligible.
1 parent 396caf6 commit d31700e

File tree

10 files changed

+233
-85
lines changed

10 files changed

+233
-85
lines changed

src/ts_catalog/chunk_column_stats.c

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,8 @@ ts_chunk_column_stats_lookup(int32 hypertable_id, int32 chunk_id, const char *co
739739
* updated.
740740
*/
741741
int
742-
ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk)
742+
ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk,
743+
ChunkColumnStats **statsarray)
743744
{
744745
Size i = 0;
745746
ChunkRangeSpace *rs = ht->range_space;
@@ -755,22 +756,38 @@ ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk)
755756

756757
for (int range_index = 0; range_index < rs->num_range_cols; range_index++)
757758
{
758-
Datum minmax[2];
759+
const Form_chunk_column_stats form = &rs->range_cols[range_index];
760+
const ChunkColumnStats *colstats = NULL;
761+
ChunkColumnStats computed_stats;
762+
759763
AttrNumber attno;
760-
char *col_name = NameStr(rs->range_cols[range_index].column_name);
764+
const char *col_name = NameStr(form->column_name);
761765
Oid col_type;
762766

763767
/* Get the attribute number in the HT for this column, and map to the chunk */
768+
/* TODO: fix unnecessary mapping */
764769
attno = get_attnum(ht->main_table_relid, col_name);
765770
attno = ts_map_attno(ht->main_table_relid, chunk->table_id, attno);
766771
col_type = get_atttype(ht->main_table_relid, attno);
767772

773+
if (statsarray)
774+
colstats = statsarray[AttrNumberGetAttrOffset(attno)];
775+
776+
if (NULL == colstats && ts_chunk_get_minmax(chunk->table_id,
777+
col_type,
778+
attno,
779+
"column range",
780+
computed_stats.minmax))
781+
{
782+
colstats = &computed_stats;
783+
}
784+
768785
/* calculate the min/max range for this column on this chunk */
769-
if (ts_chunk_get_minmax(chunk->table_id, col_type, attno, "column range", minmax))
786+
if (colstats)
770787
{
771788
Form_chunk_column_stats range;
772-
int64 min = ts_time_value_to_internal(minmax[0], col_type);
773-
int64 max = ts_time_value_to_internal(minmax[1], col_type);
789+
int64 min = ts_time_value_to_internal(colstats->minmax[0], col_type);
790+
int64 max = ts_time_value_to_internal(colstats->minmax[1], col_type);
774791

775792
/* The end value is exclusive to the range, so incr by 1 */
776793
if (max != DIMENSION_SLICE_MAXVALUE)
@@ -821,7 +838,10 @@ ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk)
821838
}
822839
}
823840
else
824-
ereport(WARNING, errmsg("unable to calculate min/max values for column ranges"));
841+
ereport(WARNING,
842+
errmsg("unable to calculate min/max column range for \"%s\" %u",
843+
col_name,
844+
attno));
825845
}
826846

827847
MemoryContextSwitchTo(orig_mcxt);

src/ts_catalog/chunk_column_stats.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ typedef struct ChunkRangeSpace
2323
FormData_chunk_column_stats range_cols[FLEXIBLE_ARRAY_MEMBER];
2424
} ChunkRangeSpace;
2525

26+
typedef struct ChunkColumnStats
27+
{
28+
/* attnum identifies the column; minmax[] holds min and max, in that order */
29+
AttrNumber attnum;
30+
Datum minmax[2];
31+
bool isnull[2];
32+
} ChunkColumnStats;
33+
2634
#define CHUNKRANGESPACE_SIZE(num_columns) \
2735
(sizeof(ChunkRangeSpace) + (sizeof(NameData) * (num_columns)))
2836

@@ -35,7 +43,8 @@ extern int ts_chunk_column_stats_update_by_id(int32 chunk_column_stats_id,
3543
extern Form_chunk_column_stats ts_chunk_column_stats_lookup(int32 hypertable_id, int32 chunk_id,
3644
const char *col_name);
3745

38-
extern TSDLLEXPORT int ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk);
46+
extern TSDLLEXPORT int ts_chunk_column_stats_calculate(const Hypertable *ht, const Chunk *chunk,
47+
ChunkColumnStats **statsarray);
3948
extern int ts_chunk_column_stats_insert(const Hypertable *ht, const Chunk *chunk);
4049

4150
extern void ts_chunk_column_stats_drop(const Hypertable *ht, const char *col_name, bool *dropped);

tsl/src/compression/api.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -525,10 +525,15 @@ compress_chunk_impl(Oid hypertable_relid, Oid chunk_relid)
525525
* In the future, we can look at computing min/max entries in the compressed chunk
526526
* using the batch metadata and then recompute the range to handle DELETE cases.
527527
*/
528-
if (cxt.srcht->range_space)
529-
ts_chunk_column_stats_calculate(cxt.srcht, cxt.srcht_chunk);
530528

531529
cstat = compress_chunk(cxt.srcht_chunk->table_id, compress_ht_chunk->table_id, insert_options);
530+
531+
if (cxt.srcht->range_space && cstat.colstats)
532+
{
533+
ts_chunk_column_stats_calculate(cxt.srcht, cxt.srcht_chunk, cstat.colstats);
534+
pfree(cstat.colstats);
535+
}
536+
532537
after_size = ts_relation_size_impl(compress_ht_chunk->table_id);
533538

534539
if (new_compressed_chunk)
@@ -1370,7 +1375,7 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)
13701375
*/
13711376
Hypertable *ht = ts_hypertable_get_by_id(uncompressed_chunk->fd.hypertable_id);
13721377
if (ht->range_space)
1373-
ts_chunk_column_stats_calculate(ht, uncompressed_chunk);
1378+
ts_chunk_column_stats_calculate(ht, uncompressed_chunk, NULL);
13741379

13751380
/*************** tuplesort state *************************/
13761381
Tuplesortstate *segment_tuplesortstate;

tsl/src/compression/compression.c

Lines changed: 77 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <libpq/pqformat.h>
1313
#include <storage/predicate.h>
1414
#include <utils/datum.h>
15+
#include <utils/palloc.h>
1516
#include <utils/snapmgr.h>
1617
#include <utils/syscache.h>
1718
#include <utils/typcache.h>
@@ -34,6 +35,7 @@
3435
#include "segment_meta.h"
3536
#include "ts_catalog/array_utils.h"
3637
#include "ts_catalog/catalog.h"
38+
#include "ts_catalog/chunk_column_stats.h"
3739
#include "ts_catalog/compression_chunk_size.h"
3840
#include "ts_catalog/compression_settings.h"
3941

@@ -287,6 +289,7 @@ compress_chunk(Oid in_table, Oid out_table, int insert_options)
287289

288290
TupleDesc in_desc = RelationGetDescr(in_rel);
289291
TupleDesc out_desc = RelationGetDescr(out_rel);
292+
290293
/* Before calling row compressor relation should be segmented and sorted as configured
291294
* by compress_segmentby and compress_orderby.
292295
* Cost of sorting can be mitigated if we find an existing BTREE index defined for
@@ -494,7 +497,8 @@ compress_chunk(Oid in_table, Oid out_table, int insert_options)
494497
tuplesort_end(sorted_rel);
495498
}
496499

497-
row_compressor_close(&row_compressor);
500+
cstat.colstats = row_compressor_close(&row_compressor);
501+
498502
if (!ts_guc_enable_delete_after_compression)
499503
{
500504
DEBUG_WAITPOINT("compression_done_before_truncate_uncompressed");
@@ -720,6 +724,17 @@ build_column_map(CompressionSettings *settings, Relation uncompressed_table,
720724
bool is_segmentby = ts_array_is_member(settings->fd.segmentby, NameStr(attr->attname));
721725
bool is_orderby = ts_array_is_member(settings->fd.orderby, NameStr(attr->attname));
722726

727+
SegmentMetaMinMaxBuilder *segment_min_max_builder = NULL;
728+
TypeCacheEntry *type = lookup_type_cache(attr->atttypid, TYPECACHE_LT_OPR);
729+
730+
if (OidIsValid(type->lt_opr))
731+
{
732+
/* Always run the min-max builder if the type allows. It is
733+
* useful to collect, e.g., column stats for chunk skipping. */
734+
segment_min_max_builder =
735+
segment_meta_min_max_builder_create(attr->atttypid, attr->attcollation);
736+
}
737+
723738
if (!is_segmentby)
724739
{
725740
if (compressed_column_attr->atttypid != compressed_data_type_oid)
@@ -742,18 +757,6 @@ build_column_map(CompressionSettings *settings, Relation uncompressed_table,
742757
int16 segment_min_attr_offset = segment_min_attr_number - 1;
743758
int16 segment_max_attr_offset = segment_max_attr_number - 1;
744759

745-
SegmentMetaMinMaxBuilder *segment_min_max_builder = NULL;
746-
if (segment_min_attr_number != InvalidAttrNumber ||
747-
segment_max_attr_number != InvalidAttrNumber)
748-
{
749-
Ensure(segment_min_attr_number != InvalidAttrNumber,
750-
"could not find the min metadata column");
751-
Ensure(segment_max_attr_number != InvalidAttrNumber,
752-
"could not find the min metadata column");
753-
segment_min_max_builder =
754-
segment_meta_min_max_builder_create(attr->atttypid, attr->attcollation);
755-
}
756-
757760
Ensure(!is_orderby || segment_min_max_builder != NULL,
758761
"orderby columns must have minmax metadata");
759762

@@ -777,6 +780,7 @@ build_column_map(CompressionSettings *settings, Relation uncompressed_table,
777780
.segmentby_column_index = index,
778781
.min_metadata_attr_offset = -1,
779782
.max_metadata_attr_offset = -1,
783+
.min_max_metadata_builder = segment_min_max_builder,
780784
};
781785
}
782786
}
@@ -965,7 +969,9 @@ row_compressor_append_row(RowCompressor *row_compressor, TupleTableSlot *row)
965969
bool is_null;
966970
Datum val;
967971

968-
/* if there is no compressor, this must be a segmenter, so just skip */
972+
/* if there is no compressor, this must be a segmenter, so just
973+
* skip. Note that, for segmentby columns, min/max stats are updated
974+
* per segment (on flush) instead of per row. */
969975
if (compressor == NULL)
970976
continue;
971977

@@ -1024,11 +1030,9 @@ row_compressor_flush(RowCompressor *row_compressor, CommandId mycid, bool change
10241030
row_compressor->compressed_values[compressed_col] =
10251031
PointerGetDatum(compressed_data);
10261032

1027-
if (column->min_max_metadata_builder != NULL)
1033+
if (column->min_max_metadata_builder != NULL && column->min_metadata_attr_offset >= 0 &&
1034+
column->max_metadata_attr_offset >= 0)
10281035
{
1029-
Assert(column->min_metadata_attr_offset >= 0);
1030-
Assert(column->max_metadata_attr_offset >= 0);
1031-
10321036
if (!segment_meta_min_max_builder_empty(column->min_max_metadata_builder))
10331037
{
10341038
Assert(compressed_data != NULL);
@@ -1050,6 +1054,17 @@ row_compressor_flush(RowCompressor *row_compressor, CommandId mycid, bool change
10501054
}
10511055
else if (column->segment_info != NULL)
10521056
{
1057+
/* Update min/max for segmentby column. It is done here on flush
1058+
* instead of per row since for the segment the value is always
1059+
* the same. */
1060+
if (column->min_max_metadata_builder != NULL)
1061+
{
1062+
if (column->segment_info->is_null)
1063+
segment_meta_min_max_builder_update_null(column->min_max_metadata_builder);
1064+
else
1065+
segment_meta_min_max_builder_update_val(column->min_max_metadata_builder,
1066+
column->segment_info->val);
1067+
}
10531068
row_compressor->compressed_values[compressed_col] = column->segment_info->val;
10541069
row_compressor->compressed_is_null[compressed_col] = column->segment_info->is_null;
10551070
}
@@ -1091,23 +1106,31 @@ row_compressor_flush(RowCompressor *row_compressor, CommandId mycid, bool change
10911106

10921107
/* don't free the segment-bys if we've overflowed the row, we still need them */
10931108
if (column->segment_info != NULL && !changed_groups)
1109+
{
1110+
/* Still need to reset the min/max builder to save per-column
1111+
* min/max based on per-segment min/max. */
1112+
segment_meta_min_max_builder_reset(column->min_max_metadata_builder);
10941113
continue;
1114+
}
10951115

10961116
if (column->compressor != NULL || !column->segment_info->typ_by_val)
10971117
pfree(DatumGetPointer(row_compressor->compressed_values[compressed_col]));
10981118

10991119
if (column->min_max_metadata_builder != NULL)
11001120
{
1101-
/* segment_meta_min_max_builder_reset will free the values, so clear here */
1102-
if (!row_compressor->compressed_is_null[column->min_metadata_attr_offset])
1121+
/* segment_meta_min_max_builder_reset will free the values, so clear here */
1122+
if (column->min_metadata_attr_offset > 0 && column->max_metadata_attr_offset > 0)
11031123
{
1104-
row_compressor->compressed_values[column->min_metadata_attr_offset] = 0;
1105-
row_compressor->compressed_is_null[column->min_metadata_attr_offset] = true;
1106-
}
1107-
if (!row_compressor->compressed_is_null[column->max_metadata_attr_offset])
1108-
{
1109-
row_compressor->compressed_values[column->max_metadata_attr_offset] = 0;
1110-
row_compressor->compressed_is_null[column->max_metadata_attr_offset] = true;
1124+
if (!row_compressor->compressed_is_null[column->min_metadata_attr_offset])
1125+
{
1126+
row_compressor->compressed_values[column->min_metadata_attr_offset] = 0;
1127+
row_compressor->compressed_is_null[column->min_metadata_attr_offset] = true;
1128+
}
1129+
if (!row_compressor->compressed_is_null[column->max_metadata_attr_offset])
1130+
{
1131+
row_compressor->compressed_values[column->max_metadata_attr_offset] = 0;
1132+
row_compressor->compressed_is_null[column->max_metadata_attr_offset] = true;
1133+
}
11111134
}
11121135
segment_meta_min_max_builder_reset(column->min_max_metadata_builder);
11131136
}
@@ -1133,12 +1156,38 @@ row_compressor_reset(RowCompressor *row_compressor)
11331156
row_compressor->first_iteration = true;
11341157
}
11351158

1136-
void
1159+
ChunkColumnStats **
11371160
row_compressor_close(RowCompressor *row_compressor)
11381161
{
11391162
if (row_compressor->bistate)
11401163
FreeBulkInsertState(row_compressor->bistate);
11411164
CatalogCloseIndexes(row_compressor->resultRelInfo);
1165+
1166+
ChunkColumnStats **colstats =
1167+
palloc(sizeof(ChunkColumnStats *) * row_compressor->n_input_columns);
1168+
1169+
/* Get any relation-level stats (min and max) collected during compression
1170+
* and return it to caller */
1171+
for (int i = 0; i < row_compressor->n_input_columns; i++)
1172+
{
1173+
const PerColumn *column = &row_compressor->per_column[i];
1174+
SegmentMetaMinMaxBuilder *builder = column->min_max_metadata_builder;
1175+
1176+
if (builder && segment_meta_has_relation_stats(builder))
1177+
{
1178+
ChunkColumnStats *colstat = palloc(sizeof(ChunkColumnStats));
1179+
colstat->attnum = AttrOffsetGetAttrNumber(i);
1180+
colstat->minmax[0] = segment_meta_min_max_builder_relation_min(builder);
1181+
colstat->minmax[1] = segment_meta_min_max_builder_relation_max(builder);
1182+
colstats[i] = colstat;
1183+
}
1184+
else
1185+
{
1186+
colstats[i] = NULL;
1187+
}
1188+
}
1189+
1190+
return colstats;
11421191
}
11431192

11441193
/******************

tsl/src/compression/compression.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <fmgr.h>
1212
#include <lib/stringinfo.h>
1313
#include <nodes/execnodes.h>
14+
#include <utils/palloc.h>
1415
#include <utils/relcache.h>
1516

1617
typedef struct BulkInsertStateData *BulkInsertState;
@@ -200,11 +201,14 @@ typedef enum CompressionAlgorithm
200201
_MAX_NUM_COMPRESSION_ALGORITHMS = 128,
201202
} CompressionAlgorithm;
202203

204+
typedef struct ChunkColumnStats ChunkColumnStats;
205+
203206
typedef struct CompressionStats
204207
{
205208
int64 rowcnt_pre_compression;
206209
int64 rowcnt_post_compression;
207210
int64 rowcnt_frozen;
211+
ChunkColumnStats **colstats;
208212
} CompressionStats;
209213

210214
typedef struct PerColumn
@@ -368,7 +372,7 @@ extern void row_compressor_init(CompressionSettings *settings, RowCompressor *ro
368372
int16 num_columns_in_compressed_table, bool need_bistate,
369373
int insert_options);
370374
extern void row_compressor_reset(RowCompressor *row_compressor);
371-
extern void row_compressor_close(RowCompressor *row_compressor);
375+
extern struct ChunkColumnStats **row_compressor_close(RowCompressor *row_compressor);
372376
extern void row_compressor_append_sorted_rows(RowCompressor *row_compressor,
373377
Tuplesortstate *sorted_rel, TupleDesc sorted_desc,
374378
Relation in_rel);

0 commit comments

Comments
 (0)