12
12
#include <libpq/pqformat.h>
13
13
#include <storage/predicate.h>
14
14
#include <utils/datum.h>
15
+ #include <utils/palloc.h>
15
16
#include <utils/snapmgr.h>
16
17
#include <utils/syscache.h>
17
18
#include <utils/typcache.h>
34
35
#include "segment_meta.h"
35
36
#include "ts_catalog/array_utils.h"
36
37
#include "ts_catalog/catalog.h"
38
+ #include "ts_catalog/chunk_column_stats.h"
37
39
#include "ts_catalog/compression_chunk_size.h"
38
40
#include "ts_catalog/compression_settings.h"
39
41
@@ -287,6 +289,7 @@ compress_chunk(Oid in_table, Oid out_table, int insert_options)
287
289
288
290
TupleDesc in_desc = RelationGetDescr (in_rel );
289
291
TupleDesc out_desc = RelationGetDescr (out_rel );
292
+
290
293
/* Before calling row compressor relation should be segmented and sorted as configured
291
294
* by compress_segmentby and compress_orderby.
292
295
* Cost of sorting can be mitigated if we find an existing BTREE index defined for
@@ -494,7 +497,8 @@ compress_chunk(Oid in_table, Oid out_table, int insert_options)
494
497
tuplesort_end (sorted_rel );
495
498
}
496
499
497
- row_compressor_close (& row_compressor );
500
+ cstat .colstats = row_compressor_close (& row_compressor );
501
+
498
502
if (!ts_guc_enable_delete_after_compression )
499
503
{
500
504
DEBUG_WAITPOINT ("compression_done_before_truncate_uncompressed" );
@@ -720,6 +724,17 @@ build_column_map(CompressionSettings *settings, Relation uncompressed_table,
720
724
bool is_segmentby = ts_array_is_member (settings -> fd .segmentby , NameStr (attr -> attname ));
721
725
bool is_orderby = ts_array_is_member (settings -> fd .orderby , NameStr (attr -> attname ));
722
726
727
+ SegmentMetaMinMaxBuilder * segment_min_max_builder = NULL ;
728
+ TypeCacheEntry * type = lookup_type_cache (attr -> atttypid , TYPECACHE_LT_OPR );
729
+
730
+ if (OidIsValid (type -> lt_opr ))
731
+ {
732
+ /* Always run the min-max builder if the type allows. It is
733
+ * useful to collect, e.g., column stats for chunk skipping. */
734
+ segment_min_max_builder =
735
+ segment_meta_min_max_builder_create (attr -> atttypid , attr -> attcollation );
736
+ }
737
+
723
738
if (!is_segmentby )
724
739
{
725
740
if (compressed_column_attr -> atttypid != compressed_data_type_oid )
@@ -742,18 +757,6 @@ build_column_map(CompressionSettings *settings, Relation uncompressed_table,
742
757
int16 segment_min_attr_offset = segment_min_attr_number - 1 ;
743
758
int16 segment_max_attr_offset = segment_max_attr_number - 1 ;
744
759
745
- SegmentMetaMinMaxBuilder * segment_min_max_builder = NULL ;
746
- if (segment_min_attr_number != InvalidAttrNumber ||
747
- segment_max_attr_number != InvalidAttrNumber )
748
- {
749
- Ensure (segment_min_attr_number != InvalidAttrNumber ,
750
- "could not find the min metadata column" );
751
- Ensure (segment_max_attr_number != InvalidAttrNumber ,
752
- "could not find the min metadata column" );
753
- segment_min_max_builder =
754
- segment_meta_min_max_builder_create (attr -> atttypid , attr -> attcollation );
755
- }
756
-
757
760
Ensure (!is_orderby || segment_min_max_builder != NULL ,
758
761
"orderby columns must have minmax metadata" );
759
762
@@ -777,6 +780,7 @@ build_column_map(CompressionSettings *settings, Relation uncompressed_table,
777
780
.segmentby_column_index = index ,
778
781
.min_metadata_attr_offset = -1 ,
779
782
.max_metadata_attr_offset = -1 ,
783
+ .min_max_metadata_builder = segment_min_max_builder ,
780
784
};
781
785
}
782
786
}
@@ -965,7 +969,9 @@ row_compressor_append_row(RowCompressor *row_compressor, TupleTableSlot *row)
965
969
bool is_null ;
966
970
Datum val ;
967
971
968
- /* if there is no compressor, this must be a segmenter, so just skip */
972
+ /* if there is no compressor, this must be a segmenter, so just
973
+ * skip. Note that, for segmentby columns, min/max stats are updated
974
+ * per segment (on flush) instead of per row. */
969
975
if (compressor == NULL )
970
976
continue ;
971
977
@@ -1024,11 +1030,9 @@ row_compressor_flush(RowCompressor *row_compressor, CommandId mycid, bool change
1024
1030
row_compressor -> compressed_values [compressed_col ] =
1025
1031
PointerGetDatum (compressed_data );
1026
1032
1027
- if (column -> min_max_metadata_builder != NULL )
1033
+ if (column -> min_max_metadata_builder != NULL && column -> min_metadata_attr_offset >= 0 &&
1034
+ column -> max_metadata_attr_offset >= 0 )
1028
1035
{
1029
- Assert (column -> min_metadata_attr_offset >= 0 );
1030
- Assert (column -> max_metadata_attr_offset >= 0 );
1031
-
1032
1036
if (!segment_meta_min_max_builder_empty (column -> min_max_metadata_builder ))
1033
1037
{
1034
1038
Assert (compressed_data != NULL );
@@ -1050,6 +1054,17 @@ row_compressor_flush(RowCompressor *row_compressor, CommandId mycid, bool change
1050
1054
}
1051
1055
else if (column -> segment_info != NULL )
1052
1056
{
1057
+ /* Update min/max for segmentby column. It is done here on flush
1058
+ * instead of per row since for the segment the value is always
1059
+ * the same. */
1060
+ if (column -> min_max_metadata_builder != NULL )
1061
+ {
1062
+ if (column -> segment_info -> is_null )
1063
+ segment_meta_min_max_builder_update_null (column -> min_max_metadata_builder );
1064
+ else
1065
+ segment_meta_min_max_builder_update_val (column -> min_max_metadata_builder ,
1066
+ column -> segment_info -> val );
1067
+ }
1053
1068
row_compressor -> compressed_values [compressed_col ] = column -> segment_info -> val ;
1054
1069
row_compressor -> compressed_is_null [compressed_col ] = column -> segment_info -> is_null ;
1055
1070
}
@@ -1091,23 +1106,31 @@ row_compressor_flush(RowCompressor *row_compressor, CommandId mycid, bool change
1091
1106
1092
1107
/* don't free the segment-bys if we've overflowed the row, we still need them */
1093
1108
if (column -> segment_info != NULL && !changed_groups )
1109
+ {
1110
+ /* Still need to reset the min/max builder to save per-column
1111
+ * min/max based on per-segment min/max. */
1112
+ segment_meta_min_max_builder_reset (column -> min_max_metadata_builder );
1094
1113
continue ;
1114
+ }
1095
1115
1096
1116
if (column -> compressor != NULL || !column -> segment_info -> typ_by_val )
1097
1117
pfree (DatumGetPointer (row_compressor -> compressed_values [compressed_col ]));
1098
1118
1099
1119
if (column -> min_max_metadata_builder != NULL )
1100
1120
{
1101
- /* segment_meta_min_max_builder_reset will free the values, so clear here */
1102
- if (! row_compressor -> compressed_is_null [ column -> min_metadata_attr_offset ] )
1121
+ /* segment_meta_min_max_builder_reset will free the values, so clear here */
1122
+ if (column -> min_metadata_attr_offset > 0 && column -> max_metadata_attr_offset > 0 )
1103
1123
{
1104
- row_compressor -> compressed_values [column -> min_metadata_attr_offset ] = 0 ;
1105
- row_compressor -> compressed_is_null [column -> min_metadata_attr_offset ] = true;
1106
- }
1107
- if (!row_compressor -> compressed_is_null [column -> max_metadata_attr_offset ])
1108
- {
1109
- row_compressor -> compressed_values [column -> max_metadata_attr_offset ] = 0 ;
1110
- row_compressor -> compressed_is_null [column -> max_metadata_attr_offset ] = true;
1124
+ if (!row_compressor -> compressed_is_null [column -> min_metadata_attr_offset ])
1125
+ {
1126
+ row_compressor -> compressed_values [column -> min_metadata_attr_offset ] = 0 ;
1127
+ row_compressor -> compressed_is_null [column -> min_metadata_attr_offset ] = true;
1128
+ }
1129
+ if (!row_compressor -> compressed_is_null [column -> max_metadata_attr_offset ])
1130
+ {
1131
+ row_compressor -> compressed_values [column -> max_metadata_attr_offset ] = 0 ;
1132
+ row_compressor -> compressed_is_null [column -> max_metadata_attr_offset ] = true;
1133
+ }
1111
1134
}
1112
1135
segment_meta_min_max_builder_reset (column -> min_max_metadata_builder );
1113
1136
}
@@ -1133,12 +1156,38 @@ row_compressor_reset(RowCompressor *row_compressor)
1133
1156
row_compressor -> first_iteration = true;
1134
1157
}
1135
1158
/*
 * row_compressor_close
 *
 * Tear down a row compressor and hand back the min/max statistics that were
 * gathered while it ran. (In this diff the return type changes from void to
 * ChunkColumnStats **.)
 *
 * Steps performed:
 *  - free the bulk-insert state, if one was set;
 *  - close the catalog indexes opened on resultRelInfo;
 *  - palloc an array of n_input_columns ChunkColumnStats pointers, indexed
 *    by input column offset.
 *
 * For each input column the array entry is either:
 *  - a freshly palloc'd ChunkColumnStats whose attnum is the attribute number
 *    for that offset and whose minmax[0] / minmax[1] are the builder's
 *    relation-level min / max, when the column has a min/max builder and
 *    segment_meta_has_relation_stats() reports true for it; or
 *  - NULL otherwise.
 *
 * NOTE(review): the array and its entries are not freed here; presumably the
 * caller (or the surrounding memory context) owns them -- confirm.
 */
1136
- void
1159
+ ChunkColumnStats * *
1137
1160
row_compressor_close (RowCompressor * row_compressor )
1138
1161
{
1139
1162
if (row_compressor -> bistate )
1140
1163
FreeBulkInsertState (row_compressor -> bistate );
1141
1164
CatalogCloseIndexes (row_compressor -> resultRelInfo );
1165
+
1166
+ ChunkColumnStats * * colstats =
1167
+ palloc (sizeof (ChunkColumnStats * ) * row_compressor -> n_input_columns );
1168
+
1169
+ /* Get any relation-level stats (min and max) collected during compression
1170
+ * and return it to caller */
1171
+ for (int i = 0 ; i < row_compressor -> n_input_columns ; i ++ )
1172
+ {
1173
+ const PerColumn * column = & row_compressor -> per_column [i ];
1174
+ SegmentMetaMinMaxBuilder * builder = column -> min_max_metadata_builder ;
1175
+
1176
+ if (builder && segment_meta_has_relation_stats (builder ))
1177
+ {
1178
+ ChunkColumnStats * colstat = palloc (sizeof (ChunkColumnStats ));
1179
+ colstat -> attnum = AttrOffsetGetAttrNumber (i );
1180
+ colstat -> minmax [0 ] = segment_meta_min_max_builder_relation_min (builder );
1181
+ colstat -> minmax [1 ] = segment_meta_min_max_builder_relation_max (builder );
1182
+ colstats [i ] = colstat ;
1183
+ }
1184
+ else
1185
+ {
1186
+ colstats [i ] = NULL ;
1187
+ }
1188
+ }
1189
+
1190
+ return colstats ;
1142
1191
}
1143
1192
1144
1193
/******************
0 commit comments