Skip to content

Commit 3decd6a

Browse files
committed
Use arrow cache only for index scans
Since compressed segments are accessed sequentially during sequential/columnar scans, it is not necessary to cache the decompressed arrow arrays. Instead, use the arrow cache only if an index scan is detected. Also make it possible to manually disable the cache by setting the GUC hypercore_arrow_cache_max_entries to 0.
1 parent 852fa33 commit 3decd6a

File tree

9 files changed

+112
-47
lines changed

9 files changed

+112
-47
lines changed

src/guc.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,12 +1120,13 @@ _guc_init(void)
11201120
/* short_desc= */ "max number of entries in arrow data cache",
11211121
/* long_desc= */
11221122
"The max number of decompressed arrow segments that can be "
1123-
"cached before entries are evicted. This mainly affects the "
1124-
"performance of index scans on the Hypercore TAM "
1125-
"when segments are accessed in non-sequential order.",
1123+
"cached before entries are evicted. This only affects the "
1124+
"performance of index scans when using Hypercore TAM "
1125+
"and segments are accessed in non-sequential order. "
1126+
"Set to 0 to disable the use of the cache.",
11261127
/* valueAddr= */ &ts_guc_hypercore_arrow_cache_max_entries,
11271128
/* bootValue= */ 25000,
1128-
/* minValue= */ 1,
1129+
/* minValue= */ 0,
11291130
/* maxValue= */ INT_MAX,
11301131
/* context= */ PGC_USERSET,
11311132
/* flags= */ 0,

tsl/src/hypercore/arrow_array.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,15 @@ arrow_release_buffers(ArrowArray *array)
141141
arrow_private_release(array);
142142
}
143143

144+
void
145+
arrow_release(ArrowArray *array)
146+
{
147+
if (array->release != NULL)
148+
array->release(array);
149+
150+
pfree(array);
151+
}
152+
144153
/*
145154
* Variable-size primitive layout ArrowArray from decompression iterator.
146155
*/

tsl/src/hypercore/arrow_array.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ extern NullableDatum arrow_get_datum(const ArrowArray *array, Oid typid, int16 t
1515
uint16 index);
1616
extern ArrowArray *arrow_from_compressed(Datum compressed, Oid typid, MemoryContext dest_mcxt,
1717
MemoryContext tmp_mcxt);
18+
extern void arrow_release(ArrowArray *array);

tsl/src/hypercore/arrow_cache.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -174,13 +174,7 @@ arrow_cache_clear_entry(ArrowColumnCacheEntry *restrict entry)
174174
if (entry->arrow_arrays[i])
175175
{
176176
ArrowArray *array = entry->arrow_arrays[i];
177-
178-
if (array->release)
179-
{
180-
array->release(array);
181-
array->release = NULL;
182-
}
183-
pfree(array);
177+
arrow_release(array);
184178
entry->arrow_arrays[i] = NULL;
185179
}
186180
}

tsl/src/hypercore/arrow_tts.c

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* LICENSE-TIMESCALE for a copy of the license.
55
*/
66
#include <postgres.h>
7+
#include "guc.h"
78
#include <access/attnum.h>
89
#include <access/htup_details.h>
910
#include <access/tupdesc.h>
@@ -94,6 +95,7 @@ tts_arrow_init(TupleTableSlot *slot)
9495
aslot->total_row_count = 0;
9596
aslot->referenced_attrs = NULL;
9697
aslot->arrow_qual_result = NULL;
98+
aslot->arrow_arrays = NULL;
9799

98100
/*
99101
* Set up child slots, one for the non-compressed relation and one for the
@@ -112,6 +114,8 @@ tts_arrow_init(TupleTableSlot *slot)
112114
aslot->child_slot = aslot->noncompressed_slot;
113115
aslot->valid_attrs = palloc0(sizeof(bool) * slot->tts_tupleDescriptor->natts);
114116
aslot->segmentby_attrs = palloc0(sizeof(bool) * slot->tts_tupleDescriptor->natts);
117+
aslot->arrow_arrays = palloc0(sizeof(ArrowArray *) * slot->tts_tupleDescriptor->natts);
118+
115119
/* Note that aslot->referenced_attrs is initialized on demand, and not
116120
* here, because NULL is a valid state for referenced_attrs. */
117121
MemoryContextSwitchTo(oldmcxt);
@@ -128,6 +132,24 @@ tts_arrow_init(TupleTableSlot *slot)
128132
GenerationContextCreateCompat(slot->tts_mcxt, "Per-segment memory context", 64 * 1024);
129133
}
130134

135+
static void
136+
clear_arrow_arrays(TupleTableSlot *slot)
137+
{
138+
ArrowTupleTableSlot *aslot = (ArrowTupleTableSlot *) slot;
139+
140+
if (aslot->arrow_arrays)
141+
{
142+
for (int i = 0; i < slot->tts_tupleDescriptor->natts; i++)
143+
{
144+
if (aslot->arrow_arrays[i] != NULL)
145+
{
146+
arrow_release(aslot->arrow_arrays[i]);
147+
aslot->arrow_arrays[i] = NULL;
148+
}
149+
}
150+
}
151+
}
152+
131153
/*
132154
* The release function is called by:
133155
*
@@ -153,6 +175,7 @@ tts_arrow_release(TupleTableSlot *slot)
153175
aslot->compressed_slot = NULL;
154176
aslot->noncompressed_slot = NULL;
155177
aslot->arrow_cache_entry = NULL;
178+
aslot->arrow_arrays = NULL;
156179
}
157180

158181
static void
@@ -269,13 +292,15 @@ tts_arrow_clear(TupleTableSlot *slot)
269292
memset(aslot->valid_attrs, 0, sizeof(bool) * slot->tts_tupleDescriptor->natts);
270293
aslot->arrow_cache_entry = NULL;
271294
aslot->arrow_qual_result = NULL;
295+
clear_arrow_arrays(slot);
272296
MemoryContextReset(aslot->per_segment_mcxt);
273297
}
274298

275299
static inline void
276300
tts_arrow_store_tuple(TupleTableSlot *slot, TupleTableSlot *child_slot, uint16 tuple_index)
277301
{
278302
ArrowTupleTableSlot *aslot = (ArrowTupleTableSlot *) slot;
303+
bool clear_arrow_data = true;
279304

280305
Assert(!TTS_EMPTY(child_slot));
281306
Assert(OidIsValid(slot->tts_tableOid));
@@ -315,6 +340,8 @@ tts_arrow_store_tuple(TupleTableSlot *slot, TupleTableSlot *child_slot, uint16 t
315340

316341
if (!ItemPointerEquals(&decoded_tid, &child_slot->tts_tid))
317342
clear_arrow_parent(slot);
343+
else
344+
clear_arrow_data = false;
318345
}
319346
}
320347

@@ -339,6 +366,10 @@ tts_arrow_store_tuple(TupleTableSlot *slot, TupleTableSlot *child_slot, uint16 t
339366
aslot->child_slot = child_slot;
340367
aslot->tuple_index = tuple_index;
341368
aslot->arrow_cache_entry = NULL;
369+
370+
if (clear_arrow_data)
371+
clear_arrow_arrays(slot);
372+
342373
/* Clear valid attributes */
343374
memset(aslot->valid_attrs, 0, sizeof(bool) * slot->tts_tupleDescriptor->natts);
344375
MemoryContextReset(aslot->per_segment_mcxt);
@@ -462,6 +493,52 @@ is_compressed_col(const TupleDesc tupdesc, AttrNumber attno)
462493
return coltypid == typinfo->type_oid;
463494
}
464495

496+
static inline ArrowArray *
497+
get_arrow_array(ArrowTupleTableSlot *aslot, const int16 attoff)
498+
{
499+
const AttrNumber attnum = AttrOffsetGetAttrNumber(attoff);
500+
TupleTableSlot *slot = &aslot->base.base;
501+
502+
/*
503+
* Only use the arrow array cache if the slot is used in an index scan and
504+
* the cache hasn't been disabled by configuration.
505+
*/
506+
if (aslot->index_attrs != NULL && ts_guc_hypercore_arrow_cache_max_entries > 0)
507+
{
508+
ArrowArray **arrow_arrays = arrow_column_cache_read_one(aslot, attnum);
509+
return arrow_arrays[attoff];
510+
}
511+
512+
Assert(aslot->arrow_arrays);
513+
514+
if (NULL == aslot->arrow_arrays[attoff])
515+
{
516+
const int16 *attrs_offset_map = arrow_slot_get_attribute_offset_map(&aslot->base.base);
517+
const AttrNumber cattno = AttrOffsetGetAttrNumber(attrs_offset_map[attoff]);
518+
const TupleDesc compressed_tupdesc = aslot->compressed_slot->tts_tupleDescriptor;
519+
520+
if (is_compressed_col(compressed_tupdesc, cattno))
521+
{
522+
bool isnull;
523+
Datum value = slot_getattr(aslot->child_slot, cattno, &isnull);
524+
525+
/* Can this ever be NULL? */
526+
if (!isnull)
527+
{
528+
const ArrowColumnCache *acache = &aslot->arrow_cache;
529+
const TupleDesc tupdesc = slot->tts_tupleDescriptor;
530+
const Form_pg_attribute attr = TupleDescAttr(tupdesc, attoff);
531+
aslot->arrow_arrays[attoff] = arrow_from_compressed(value,
532+
attr->atttypid,
533+
slot->tts_mcxt,
534+
acache->decompression_mcxt);
535+
}
536+
}
537+
}
538+
539+
return aslot->arrow_arrays[attoff];
540+
}
541+
465542
static pg_attribute_always_inline ArrowArray *
466543
set_attr_value(TupleTableSlot *slot, const int16 attoff)
467544
{
@@ -486,13 +563,11 @@ set_attr_value(TupleTableSlot *slot, const int16 attoff)
486563
}
487564
else
488565
{
489-
const AttrNumber attnum = AttrOffsetGetAttrNumber(attoff);
490-
ArrowArray **arrow_arrays = arrow_column_cache_read_one(aslot, attnum);
491-
492-
arrow_array = arrow_arrays[attoff];
566+
arrow_array = get_arrow_array(aslot, attoff);
493567

494568
if (arrow_array == NULL)
495569
{
570+
const AttrNumber attnum = AttrOffsetGetAttrNumber(attoff);
496571
/* Since the column is not the segment-by column, and there is no
497572
* decompressed data, the column must be NULL. Use the default
498573
* value. */
@@ -506,7 +581,7 @@ set_attr_value(TupleTableSlot *slot, const int16 attoff)
506581
const Oid typid = attr->atttypid;
507582
const int16 typlen = attr->attlen;
508583
const NullableDatum datum =
509-
arrow_get_datum(arrow_arrays[attoff], typid, typlen, aslot->tuple_index - 1);
584+
arrow_get_datum(arrow_array, typid, typlen, aslot->tuple_index - 1);
510585
slot->tts_values[attoff] = datum.value;
511586
slot->tts_isnull[attoff] = datum.isnull;
512587
}
@@ -777,7 +852,6 @@ arrow_slot_get_array(TupleTableSlot *slot, AttrNumber attno)
777852
{
778853
ArrowTupleTableSlot *aslot = (ArrowTupleTableSlot *) slot;
779854
const int attoff = AttrNumberGetAttrOffset(attno);
780-
ArrowArray **arrow_arrays;
781855

782856
TS_DEBUG_LOG("attno: %d, tuple_index: %d", attno, aslot->tuple_index);
783857

@@ -800,8 +874,7 @@ arrow_slot_get_array(TupleTableSlot *slot, AttrNumber attno)
800874
if (!aslot->valid_attrs[attoff])
801875
return set_attr_value(slot, attoff);
802876

803-
arrow_arrays = arrow_column_cache_read_one(aslot, attno);
804-
return arrow_arrays[attoff];
877+
return get_arrow_array(aslot, attoff);
805878
}
806879

807880
/*

tsl/src/hypercore/arrow_tts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ typedef struct ArrowTupleTableSlot
7373
uint16 total_row_count;
7474
ArrowColumnCache arrow_cache;
7575
ArrowColumnCacheEntry *arrow_cache_entry;
76+
ArrowArray **arrow_arrays;
7677
bool *referenced_attrs;
7778
bool *segmentby_attrs;
7879
bool *valid_attrs; /* Per-column validity up to "tts_nvalid" */

tsl/test/expected/hypercore_columnar.out

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,7 @@ $$, :'chunk'));
107107
Scankey: (device < 4)
108108
Vectorized Filter: (location = 2)
109109
Rows Removed by Filter: 16
110-
Array: cache misses=N, decompress count=N calls=N
111-
(6 rows)
110+
(5 rows)
112111

113112
-- Save away all data from the chunk so that we can compare.
114113
create table saved as select * from :chunk;
@@ -139,8 +138,7 @@ $$, :'chunk'));
139138
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
140139
Vectorized Filter: (humidity > '110'::double precision)
141140
Rows Removed by Filter: 204
142-
Array: cache misses=N, decompress count=N calls=N
143-
(5 rows)
141+
(4 rows)
144142

145143
select count(*) from :chunk where humidity > 110;
146144
count
@@ -159,8 +157,7 @@ $$, :'chunk'));
159157
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
160158
Vectorized Filter: (humidity > '50'::double precision)
161159
Rows Removed by Filter: 87
162-
Array: cache misses=N, decompress count=N calls=N
163-
(5 rows)
160+
(4 rows)
164161

165162
select lhs.count, rhs.count
166163
from (select count(*) from :chunk where humidity > 50) lhs,
@@ -191,8 +188,7 @@ $$, :'chunk'));
191188
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
192189
Filter: (temp > '50'::numeric)
193190
Rows Removed by Filter: 204
194-
Array: cache misses=N, decompress count=N calls=N
195-
(5 rows)
191+
(4 rows)
196192

197193
select count(*) from :chunk where temp > 50;
198194
count
@@ -210,8 +206,7 @@ $$, :'chunk'));
210206
-> Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
211207
Filter: (temp > '20'::numeric)
212208
Rows Removed by Filter: 98
213-
Array: cache misses=N, decompress count=N calls=N
214-
(5 rows)
209+
(4 rows)
215210

216211
select lhs.count, rhs.count
217212
from (select count(*) from :chunk where temp > 20) lhs,
@@ -242,8 +237,7 @@ select count(*) from :chunk where humidity > 40 and temp > 20;
242237
Filter: (temp > '20'::numeric)
243238
Rows Removed by Filter: 132
244239
Vectorized Filter: (humidity > '40'::double precision)
245-
Array: cache misses=30, decompress count=60 calls=165
246-
(6 rows)
240+
(5 rows)
247241

248242
select count(*) from :chunk where humidity > 40 and temp > 20;
249243
count
@@ -272,8 +266,7 @@ $$, :'chunk'));
272266
Rows Removed by Filter: 3
273267
Scankey: (device = 3)
274268
Vectorized Filter: (humidity > '40'::double precision)
275-
Array: cache misses=N, decompress count=N calls=N
276-
(7 rows)
269+
(6 rows)
277270

278271
select count(*) from :chunk where humidity > 40 and temp > 20 and device = 3;
279272
count
@@ -303,8 +296,7 @@ $$, :'chunk'));
303296
-> Seq Scan on _hyper_I_N_chunk (actual rows=N loops=N)
304297
Filter: (device < 4)
305298
Rows Removed by Filter: 184
306-
Array: cache misses=N, decompress count=N calls=N
307-
(8 rows)
299+
(7 rows)
308300

309301
drop table readings;
310302
drop table saved;

tsl/test/expected/hypercore_index_btree.out

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,7 @@ $$, :'chunk1'));
435435
------------------------------------------------------------------------
436436
Custom Scan (ColumnarScan) on _hyper_I_N_chunk (actual rows=N loops=N)
437437
Scankey: ((location_id >= 5) AND (location_id <= 10))
438-
Array: cache misses=N, decompress count=N calls=N
439-
(3 rows)
438+
(2 rows)
440439

441440
-- These should generate decompressions as above, but for all columns.
442441
select explain_analyze_anonymize(format($$

tsl/test/expected/hypercore_scans.out

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -377,8 +377,7 @@ on conflict (location, device, time) do nothing;
377377
-> Custom Scan (ChunkDispatch) (actual rows=8641 loops=1)
378378
-> Subquery Scan on "*SELECT*" (actual rows=8641 loops=1)
379379
-> Function Scan on generate_series t (actual rows=8641 loops=1)
380-
Array: cache misses=2, decompress count=4 calls=4
381-
(10 rows)
380+
(9 rows)
382381

383382
-- This should show values for all columns
384383
explain (analyze, costs off, timing off, summary off, decompress_cache_stats)
@@ -447,8 +446,7 @@ order by time desc;
447446
Scankey: (location = '1'::text)
448447
Vectorized Filter: (location = '1'::text)
449448
Rows Removed by Filter: 113
450-
Array: cache misses=27, decompress count=81 calls=239
451-
(8 rows)
449+
(7 rows)
452450

453451
-- Save the data for comparison with seqscan
454452
create temp table chunk_saved as
@@ -526,8 +524,7 @@ select count(*) from :chunk where location = 1::text;
526524
Scankey: (location = '1'::text)
527525
Vectorized Filter: (location = '1'::text)
528526
Rows Removed by Filter: 113
529-
Array: cache misses=27, decompress count=27 calls=27
530-
(6 rows)
527+
(5 rows)
531528

532529
-- Testing same thing with SeqScan. It still decompresses in the
533530
-- count(*) case, although it shouldn't have to. So, probably an
@@ -541,8 +538,7 @@ select count(*) from :chunk where device = 1;
541538
-> Seq Scan on _hyper_1_1_chunk (actual rows=17 loops=1)
542539
Filter: (device = 1)
543540
Rows Removed by Filter: 392
544-
Array: cache misses=30, decompress count=62 calls=410
545-
(5 rows)
541+
(4 rows)
546542

547543
explain (analyze, costs off, timing off, summary off, decompress_cache_stats)
548544
select device from :chunk where device = 1;
@@ -561,8 +557,7 @@ select count(*) from :chunk where location = 1::text;
561557
-> Seq Scan on _hyper_1_1_chunk (actual rows=89 loops=1)
562558
Filter: (location = '1'::text)
563559
Rows Removed by Filter: 320
564-
Array: cache misses=30, decompress count=62 calls=410
565-
(5 rows)
560+
(4 rows)
566561

567562
-- ColumnarScan declares itself as projection capable. This query
568563
-- would add a Result node on top if ColumnarScan couldn't project.

0 commit comments

Comments
 (0)