Skip to content

Commit

Permalink
Add apply-count parameter (#392)
Browse files Browse the repository at this point in the history
- Add a new apply-count command line parameter to allow tuning of the REDO apply_count variable.
- Check on startup that switch_threshold is not greater than or equal to apply_count, which would cause REDO to stop after the first apply_log call (fwiw this bug also exists in 1.5.0, since the switch_threshold parameter was added but was not checked against the default APPLY_COUNT of 1000).
- Tuning apply_count might be useful for improving REDO performance on tables with small tuples that receive lots of updates.
- Fixes #393
  • Loading branch information
alex-richman-onesignal authored Apr 15, 2024
1 parent cd7efb4 commit aae9f2f
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 6 deletions.
19 changes: 13 additions & 6 deletions bin/pg_repack.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ const char *PROGRAM_VERSION = "unknown";


/*
* APPLY_COUNT: Number of applied logs per transaction. Larger values
* APPLY_COUNT_DEFAULT: Number of applied logs per transaction. Larger values
* could be faster, but will result in longer transactions in the REDO phase.
*/
#define APPLY_COUNT 1000
#define APPLY_COUNT_DEFAULT 1000

/* Once we get down to seeing fewer than this many tuples in the
* log table, we'll say that we're ready to perform the switch.
Expand Down Expand Up @@ -258,6 +258,7 @@ static bool no_kill_backend = false; /* abandon when timed-out */
static bool no_superuser_check = false;
static SimpleStringList exclude_extension_list = {NULL, NULL}; /* don't repack tables of these extensions */
static bool error_on_invalid_index = false; /* don't repack when invalid index is found */
static int apply_count = APPLY_COUNT_DEFAULT;
static int switch_threshold = SWITCH_THRESHOLD_DEFAULT;

/* buffer should have at least 11 bytes */
Expand Down Expand Up @@ -288,7 +289,8 @@ static pgut_option options[] =
{ 'b', 'D', "no-kill-backend", &no_kill_backend },
{ 'b', 'k', "no-superuser-check", &no_superuser_check },
{ 'l', 'C', "exclude-extension", &exclude_extension_list },
{ 'b', 2, "error-on-invalid-index", &error_on_invalid_index },
{ 'b', 3, "error-on-invalid-index", &error_on_invalid_index },
{ 'i', 2, "apply-count", &apply_count },
{ 'i', 1, "switch-threshold", &switch_threshold },
{ 0 },
};
Expand All @@ -308,6 +310,10 @@ main(int argc, char *argv[])
(errcode(EINVAL),
errmsg("too many arguments")));

if(switch_threshold >= apply_count)
ereport(ERROR, (errcode(EINVAL),
errmsg("switch_threshold must be less than apply_count")));

check_tablespace();

if (dryrun)
Expand Down Expand Up @@ -1547,10 +1553,10 @@ repack_one_table(repack_table *table, const char *orderby)
*/
for (;;)
{
num = apply_log(connection, table, APPLY_COUNT);
num = apply_log(connection, table, apply_count);

/* We'll keep applying tuples from the log table in batches
* of APPLY_COUNT, until applying a batch of tuples
* of apply_count, until applying a batch of tuples
* (via LIMIT) results in our having applied
* switch_threshold or fewer tuples. We don't want to
* get stuck repetitively applying some small number of tuples
Expand Down Expand Up @@ -2377,5 +2383,6 @@ pgut_help(bool details)
printf(" -k, --no-superuser-check skip superuser checks in client\n");
printf(" -C, --exclude-extension don't repack tables which belong to specific extension\n");
printf(" --error-on-invalid-index don't repack tables which belong to specific extension\n");
printf(" --switch-threshold switch tables when that many tuples are left to catchup\n");
printf(" --apply-count number of tuples to apply in one transaction during replay\n");
printf(" --switch-threshold switch tables when that many tuples are left to catchup\n");
}
1 change: 1 addition & 0 deletions doc/pg_repack.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ Options:
-k, --no-superuser-check skip superuser checks in client
-C, --exclude-extension don't repack tables which belong to specific extension
--error-on-invalid-index don't repack when invalid index is found
--apply-count number of tuples to apply in one transaction during replay
--switch-threshold switch tables when that many tuples are left to catchup

Connection options:
Expand Down
5 changes: 5 additions & 0 deletions regress/expected/repack-check.out
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,11 @@ INFO: repacking index "public.child_b_2_pkey"
INFO: repacking indexes of "public.parent_b"
INFO: repacking index "public.parent_b_pkey"
--
-- Apply count
--
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --apply-count 1234
INFO: repacking table "public.tbl_cluster"
--
-- Switch threshold
--
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --switch-threshold 200
Expand Down
4 changes: 4 additions & 0 deletions regress/sql/repack-check.sql
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ CREATE TABLE child_b_2(val integer primary key) INHERITS(parent_b);
-- => OK
\! pg_repack --dbname=contrib_regression --parent-table=parent_a --parent-table=parent_b --only-indexes

--
-- Apply count
--
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --apply-count 1234
--
-- Switch threshold
--
Expand Down

0 comments on commit aae9f2f

Please sign in to comment.