Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add generalized 2- and 3-layer merging of the forward NTT #784

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 112 additions & 88 deletions mlkem/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,106 +279,134 @@ void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a)
#endif /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */

#if !defined(MLK_USE_NATIVE_NTT)
/*
* Computes a block CT butterflies with a fixed twiddle factor,
* using Montgomery multiplication.
* Parameters:
* - r: Pointer to base of polynomial (_not_ the base of butterfly block)
* - root: Twiddle factor to use for the butterfly. This must be in
* Montgomery form and signed canonical.
* - start: Offset to the beginning of the butterfly block
* - len: Index difference between coefficients subject to a butterfly
* - bound: Ghost variable describing coefficient bound: Prior to `start`,
* coefficients must be bound by `bound + MLKEM_Q`. Post `start`,
* they must be bound by `bound`.
* When this function returns, output coefficients in the index range
* [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
* Example:
* - start=8, len=4
* This would compute the following four butterflies
* 8 -- 12
* 9 -- 13
* 10 -- 14
* 11 -- 15
* - start=4, len=2
* This would compute the following two butterflies
* 4 -- 6
* 5 -- 7
*/

/* Reference: Embedded in `ntt()` in the reference implementation. */
static void mlk_ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta,
unsigned start, unsigned len, int bound)
static MLK_INLINE void mlk_ct_butterfly(int16_t r[MLKEM_N],
const unsigned coeff1_index,
const unsigned coeff2_index,
const int16_t zeta)
{
int16_t t1 = r[coeff1_index];
int16_t t2 = mlk_fqmul(r[coeff2_index], zeta);
r[coeff1_index] = t1 + t2;
r[coeff2_index] = t1 - t2;
}

static void mlk_ntt_1_layer(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(start < MLKEM_N)
requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
requires(-MLKEM_Q_HALF < zeta && zeta < MLKEM_Q_HALF)
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
requires(array_abs_bound(r, start, MLKEM_N, bound))
requires(1 <= layer && layer <= 7)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q)))
{
/* `bound` is a ghost variable only needed in the CBMC specification */
unsigned j;
((void)bound);
for (j = start; j < start + len; j++)
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
__loop__(
invariant(start <= j && j <= start + len)
/*
* Coefficients are updated in strided pairs, so the bounds for the
* intermediate states alternate twice between the old and new bound
*/
invariant(array_abs_bound(r, 0, j, bound + MLKEM_Q))
invariant(array_abs_bound(r, j, start + len, bound))
invariant(array_abs_bound(r, start + len, j + len, bound + MLKEM_Q))
invariant(array_abs_bound(r, j + len, MLKEM_N, bound)))
invariant(start < MLKEM_N + 2 * len)
invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q))
invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)))
{
int16_t t;
t = mlk_fqmul(r[j + len], zeta);
r[j + len] = r[j] - t;
r[j] = r[j] + t;
const int16_t zeta = zetas[k++];
unsigned j;
for (j = 0; j < len; j++)
{
mlk_ct_butterfly(r, j + start, j + start + len, zeta);
}
}
}

/*
* Compute one layer of forward NTT
* Parameters:
* - r: Pointer to base of polynomial
* - len: Stride of butterflies in this layer.
* - layer: Ghost variable indicating which layer is being applied.
* Must match `len` via `len == MLKEM_N >> layer`.
* Note: `len` could be dropped and computed in the function, but
* we are following the structure of the reference NTT from the
* official Kyber implementation here, merely adding `layer` as
* a ghost variable for the specifications.
*/
static void mlk_ntt_2_layers(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 6)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 2) * MLKEM_Q)))
{
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
{
unsigned j;
const int16_t this_layer_zeta = zetas[k];
const int16_t next_layer_zeta1 = zetas[k * 2];
const int16_t next_layer_zeta2 = zetas[k * 2 + 1];
k++;

/* Reference: Embedded in `ntt()` in the reference implementation. */
static void mlk_ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer)
for (j = 0; j < len / 2; j++)
{
const unsigned ci0 = j + start;
const unsigned ci1 = ci0 + len / 2;
const unsigned ci2 = ci1 + len / 2;
const unsigned ci3 = ci2 + len / 2;

mlk_ct_butterfly(r, ci0, ci2, this_layer_zeta);
mlk_ct_butterfly(r, ci1, ci3, this_layer_zeta);
mlk_ct_butterfly(r, ci0, ci1, next_layer_zeta1);
mlk_ct_butterfly(r, ci2, ci3, next_layer_zeta2);
}
}
}

static void mlk_ntt_3_layers(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
requires(1 <= layer && layer <= 5)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q)))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 3) * MLKEM_Q)))
{
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* `layer` is a ghost variable only needed in the CBMC specification */
((void)layer);
/* Twiddle factors for layer n start at index 2^(layer-1) */
k = MLKEM_N / (2 * len);
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
__loop__(
invariant(start < MLKEM_N + 2 * len)
invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q))
invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)))
{
int16_t zeta = zetas[k++];
mlk_ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q);
unsigned j;
const int16_t first_layer_zeta = zetas[k];
const unsigned second_layer_zi1 = k * 2;
const unsigned second_layer_zi2 = k * 2 + 1;
const int16_t second_layer_zeta1 = zetas[second_layer_zi1];
const int16_t second_layer_zeta2 = zetas[second_layer_zi2];
const int16_t third_layer_zeta1 = zetas[second_layer_zi1 * 2];
const int16_t third_layer_zeta2 = zetas[second_layer_zi1 * 2 + 1];
const int16_t third_layer_zeta3 = zetas[second_layer_zi2 * 2];
const int16_t third_layer_zeta4 = zetas[second_layer_zi2 * 2 + 1];
k++;

for (j = 0; j < len / 4; j++)
{
const unsigned ci0 = j + start;
const unsigned ci1 = ci0 + len / 4;
const unsigned ci2 = ci1 + len / 4;
const unsigned ci3 = ci2 + len / 4;
const unsigned ci4 = ci3 + len / 4;
const unsigned ci5 = ci4 + len / 4;
const unsigned ci6 = ci5 + len / 4;
const unsigned ci7 = ci6 + len / 4;

mlk_ct_butterfly(r, ci0, ci4, first_layer_zeta);
mlk_ct_butterfly(r, ci1, ci5, first_layer_zeta);
mlk_ct_butterfly(r, ci2, ci6, first_layer_zeta);
mlk_ct_butterfly(r, ci3, ci7, first_layer_zeta);

mlk_ct_butterfly(r, ci0, ci2, second_layer_zeta1);
mlk_ct_butterfly(r, ci1, ci3, second_layer_zeta1);
mlk_ct_butterfly(r, ci4, ci6, second_layer_zeta2);
mlk_ct_butterfly(r, ci5, ci7, second_layer_zeta2);

mlk_ct_butterfly(r, ci0, ci1, third_layer_zeta1);
mlk_ct_butterfly(r, ci2, ci3, third_layer_zeta2);
mlk_ct_butterfly(r, ci4, ci5, third_layer_zeta3);
mlk_ct_butterfly(r, ci6, ci7, third_layer_zeta4);
}
}
}

Expand All @@ -395,18 +423,14 @@ __contract__(
MLK_INTERNAL_API
void mlk_poly_ntt(mlk_poly *p)
{
unsigned len, layer;
int16_t *r;
mlk_assert_abs_bound(p, MLKEM_N, MLKEM_Q);
r = p->coeffs;

for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
__loop__(
invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)))
{
mlk_ntt_layer(r, len, layer);
}
mlk_ntt_3_layers(r, 1);
mlk_ntt_2_layers(r, 4);
mlk_ntt_1_layer(r, 6);
mlk_ntt_1_layer(r, 7);

/* Check the stronger bound */
mlk_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND);
Expand Down
Loading