Skip to content

Commit

Permalink
Add first attempt at generalized 2- and 3-layer merged forward
Browse files Browse the repository at this point in the history
NTT functions.

CBMC proofs of these new functions are all TBD.

For now, we call these in a "3,2,1,1" pattern to make sure
there are no unreferenced functions.

Signed-off-by: Rod Chapman <rodchap@amazon.com>
  • Loading branch information
rod-chapman committed Feb 21, 2025
1 parent 366156b commit 54308a0
Showing 1 changed file with 120 additions and 91 deletions.
211 changes: 120 additions & 91 deletions mlkem/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
#define fqmul MLK_NAMESPACE(fqmul)
#define barrett_reduce MLK_NAMESPACE(barrett_reduce)
#define scalar_signed_to_unsigned_q MLK_NAMESPACE(scalar_signed_to_unsigned_q)
#define ntt_butterfly_block MLK_NAMESPACE(ntt_butterfly_block)
#define ntt_layer MLK_NAMESPACE(ntt_layer)
#define ntt_1_layer MLK_NAMESPACE(ntt_1_layer)
#define ntt_2_layers MLK_NAMESPACE(ntt_2_layers)
#define ntt_3_layers MLK_NAMESPACE(ntt_3_layers)
#define invntt_layer MLK_NAMESPACE(invntt_layer)
/* End of static namespacing */

Expand Down Expand Up @@ -261,102 +262,133 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
#endif /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */

#if !defined(MLK_USE_NATIVE_NTT)
/*
* Computes a block CT butterflies with a fixed twiddle factor,
* using Montgomery multiplication.
* Parameters:
* - r: Pointer to base of polynomial (_not_ the base of butterfly block)
* - root: Twiddle factor to use for the butterfly. This must be in
* Montgomery form and signed canonical.
* - start: Offset to the beginning of the butterfly block
* - len: Index difference between coefficients subject to a butterfly
* - bound: Ghost variable describing coefficient bound: Prior to `start`,
* coefficients must be bound by `bound + MLKEM_Q`. Post `start`,
* they must be bound by `bound`.
* When this function returns, output coefficients in the index range
* [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
* Example:
* - start=8, len=4
* This would compute the following four butterflies
* 8 -- 12
* 9 -- 13
* 10 -- 14
* 11 -- 15
* - start=4, len=2
* This would compute the following two butterflies
* 4 -- 6
* 5 -- 7
*/
static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta,
unsigned start, unsigned len, int bound)
__contract__(
requires(start < MLKEM_N)
requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
requires(-MLKEM_Q_HALF < zeta && zeta < MLKEM_Q_HALF)
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
requires(array_abs_bound(r, start, MLKEM_N, bound))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))

static MLK_INLINE void ct_butterfly(int16_t r[MLKEM_N],
const unsigned coeff1_index,
const unsigned coeff2_index,
const int16_t zeta)
{
/* `bound` is a ghost variable only needed in the CBMC specification */
unsigned j;
((void)bound);
for (j = start; j < start + len; j++)
__loop__(
invariant(start <= j && j <= start + len)
/*
* Coefficients are updated in strided pairs, so the bounds for the
* intermediate states alternate twice between the old and new bound
*/
invariant(array_abs_bound(r, 0, j, bound + MLKEM_Q))
invariant(array_abs_bound(r, j, start + len, bound))
invariant(array_abs_bound(r, start + len, j + len, bound + MLKEM_Q))
invariant(array_abs_bound(r, j + len, MLKEM_N, bound)))
{
int16_t t;
t = fqmul(r[j + len], zeta);
r[j + len] = r[j] - t;
r[j] = r[j] + t;
}
int16_t t1 = r[coeff1_index];
int16_t t2 = fqmul(r[coeff2_index], zeta);
r[coeff1_index] = t1 + t2;
r[coeff2_index] = t1 - t2;
}

/*
*Compute one layer of forward NTT
* Parameters:
* - r: Pointer to base of polynomial
* - len: Stride of butterflies in this layer.
* - layer: Ghost variable indicating which layer is being applied.
* Must match `len` via `len == MLKEM_N >> layer`.
* Note: `len` could be dropped and computed in the function, but
* we are following the structure of the reference NTT from the
* official Kyber implementation here, merely adding `layer` as
* a ghost variable for the specifications.
*/
static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer)
static void ntt_1_layer(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
requires(1 <= layer && layer <= 7)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q)))
{
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* `layer` is a ghost variable only needed in the CBMC specification */
((void)layer);
/* Twiddle factors for layer n start at index 2^(layer-1) */
k = MLKEM_N / (2 * len);
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
__loop__(
invariant(start < MLKEM_N + 2 * len)
invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q))
invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)))
{
int16_t zeta = zetas[k++];
ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q);
const int16_t zeta = zetas[k++];
unsigned j;
for (j = 0; j < len; j++)
{
ct_butterfly(r, j + start, j + start + len, zeta);
}
}
}

static void ntt_2_layers(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 6)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 2) * MLKEM_Q)))
{
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
{
unsigned j;
const int16_t this_layer_zeta = zetas[k];
const int16_t next_layer_zeta1 = zetas[k * 2];
const int16_t next_layer_zeta2 = zetas[k * 2 + 1];
k++;

for (j = 0; j < len / 2; j++)
{
const unsigned ci0 = j + start;
const unsigned ci1 = ci0 + len / 2;
const unsigned ci2 = ci1 + len / 2;
const unsigned ci3 = ci2 + len / 2;

ct_butterfly(r, ci0, ci2, this_layer_zeta);
ct_butterfly(r, ci1, ci3, this_layer_zeta);
ct_butterfly(r, ci0, ci1, next_layer_zeta1);
ct_butterfly(r, ci2, ci3, next_layer_zeta2);
}
}
}

static void ntt_3_layers(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 5)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 3) * MLKEM_Q)))
{
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
{
unsigned j;
const int16_t first_layer_zeta = zetas[k];
const unsigned second_layer_zi1 = k * 2;
const unsigned second_layer_zi2 = k * 2 + 1;
const int16_t second_layer_zeta1 = zetas[second_layer_zi1];
const int16_t second_layer_zeta2 = zetas[second_layer_zi2];
const int16_t third_layer_zeta1 = zetas[second_layer_zi1 * 2];
const int16_t third_layer_zeta2 = zetas[second_layer_zi1 * 2 + 1];
const int16_t third_layer_zeta3 = zetas[second_layer_zi2 * 2];
const int16_t third_layer_zeta4 = zetas[second_layer_zi2 * 2 + 1];
k++;

for (j = 0; j < len / 4; j++)
{
const unsigned ci0 = j + start;
const unsigned ci1 = ci0 + len / 4;
const unsigned ci2 = ci1 + len / 4;
const unsigned ci3 = ci2 + len / 4;
const unsigned ci4 = ci3 + len / 4;
const unsigned ci5 = ci4 + len / 4;
const unsigned ci6 = ci5 + len / 4;
const unsigned ci7 = ci6 + len / 4;

ct_butterfly(r, ci0, ci4, first_layer_zeta);
ct_butterfly(r, ci1, ci5, first_layer_zeta);
ct_butterfly(r, ci2, ci6, first_layer_zeta);
ct_butterfly(r, ci3, ci7, first_layer_zeta);

ct_butterfly(r, ci0, ci2, second_layer_zeta1);
ct_butterfly(r, ci1, ci3, second_layer_zeta1);
ct_butterfly(r, ci4, ci6, second_layer_zeta2);
ct_butterfly(r, ci5, ci7, second_layer_zeta2);

ct_butterfly(r, ci0, ci1, third_layer_zeta1);
ct_butterfly(r, ci2, ci3, third_layer_zeta2);
ct_butterfly(r, ci4, ci5, third_layer_zeta3);
ct_butterfly(r, ci6, ci7, third_layer_zeta4);
}
}
}

Expand All @@ -372,18 +404,14 @@ __contract__(
MLK_INTERNAL_API
void poly_ntt(poly *p)
{
unsigned len, layer;
int16_t *r;
debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q);
r = p->coeffs;

for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
__loop__(
invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)))
{
ntt_layer(r, len, layer);
}
ntt_3_layers(r, 1);
ntt_2_layers(r, 4);
ntt_1_layer(r, 6);
ntt_1_layer(r, 7);

/* Check the stronger bound */
debug_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND);
Expand Down Expand Up @@ -490,6 +518,7 @@ MLK_EMPTY_CU(poly)
#undef fqmul
#undef barrett_reduce
#undef scalar_signed_to_unsigned_q
#undef ntt_butterfly_block
#undef ntt_layer
#undef ntt_1_layer
#undef ntt_2_layers
#undef ntt_3_layers
#undef invntt_layer

0 comments on commit 54308a0

Please sign in to comment.