From 54308a075c9704629fc9be38a8f629e5f0ab1237 Mon Sep 17 00:00:00 2001 From: Rod Chapman Date: Tue, 18 Feb 2025 10:48:34 +0000 Subject: [PATCH] Add first attempt at generalized 2- and 3-layer merged forward NTT functions. CBMC proofs of these new functions are all TBD. For now, we call these in a "3,2,1,1" pattern to make sure there are no unreferenced functions. Signed-off-by: Rod Chapman --- mlkem/poly.c | 211 +++++++++++++++++++++++++++++---------------------- 1 file changed, 120 insertions(+), 91 deletions(-) diff --git a/mlkem/poly.c b/mlkem/poly.c index e077d5d1d..e0a7cc9fb 100644 --- a/mlkem/poly.c +++ b/mlkem/poly.c @@ -22,8 +22,9 @@ #define fqmul MLK_NAMESPACE(fqmul) #define barrett_reduce MLK_NAMESPACE(barrett_reduce) #define scalar_signed_to_unsigned_q MLK_NAMESPACE(scalar_signed_to_unsigned_q) -#define ntt_butterfly_block MLK_NAMESPACE(ntt_butterfly_block) -#define ntt_layer MLK_NAMESPACE(ntt_layer) +#define ntt_1_layer MLK_NAMESPACE(ntt_1_layer) +#define ntt_2_layers MLK_NAMESPACE(ntt_2_layers) +#define ntt_3_layers MLK_NAMESPACE(ntt_3_layers) #define invntt_layer MLK_NAMESPACE(invntt_layer) /* End of static namespacing */ @@ -261,93 +262,30 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a) #endif /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */ #if !defined(MLK_USE_NATIVE_NTT) -/* - * Computes a block CT butterflies with a fixed twiddle factor, - * using Montgomery multiplication. - * Parameters: - * - r: Pointer to base of polynomial (_not_ the base of butterfly block) - * - root: Twiddle factor to use for the butterfly. This must be in - * Montgomery form and signed canonical. - * - start: Offset to the beginning of the butterfly block - * - len: Index difference between coefficients subject to a butterfly - * - bound: Ghost variable describing coefficient bound: Prior to `start`, - * coefficients must be bound by `bound + MLKEM_Q`. Post `start`, - * they must be bound by `bound`. - * When this function returns, output coefficients in the index range - * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`. - * Example: - * - start=8, len=4 - * This would compute the following four butterflies - * 8 -- 12 - * 9 -- 13 - * 10 -- 14 - * 11 -- 15 - * - start=4, len=2 - * This would compute the following two butterflies - * 4 -- 6 - * 5 -- 7 - */ -static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, - unsigned start, unsigned len, int bound) -__contract__( - requires(start < MLKEM_N) - requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) - requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) - requires(-MLKEM_Q_HALF < zeta && zeta < MLKEM_Q_HALF) - requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) - requires(array_abs_bound(r, 0, start, bound + MLKEM_Q)) - requires(array_abs_bound(r, start, MLKEM_N, bound)) - assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) - ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q)) - ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) + +static MLK_INLINE void ct_butterfly(int16_t r[MLKEM_N], + const unsigned coeff1_index, + const unsigned coeff2_index, + const int16_t zeta) { - /* `bound` is a ghost variable only needed in the CBMC specification */ - unsigned j; - ((void)bound); - for (j = start; j < start + len; j++) - __loop__( - invariant(start <= j && j <= start + len) - /* - * Coefficients are updated in strided pairs, so the bounds for the - * intermediate states alternate twice between the old and new bound - */ - invariant(array_abs_bound(r, 0, j, bound + MLKEM_Q)) - invariant(array_abs_bound(r, j, start + len, bound)) - invariant(array_abs_bound(r, start + len, j + len, bound + MLKEM_Q)) - invariant(array_abs_bound(r, j + len, MLKEM_N, bound))) - { - int16_t t; - t = fqmul(r[j + len], zeta); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } + int16_t t1 = r[coeff1_index]; + int16_t t2 = fqmul(r[coeff2_index], zeta); + r[coeff1_index] = t1 + t2; + r[coeff2_index] = t1 - t2; } -/* - *Compute one layer of forward NTT - * Parameters: - * - r: Pointer to base of polynomial - * - len: Stride of butterflies in this layer. - * - layer: Ghost variable indicating which layer is being applied. - * Must match `len` via `len == MLKEM_N >> layer`. - * Note: `len` could be dropped and computed in the function, but - * we are following the structure of the reference NTT from the - * official Kyber implementation here, merely adding `layer` as - * a ghost variable for the specifications. - */ -static void ntt_layer(int16_t r[MLKEM_N], unsigned len, unsigned layer) +static void ntt_1_layer(int16_t r[MLKEM_N], unsigned layer) __contract__( requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) - requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) + requires(1 <= layer && layer <= 7) requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)) assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) { + const unsigned len = MLKEM_N >> layer; unsigned start, k; - /* `layer` is a ghost variable only needed in the CBMC specification */ - ((void)layer); - /* Twiddle factors for layer n start at index 2^(layer-1) */ - k = MLKEM_N / (2 * len); + /* Twiddle factors for layer n start at index 2 ** (layer-1) */ + k = 1 << (layer - 1); for (start = 0; start < MLKEM_N; start += 2 * len) __loop__( invariant(start < MLKEM_N + 2 * len) @@ -355,8 +293,102 @@ __contract__( invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) { - int16_t zeta = zetas[k++]; - ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q); + const int16_t zeta = zetas[k++]; + unsigned j; + for (j = 0; j < len; j++) + { + ct_butterfly(r, j + start, j + start + len, zeta); + } + } +} + +static void ntt_2_layers(int16_t r[MLKEM_N], unsigned layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(1 <= layer && layer <= 6) + requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 2) * MLKEM_Q))) +{ + const unsigned len = MLKEM_N >> layer; + unsigned start, k; + /* Twiddle factors for layer n start at index 2 ** (layer-1) */ + k = 1 << (layer - 1); + for (start = 0; start < MLKEM_N; start += 2 * len) + { + unsigned j; + const int16_t this_layer_zeta = zetas[k]; + const int16_t next_layer_zeta1 = zetas[k * 2]; + const int16_t next_layer_zeta2 = zetas[k * 2 + 1]; + k++; + + for (j = 0; j < len / 2; j++) + { + const unsigned ci0 = j + start; + const unsigned ci1 = ci0 + len / 2; + const unsigned ci2 = ci1 + len / 2; + const unsigned ci3 = ci2 + len / 2; + + ct_butterfly(r, ci0, ci2, this_layer_zeta); + ct_butterfly(r, ci1, ci3, this_layer_zeta); + ct_butterfly(r, ci0, ci1, next_layer_zeta1); + ct_butterfly(r, ci2, ci3, next_layer_zeta2); + } + } +} + +static void ntt_3_layers(int16_t r[MLKEM_N], unsigned layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(1 <= layer && layer <= 5) + requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 3) * MLKEM_Q))) +{ + const unsigned len = MLKEM_N >> layer; + unsigned start, k; + /* Twiddle factors for layer n start at index 2 ** (layer-1) */ + k = 1 << (layer - 1); + for (start = 0; start < MLKEM_N; start += 2 * len) + { + unsigned j; + const int16_t first_layer_zeta = zetas[k]; + const unsigned second_layer_zi1 = k * 2; + const unsigned second_layer_zi2 = k * 2 + 1; + const int16_t second_layer_zeta1 = zetas[second_layer_zi1]; + const int16_t second_layer_zeta2 = zetas[second_layer_zi2]; + const int16_t third_layer_zeta1 = zetas[second_layer_zi1 * 2]; + const int16_t third_layer_zeta2 = zetas[second_layer_zi1 * 2 + 1]; + const int16_t third_layer_zeta3 = zetas[second_layer_zi2 * 2]; + const int16_t third_layer_zeta4 = zetas[second_layer_zi2 * 2 + 1]; + k++; + + for (j = 0; j < len / 4; j++) + { + const unsigned ci0 = j + start; + const unsigned ci1 = ci0 + len / 4; + const unsigned ci2 = ci1 + len / 4; + const unsigned ci3 = ci2 + len / 4; + const unsigned ci4 = ci3 + len / 4; + const unsigned ci5 = ci4 + len / 4; + const unsigned ci6 = ci5 + len / 4; + const unsigned ci7 = ci6 + len / 4; + + ct_butterfly(r, ci0, ci4, first_layer_zeta); + ct_butterfly(r, ci1, ci5, first_layer_zeta); + ct_butterfly(r, ci2, ci6, first_layer_zeta); + ct_butterfly(r, ci3, ci7, first_layer_zeta); + + ct_butterfly(r, ci0, ci2, second_layer_zeta1); + ct_butterfly(r, ci1, ci3, second_layer_zeta1); + ct_butterfly(r, ci4, ci6, second_layer_zeta2); + ct_butterfly(r, ci5, ci7, second_layer_zeta2); + + ct_butterfly(r, ci0, ci1, third_layer_zeta1); + ct_butterfly(r, ci2, ci3, third_layer_zeta2); + ct_butterfly(r, ci4, ci5, third_layer_zeta3); + ct_butterfly(r, ci6, ci7, third_layer_zeta4); + } } } @@ -372,18 +404,14 @@ __contract__( MLK_INTERNAL_API void poly_ntt(poly *p) { - unsigned len, layer; int16_t *r; debug_assert_abs_bound(p, MLKEM_N, MLKEM_Q); r = p->coeffs; - for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) - __loop__( - invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer)) - invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))) - { - ntt_layer(r, len, layer); - } + ntt_3_layers(r, 1); + ntt_2_layers(r, 4); + ntt_1_layer(r, 6); + ntt_1_layer(r, 7); /* Check the stronger bound */ debug_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND); @@ -490,6 +518,7 @@ MLK_EMPTY_CU(poly) #undef fqmul #undef barrett_reduce #undef scalar_signed_to_unsigned_q -#undef ntt_butterfly_block -#undef ntt_layer +#undef ntt_1_layer +#undef ntt_2_layers +#undef ntt_3_layers #undef invntt_layer