Skip to content

Commit

Permalink
Rename *_clean.S to *.S and remove _clean suffix from .h headers.
Browse files Browse the repository at this point in the history
Signed-off-by: Rod Chapman <rodchap@amazon.com>

Remove _clean suffix from all labels and function entry points

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Rename files, removing _opt suffix.

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Remove _opt suffix from all labels and function names

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Rename files to remove _opt and _clean suffixes

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Update autogen script, re-generate all files, and commit revised files.

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Remove _clean in two more places

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Remove _clean suffix from opt implementation of rej_uniform_asm()

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Remove _clean suffix from rej_uniform_asm in the opt backend

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Update README links to the clean and optimized AArch64 assembly files.

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Update calls to omit _clean and _opt suffixes

Signed-off-by: Rod Chapman <rodchap@amazon.com>

Correct name of mlk_poly_mulcache_compute_asm()

Signed-off-by: Rod Chapman <rodchap@amazon.com>
  • Loading branch information
rod-chapman committed Feb 24, 2025
1 parent 227195e commit d50368d
Show file tree
Hide file tree
Showing 42 changed files with 303 additions and 290 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ undefined behaviour in C, including out of bounds memory accesses and integer ov
all C code in [mlkem/*](mlkem) and [mlkem/fips202/*](mlkem/fips202) involved in running mlkem-native with its C backend.
See [proofs/cbmc](proofs/cbmc) for details.

HOL-Light functional correctness proofs for the optimized AArch64 NTT [ntt_opt.S](mlkem/native/aarch64/src/ntt_opt.S) and inverse NTT [intt_opt.S](mlkem/native/aarch64/src/intt_opt.S)
HOL-Light functional correctness proofs for the optimized AArch64 NTT [ntt.S](dev/aarch64_opt/src/ntt.S) and inverse NTT [intt.S](dev/aarch64_opt/src/intt.S)
can be found in [proofs/hol_light/arm](proofs/hol_light/arm). These proofs were contributed by John Harrison and
use the verification infrastructure provided by [s2n-bignum](https://github.com/awslabs/s2n-bignum).

Expand All @@ -80,8 +80,8 @@ offers three backends for C, AArch64 and x86_64 - if you'd like contribute new b
PR.

Our AArch64 assembly is developed using [SLOTHY](https://github.com/slothy-optimizer/slothy): We write
'clean' assembly by hand and automate micro-optimizations (e.g. see the [clean](dev/aarch64_clean/src/ntt_clean.S)
vs [optimized](mlkem/native/aarch64/src/ntt_opt.S) AArch64 NTT). See [dev/README.md](dev/README.md) for more details.
'clean' assembly by hand and automate micro-optimizations (e.g. see the [clean](dev/aarch64_clean/src/ntt.S)
vs [optimized](dev/aarch64_opt/src/ntt.S) AArch64 NTT). See [dev/README.md](dev/README.md) for more details.

## How should I use mlkem-native?

Expand Down
63 changes: 34 additions & 29 deletions dev/aarch64_clean/src/arith_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,49 @@ extern const int16_t mlk_aarch64_zetas_mulcache_native[];
extern const int16_t mlk_aarch64_zetas_mulcache_twisted_native[];
extern const uint8_t mlk_rej_uniform_table[];

#define mlk_ntt_asm_clean MLK_NAMESPACE(ntt_asm_clean)
void mlk_ntt_asm_clean(int16_t *, const int16_t *, const int16_t *);
#define mlk_ntt_asm MLK_NAMESPACE(ntt_asm)
void mlk_ntt_asm(int16_t *, const int16_t *, const int16_t *);

#define mlk_intt_asm_clean MLK_NAMESPACE(intt_asm_clean)
void mlk_intt_asm_clean(int16_t *, const int16_t *, const int16_t *);
#define mlk_intt_asm MLK_NAMESPACE(intt_asm)
void mlk_intt_asm(int16_t *, const int16_t *, const int16_t *);

#define mlk_rej_uniform_asm_clean MLK_NAMESPACE(rej_uniform_asm_clean)
unsigned mlk_rej_uniform_asm_clean(int16_t *r, const uint8_t *buf,
unsigned buflen, const uint8_t *table);
#define mlk_rej_uniform_asm MLK_NAMESPACE(rej_uniform_asm)
unsigned mlk_rej_uniform_asm(int16_t *r, const uint8_t *buf, unsigned buflen,
const uint8_t *table);

#define mlk_poly_reduce_asm_clean MLK_NAMESPACE(poly_reduce_asm_clean)
void mlk_poly_reduce_asm_clean(int16_t *);
#define mlk_poly_reduce_asm MLK_NAMESPACE(poly_reduce_asm)
void mlk_poly_reduce_asm(int16_t *);

#define mlk_poly_tomont_asm_clean MLK_NAMESPACE(poly_tomont_asm_clean)
void mlk_poly_tomont_asm_clean(int16_t *);
#define mlk_poly_tomont_asm MLK_NAMESPACE(poly_tomont_asm)
void mlk_poly_tomont_asm(int16_t *);

#define mlk_poly_mulcache_compute_asm_clean \
MLK_NAMESPACE(poly_mulcache_compute_asm_clean)
void mlk_poly_mulcache_compute_asm_clean(int16_t *, const int16_t *,
const int16_t *, const int16_t *);
#define mlk_poly_mulcache_compute_asm MLK_NAMESPACE(poly_mulcache_compute_asm)
void mlk_poly_mulcache_compute_asm(int16_t *, const int16_t *, const int16_t *,
const int16_t *);


#define mlk_poly_tobytes_asm_clean MLK_NAMESPACE(poly_tobytes_asm_clean)
void mlk_poly_tobytes_asm_clean(uint8_t *r, const int16_t *a);
#define mlk_poly_tobytes_asm MLK_NAMESPACE(poly_tobytes_asm)
void mlk_poly_tobytes_asm(uint8_t *r, const int16_t *a);

#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k2_clean \
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k2_clean(
int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache);
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k2 \
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2)
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k2(int16_t *r,
const int16_t *a,
const int16_t *b,
const int16_t *b_cache);

#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k3_clean \
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k3_clean(
int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache);
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k3 \
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3)
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k3(int16_t *r,
const int16_t *a,
const int16_t *b,
const int16_t *b_cache);

#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k4_clean \
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k4_clean(
int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache);
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k4 \
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4)
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k4(int16_t *r,
const int16_t *a,
const int16_t *b,
const int16_t *b_cache);

#endif /* MLK_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H */
27 changes: 13 additions & 14 deletions dev/aarch64_clean/src/clean_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,59 +26,58 @@

static MLK_INLINE void mlk_ntt_native(int16_t data[MLKEM_N])
{
mlk_ntt_asm_clean(data, mlk_aarch64_ntt_zetas_layer12345,
mlk_aarch64_ntt_zetas_layer67);
mlk_ntt_asm(data, mlk_aarch64_ntt_zetas_layer12345,
mlk_aarch64_ntt_zetas_layer67);
}

static MLK_INLINE void mlk_intt_native(int16_t data[MLKEM_N])
{
mlk_intt_asm_clean(data, mlk_aarch64_invntt_zetas_layer12345,
mlk_aarch64_invntt_zetas_layer67);
mlk_intt_asm(data, mlk_aarch64_invntt_zetas_layer12345,
mlk_aarch64_invntt_zetas_layer67);
}

static MLK_INLINE void mlk_poly_reduce_native(int16_t data[MLKEM_N])
{
mlk_poly_reduce_asm_clean(data);
mlk_poly_reduce_asm(data);
}

static MLK_INLINE void mlk_poly_tomont_native(int16_t data[MLKEM_N])
{
mlk_poly_tomont_asm_clean(data);
mlk_poly_tomont_asm(data);
}

static MLK_INLINE void mlk_poly_mulcache_compute_native(
int16_t x[MLKEM_N / 2], const int16_t y[MLKEM_N])
{
mlk_poly_mulcache_compute_asm_clean(
x, y, mlk_aarch64_zetas_mulcache_native,
mlk_aarch64_zetas_mulcache_twisted_native);
mlk_poly_mulcache_compute_asm(x, y, mlk_aarch64_zetas_mulcache_native,
mlk_aarch64_zetas_mulcache_twisted_native);
}

static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k2_native(
int16_t r[MLKEM_N], const int16_t a[2 * MLKEM_N],
const int16_t b[2 * MLKEM_N], const int16_t b_cache[2 * (MLKEM_N / 2)])
{
mlk_polyvec_basemul_acc_montgomery_cached_asm_k2_clean(r, a, b, b_cache);
mlk_polyvec_basemul_acc_montgomery_cached_asm_k2(r, a, b, b_cache);
}

static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k3_native(
int16_t r[MLKEM_N], const int16_t a[3 * MLKEM_N],
const int16_t b[3 * MLKEM_N], const int16_t b_cache[3 * (MLKEM_N / 2)])
{
mlk_polyvec_basemul_acc_montgomery_cached_asm_k3_clean(r, a, b, b_cache);
mlk_polyvec_basemul_acc_montgomery_cached_asm_k3(r, a, b, b_cache);
}

static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k4_native(
int16_t r[MLKEM_N], const int16_t a[4 * MLKEM_N],
const int16_t b[4 * MLKEM_N], const int16_t b_cache[4 * (MLKEM_N / 2)])
{
mlk_polyvec_basemul_acc_montgomery_cached_asm_k4_clean(r, a, b, b_cache);
mlk_polyvec_basemul_acc_montgomery_cached_asm_k4(r, a, b, b_cache);
}

static MLK_INLINE void mlk_poly_tobytes_native(uint8_t r[MLKEM_POLYBYTES],
const int16_t a[MLKEM_N])
{
mlk_poly_tobytes_asm_clean(r, a);
mlk_poly_tobytes_asm(r, a);
}

static MLK_INLINE int mlk_rej_uniform_native(int16_t *r, unsigned len,
Expand All @@ -89,7 +88,7 @@ static MLK_INLINE int mlk_rej_uniform_native(int16_t *r, unsigned len,
{
return -1;
}
return (int)mlk_rej_uniform_asm_clean(r, buf, buflen, mlk_rej_uniform_table);
return (int)mlk_rej_uniform_asm(r, buf, buflen, mlk_rej_uniform_table);
}

#endif /* MLK_ARITH_PROFILE_IMPL_H */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,9 @@
ninv_tw .req v30

.text
.global MLK_ASM_NAMESPACE(intt_asm_clean)
.global MLK_ASM_NAMESPACE(intt_asm)
.balign 4
MLK_ASM_FN_SYMBOL(intt_asm_clean)
MLK_ASM_FN_SYMBOL(intt_asm)
push_stack

// Setup constants
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,9 @@
t3 .req v28

.text
.global MLK_ASM_NAMESPACE(ntt_asm_clean)
.global MLK_ASM_NAMESPACE(ntt_asm)
.balign 4
MLK_ASM_FN_SYMBOL(ntt_asm_clean)
MLK_ASM_FN_SYMBOL(ntt_asm)
push_stack

mov wtmp, #3329
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@
modulus .req v6
modulus_twisted .req v7

.global MLK_ASM_NAMESPACE(poly_mulcache_compute_asm_clean)
.global MLK_ASM_NAMESPACE(poly_mulcache_compute_asm)
.text
.balign 4
MLK_ASM_FN_SYMBOL(poly_mulcache_compute_asm_clean)
MLK_ASM_FN_SYMBOL(poly_mulcache_compute_asm)
mov wtmp, #3329
dup modulus.8h, wtmp

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
modulus_twisted .req v4

.text
.global MLK_ASM_NAMESPACE(poly_reduce_asm_clean)
.global MLK_ASM_NAMESPACE(poly_reduce_asm)
.balign 4
MLK_ASM_FN_SYMBOL(poly_reduce_asm_clean)
MLK_ASM_FN_SYMBOL(poly_reduce_asm)

mov wtmp, #3329 // ML-KEM modulus
dup modulus.8h, wtmp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
count .req x2

.text
.global MLK_ASM_NAMESPACE(poly_tobytes_asm_clean)
.global MLK_ASM_NAMESPACE(poly_tobytes_asm)
.balign 4
MLK_ASM_FN_SYMBOL(poly_tobytes_asm_clean)
MLK_ASM_FN_SYMBOL(poly_tobytes_asm)

mov count, #16
poly_tobytes_asm_clean_asm_loop_start:
poly_tobytes_asm_asm_loop_start:
ld2 {data0.8h, data1.8h}, [src], #32

// r[3 * i + 0] = (t0 >> 0);
Expand All @@ -47,7 +47,7 @@ poly_tobytes_asm_clean_asm_loop_start:
st3 {out0.8b, out1.8b, out2.8b}, [dst], #24

subs count, count, #1
cbnz count, poly_tobytes_asm_clean_asm_loop_start
cbnz count, poly_tobytes_asm_asm_loop_start
ret

.unreq data0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
tmp0 .req v6

.text
.global MLK_ASM_NAMESPACE(poly_tomont_asm_clean)
.global MLK_ASM_NAMESPACE(poly_tomont_asm)
.balign 4
MLK_ASM_FN_SYMBOL(poly_tomont_asm_clean)
MLK_ASM_FN_SYMBOL(poly_tomont_asm)

mov wtmp, #3329 // ML-KEM modulus
dup modulus.8h, wtmp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@
t0 .req v28

.text
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2)
.balign 4
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k2)
push_stack

mov wtmp, #3329
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@
t0 .req v28

.text
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3)
.balign 4
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k3)
push_stack
mov wtmp, #3329
dup modulus.8h, wtmp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@
t0 .req v28

.text
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4)
.balign 4
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k4)
push_stack
mov wtmp, #3329
dup modulus.8h, wtmp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

/*************************************************
* Name: mlk_rej_uniform_asm_clean
* Name: mlk_rej_uniform_asm
*
* Description: Run rejection sampling on uniform random bytes to generate
* uniform random integers mod q
Expand Down Expand Up @@ -114,9 +114,9 @@
bits .req v31

.text
.global MLK_ASM_NAMESPACE(rej_uniform_asm_clean)
.global MLK_ASM_NAMESPACE(rej_uniform_asm)
.balign 4
MLK_ASM_FN_SYMBOL(rej_uniform_asm_clean)
MLK_ASM_FN_SYMBOL(rej_uniform_asm)
push_stack

// Load 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
Expand Down
Loading

0 comments on commit d50368d

Please sign in to comment.