Skip to content

Commit

Permalink
Merge pull request #746 from pq-code-package/sys_changes
Browse files Browse the repository at this point in the history
Minor changes to sys.h, plus renaming
  • Loading branch information
mkannwischer authored Feb 6, 2025
2 parents 2f08fb2 + 9a6b959 commit b5ef959
Show file tree
Hide file tree
Showing 334 changed files with 2,992 additions and 3,013 deletions.
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ Macros:
- __contract__(x)={ void a; void b; void c; void d; void e; void f; } void abcdefghijklmnopqrstuvw()
- __loop__(x)={}
# Make this artificially long to force line break
- MLKEM_NATIVE_INTERNAL_API=void abcdefghijklmnopqrstuvwabcdefghijklmnopqrstuvwabcdefg();
- MLK_INTERNAL_API=void abcdefghijklmnopqrstuvwabcdefghijklmnopqrstuvwabcdefg();
22 changes: 11 additions & 11 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,20 @@ jobs:
- system: rpi4
name: Arm Cortex-A72 (Raspberry Pi 4) benchmarks
bench_pmu: PMU
archflags: -mcpu=cortex-a72 -DSYS_AARCH64_SLOW_BARREL_SHIFTER
cflags: "-flto -DFORCE_AARCH64"
archflags: -mcpu=cortex-a72 -DMLK_SYS_AARCH64_SLOW_BARREL_SHIFTER
cflags: "-flto -DMLK_FORCE_AARCH64"
bench_extra_args: ""
- system: rpi5
name: Arm Cortex-A76 (Raspberry Pi 5) benchmarks
bench_pmu: PERF
archflags: "-mcpu=cortex-a76 -march=armv8.2-a"
cflags: "-flto -DFORCE_AARCH64"
cflags: "-flto -DMLK_FORCE_AARCH64"
bench_extra_args: ""
- system: a55
name: Arm Cortex-A55 (Snapdragon 888) benchmarks
bench_pmu: PERF
archflags: "-mcpu=cortex-a55 -march=armv8.2-a"
cflags: "-flto -static -DFORCE_AARCH64 -DMLKEM_NATIVE_FIPS202_BACKEND_FILE=\\\\\\\"fips202/native/aarch64/meta_cortex_a55.h\\\\\\\""
cflags: "-flto -static -DMLK_FORCE_AARCH64 -DMLK_FIPS202_BACKEND_FILE=\\\\\\\"fips202/native/aarch64/meta_cortex_a55.h\\\\\\\""
bench_extra_args: -w exec-on-a55
- system: bpi
name: SpacemiT K1 8 (Banana Pi F3) benchmarks
Expand Down Expand Up @@ -82,43 +82,43 @@ jobs:
ec2_instance_type: t4g.small
ec2_ami: ubuntu-latest (aarch64)
archflags: -mcpu=cortex-a76 -march=armv8.2-a
cflags: "-flto -DFORCE_AARCH64"
cflags: "-flto -DMLK_FORCE_AARCH64"
perf: PERF
- name: Graviton3
ec2_instance_type: c7g.medium
ec2_ami: ubuntu-latest (aarch64)
archflags: -march=armv8.4-a+sha3
cflags: "-flto -DFORCE_AARCH64"
cflags: "-flto -DMLK_FORCE_AARCH64"
perf: PERF
- name: Graviton4
ec2_instance_type: c8g.medium
ec2_ami: ubuntu-latest (aarch64)
archflags: -march=armv9-a+sha3
cflags: "-flto -DFORCE_AARCH64"
cflags: "-flto -DMLK_FORCE_AARCH64"
perf: PERF
- name: AMD EPYC 4th gen (c7a)
ec2_instance_type: c7a.medium
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes -march=znver4
cflags: "-flto -DFORCE_X86_64"
cflags: "-flto -DMLK_FORCE_X86_64"
perf: PMU
- name: Intel Xeon 4th gen (c7i)
ec2_instance_type: c7i.metal-24xl
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes -march=sapphirerapids
cflags: "-flto -DFORCE_X86_64"
cflags: "-flto -DMLK_FORCE_X86_64"
perf: PMU
- name: AMD EPYC 3rd gen (c6a)
ec2_instance_type: c6a.large
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes -march=znver3
cflags: "-flto -DFORCE_X86_64"
cflags: "-flto -DMLK_FORCE_X86_64"
perf: PMU
- name: Intel Xeon 3rd gen (c6i)
ec2_instance_type: c6i.large
ec2_ami: ubuntu-latest (x86_64)
archflags: -mavx2 -mbmi2 -mpopcnt -maes -march=icelake-server
cflags: "-flto -DFORCE_X86_64"
cflags: "-flto -DMLK_FORCE_X86_64"
perf: PMU
uses: ./.github/workflows/bench_ec2_reusable.yml
if: github.repository_owner == 'pq-code-package' && (github.event.label.name == 'benchmark' || github.ref == 'refs/heads/main')
Expand Down
22 changes: 8 additions & 14 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
system: [ubuntu-latest, ubuntu-24.04-arm]
system: [ubuntu-latest, pqcp-arm64]
name: Linting
runs-on: ${{ matrix.system }}
steps:
Expand Down Expand Up @@ -129,7 +129,7 @@ jobs:
name: Quickcheck lib
strategy:
matrix:
system: [macos-latest, ubuntu-latest, ubuntu-24.04-arm]
system: [macos-latest, ubuntu-latest, pqcp-arm64]
runs-on: ${{ matrix.system }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -140,7 +140,7 @@ jobs:
name: Examples
strategy:
matrix:
system: [macos-latest, ubuntu-latest, ubuntu-24.04-arm]
system: [macos-latest, ubuntu-latest, pqcp-arm64]
runs-on: ${{ matrix.system }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -170,7 +170,7 @@ jobs:
strategy:
fail-fast: false
matrix:
system: [ubuntu-latest, ubuntu-24.04-arm]
system: [ubuntu-latest, pqcp-arm64]
runs-on: ${{ matrix.system }}
name: Check autogenerated files
steps:
Expand All @@ -196,7 +196,7 @@ jobs:
name: Simplified
- arg: '--no-simplify'
name: Unmodified
runs-on: ubuntu-24.04-arm
runs-on: pqcp-arm64
name: AArch64 dev backend (${{ matrix.backend.name }}, ${{ matrix.simplify.name }})
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -221,10 +221,6 @@ jobs:
name: 'MacOS'
arch: mac
mode: native
- runner: ubuntu-24.04-arm
name: 'ubuntu-24.04-arm'
arch: aarch64
mode: native
- runner: pqcp-arm64
name: 'ubuntu-latest (aarch64)'
arch: aarch64
Expand Down Expand Up @@ -321,8 +317,6 @@ jobs:
target:
- runner: pqcp-arm64
name: 'aarch64'
- runner: ubuntu-24.04-arm
name: 'ubuntu-24.04-arm'
- runner: ubuntu-latest
name: 'x86_64'
- runner: macos-latest
Expand Down Expand Up @@ -499,13 +493,13 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: make quickcheck
run: |
OPT=0 CFLAGS="-DMLKEM_NATIVE_CHECK_APIS -Wno-redundant-decls" make quickcheck
OPT=0 CFLAGS="-DMLK_CHECK_APIS -Wno-redundant-decls" make quickcheck
make clean >/dev/null
OPT=1 CFLAGS="-DMLKEM_NATIVE_CHECK_APIS -Wno-redundant-decls" make quickcheck
OPT=1 CFLAGS="-DMLK_CHECK_APIS -Wno-redundant-decls" make quickcheck
- uses: ./.github/actions/setup-apt
- name: tests func
run: |
./scripts/tests func --cflags="-DMLKEM_NATIVE_CHECK_APIS -Wno-redundant-decls"
./scripts/tests func --cflags="-DMLK_CHECK_APIS -Wno-redundant-decls"
ec2_functests:
strategy:
fail-fast: false
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ct-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
fail-fast: false
max-parallel: 4
matrix:
system: [ubuntu-latest, ubuntu-24.04-arm]
system: [ubuntu-latest, pqcp-arm64]
nix-shell:
- ci_valgrind-varlat_clang14
- ci_valgrind-varlat_clang15
Expand Down
18 changes: 9 additions & 9 deletions dev/aarch64_clean/meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,26 @@
* SPDX-License-Identifier: Apache-2.0
*/

#ifndef MLKEM_NATIVE_DEV_AARCH64_CLEAN_META_H
#define MLKEM_NATIVE_DEV_AARCH64_CLEAN_META_H
#ifndef MLK_DEV_AARCH64_CLEAN_META_H
#define MLK_DEV_AARCH64_CLEAN_META_H
/* ML-KEM arithmetic native profile for clean assembly */

#ifdef MLKEM_NATIVE_ARITH_PROFILE_H
#ifdef MLK_ARITH_PROFILE_H
#error Only one MLKEM_ARITH assembly profile can be defined -- did you include multiple profiles?
#else
#define MLKEM_NATIVE_ARITH_PROFILE_H
#define MLK_ARITH_PROFILE_H

/* Identifier for this backend so that source and assembly files
* in the build can be appropriately guarded. */
#define MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN
#define MLK_ARITH_BACKEND_AARCH64_CLEAN

#define MLKEM_NATIVE_ARITH_BACKEND_NAME AARCH64_CLEAN
#define MLK_ARITH_BACKEND_NAME AARCH64_CLEAN

/* Filename of the C backend implementation.
* This is not inlined here because this header is included in assembly
* files as well. */
#define MLKEM_NATIVE_ARITH_BACKEND_IMPL "native/aarch64/src/clean_impl.h"
#define MLK_ARITH_BACKEND_IMPL "native/aarch64/src/clean_impl.h"

#endif /* MLKEM_NATIVE_ARITH_PROFILE_H */
#endif /* MLK_ARITH_PROFILE_H */

#endif /* MLKEM_NATIVE_DEV_AARCH64_CLEAN_META_H */
#endif /* MLK_DEV_AARCH64_CLEAN_META_H */
26 changes: 13 additions & 13 deletions dev/aarch64_clean/src/aarch64_zetas.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

#include "../../../common.h"

#if defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) && \
!defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED)
#if defined(MLK_ARITH_BACKEND_AARCH64_CLEAN) && \
!defined(MLK_MULTILEVEL_BUILD_NO_SHARED)

#include <stdint.h>
#include "arith_native_aarch64.h"
Expand All @@ -20,7 +20,7 @@
* Table of zeta values used in the AArch64 forward NTT
* See autogen for details.
*/
ALIGN const int16_t aarch64_ntt_zetas_layer01234[] = {
MLK_ALIGN const int16_t aarch64_ntt_zetas_layer01234[] = {
-1600, -15749, -749, -7373, -40, -394, -687, -6762, 630, 6201,
-1432, -14095, 848, 8347, 0, 0, 1062, 10453, 296, 2914,
-882, -8682, 0, 0, -1410, -13879, 1339, 13180, 1476, 14529,
Expand All @@ -31,7 +31,7 @@ ALIGN const int16_t aarch64_ntt_zetas_layer01234[] = {
0, 0, -1583, -15582, -1355, -13338, 821, 8081, 0, 0,
};

ALIGN const int16_t aarch64_ntt_zetas_layer56[] = {
MLK_ALIGN const int16_t aarch64_ntt_zetas_layer56[] = {
289, 289, 331, 331, -76, -76, -1573, -1573, 2845,
2845, 3258, 3258, -748, -748, -15483, -15483, 17, 17,
583, 583, 1637, 1637, -1041, -1041, 167, 167, 5739,
Expand Down Expand Up @@ -77,7 +77,7 @@ ALIGN const int16_t aarch64_ntt_zetas_layer56[] = {
10129, 10129, -3878, -3878, -11566, -11566,
};

ALIGN const int16_t aarch64_invntt_zetas_layer01234[] = {
MLK_ALIGN const int16_t aarch64_invntt_zetas_layer01234[] = {
1583, 15582, -821, -8081, 1355, 13338, 0, 0, -569, -5601,
450, 4429, 936, 9213, 0, 0, 69, 679, 447, 4400,
-535, -5266, 0, 0, 543, 5345, 1235, 12156, -1426, -14036,
Expand All @@ -88,7 +88,7 @@ ALIGN const int16_t aarch64_invntt_zetas_layer01234[] = {
-848, -8347, 1432, 14095, -630, -6201, 687, 6762, 0, 0,
};

ALIGN const int16_t aarch64_invntt_zetas_layer56[] = {
MLK_ALIGN const int16_t aarch64_invntt_zetas_layer56[] = {
-910, -910, -1227, -1227, 219, 219, 855, 855, -8957,
-8957, -12078, -12078, 2156, 2156, 8416, 8416, 1175, 1175,
394, 394, -1029, -1029, -1212, -1212, 11566, 11566, 3878,
Expand Down Expand Up @@ -134,7 +134,7 @@ ALIGN const int16_t aarch64_invntt_zetas_layer56[] = {
-16113, -16113, -5739, -5739, -167, -167,
};

ALIGN const int16_t aarch64_zetas_mulcache_native[] = {
MLK_ALIGN const int16_t aarch64_zetas_mulcache_native[] = {
17, -17, -568, 568, 583, -583, -680, 680, 1637, -1637, 723,
-723, -1041, 1041, 1100, -1100, 1409, -1409, -667, 667, -48, 48,
233, -233, 756, -756, -1173, 1173, -314, 314, -279, 279, -1626,
Expand All @@ -149,7 +149,7 @@ ALIGN const int16_t aarch64_zetas_mulcache_native[] = {
1219, -394, 394, 885, -885, -1175, 1175,
};

ALIGN const int16_t aarch64_zetas_mulcache_twisted_native[] = {
MLK_ALIGN const int16_t aarch64_zetas_mulcache_twisted_native[] = {
167, -167, -5591, 5591, 5739, -5739, -6693, 6693, 16113,
-16113, 7117, -7117, -10247, 10247, 10828, -10828, 13869, -13869,
-6565, 6565, -472, 472, 2293, -2293, 7441, -7441, -11546,
Expand All @@ -167,11 +167,11 @@ ALIGN const int16_t aarch64_zetas_mulcache_twisted_native[] = {
-11566, 11566,
};

#else /* defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) \
&& !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) */
#else /* defined(MLK_ARITH_BACKEND_AARCH64_CLEAN) \
&& !defined(MLK_MULTILEVEL_BUILD_NO_SHARED) */

MLKEM_NATIVE_EMPTY_CU(aarch64_zetas)
MLK_EMPTY_CU(aarch64_zetas)


#endif /* defined(MLKEM_NATIVE_ARITH_BACKEND_AARCH64_CLEAN) \
&& !defined(MLKEM_NATIVE_MULTILEVEL_BUILD_NO_SHARED) */
#endif /* defined(MLK_ARITH_BACKEND_AARCH64_CLEAN) \
&& !defined(MLK_MULTILEVEL_BUILD_NO_SHARED) */
42 changes: 20 additions & 22 deletions dev/aarch64_clean/src/arith_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,22 @@
* Copyright (c) 2024-2025 The mlkem-native project authors
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef MLKEM_NATIVE_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H
#define MLKEM_NATIVE_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H
#ifndef MLK_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H
#define MLK_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H

#include <stdint.h>
#include "../../../common.h"

#define aarch64_ntt_zetas_layer01234 \
MLKEM_NAMESPACE(aarch64_ntt_zetas_layer01234)
#define aarch64_ntt_zetas_layer56 MLKEM_NAMESPACE(aarch64_ntt_zetas_layer56)
#define aarch64_ntt_zetas_layer01234 MLK_NAMESPACE(aarch64_ntt_zetas_layer01234)
#define aarch64_ntt_zetas_layer56 MLK_NAMESPACE(aarch64_ntt_zetas_layer56)
#define aarch64_invntt_zetas_layer01234 \
MLKEM_NAMESPACE(aarch64_invntt_zetas_layer01234)
#define aarch64_invntt_zetas_layer56 \
MLKEM_NAMESPACE(aarch64_invntt_zetas_layer56)
MLK_NAMESPACE(aarch64_invntt_zetas_layer01234)
#define aarch64_invntt_zetas_layer56 MLK_NAMESPACE(aarch64_invntt_zetas_layer56)
#define aarch64_zetas_mulcache_native \
MLKEM_NAMESPACE(aarch64_zetas_mulcache_native)
MLK_NAMESPACE(aarch64_zetas_mulcache_native)
#define aarch64_zetas_mulcache_twisted_native \
MLKEM_NAMESPACE(aarch64_zetas_mulcache_twisted_native)
#define rej_uniform_table MLKEM_NAMESPACE(rej_uniform_table)
MLK_NAMESPACE(aarch64_zetas_mulcache_twisted_native)
#define rej_uniform_table MLK_NAMESPACE(rej_uniform_table)

extern const int16_t aarch64_ntt_zetas_layer01234[];
extern const int16_t aarch64_ntt_zetas_layer56[];
Expand All @@ -29,50 +27,50 @@ extern const int16_t aarch64_zetas_mulcache_native[];
extern const int16_t aarch64_zetas_mulcache_twisted_native[];
extern const uint8_t rej_uniform_table[];

#define ntt_asm_clean MLKEM_NAMESPACE(ntt_asm_clean)
#define ntt_asm_clean MLK_NAMESPACE(ntt_asm_clean)
void ntt_asm_clean(int16_t *, const int16_t *, const int16_t *);

#define intt_asm_clean MLKEM_NAMESPACE(intt_asm_clean)
#define intt_asm_clean MLK_NAMESPACE(intt_asm_clean)
void intt_asm_clean(int16_t *, const int16_t *, const int16_t *);

#define rej_uniform_asm_clean MLKEM_NAMESPACE(rej_uniform_asm_clean)
#define rej_uniform_asm_clean MLK_NAMESPACE(rej_uniform_asm_clean)
unsigned rej_uniform_asm_clean(int16_t *r, const uint8_t *buf, unsigned buflen,
const uint8_t *table);

#define poly_reduce_asm_clean MLKEM_NAMESPACE(poly_reduce_asm_clean)
#define poly_reduce_asm_clean MLK_NAMESPACE(poly_reduce_asm_clean)
void poly_reduce_asm_clean(int16_t *);

#define poly_tomont_asm_clean MLKEM_NAMESPACE(poly_tomont_asm_clean)
#define poly_tomont_asm_clean MLK_NAMESPACE(poly_tomont_asm_clean)
void poly_tomont_asm_clean(int16_t *);

#define poly_mulcache_compute_asm_clean \
MLKEM_NAMESPACE(poly_mulcache_compute_asm_clean)
MLK_NAMESPACE(poly_mulcache_compute_asm_clean)
void poly_mulcache_compute_asm_clean(int16_t *, const int16_t *,
const int16_t *, const int16_t *);


#define poly_tobytes_asm_clean MLKEM_NAMESPACE(poly_tobytes_asm_clean)
#define poly_tobytes_asm_clean MLK_NAMESPACE(poly_tobytes_asm_clean)
void poly_tobytes_asm_clean(uint8_t *r, const int16_t *a);

#define polyvec_basemul_acc_montgomery_cached_asm_k2_clean \
MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
void polyvec_basemul_acc_montgomery_cached_asm_k2_clean(int16_t *r,
const int16_t *a,
const int16_t *b,
const int16_t *b_cache);

#define polyvec_basemul_acc_montgomery_cached_asm_k3_clean \
MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
void polyvec_basemul_acc_montgomery_cached_asm_k3_clean(int16_t *r,
const int16_t *a,
const int16_t *b,
const int16_t *b_cache);

#define polyvec_basemul_acc_montgomery_cached_asm_k4_clean \
MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
void polyvec_basemul_acc_montgomery_cached_asm_k4_clean(int16_t *r,
const int16_t *a,
const int16_t *b,
const int16_t *b_cache);

#endif /* MLKEM_NATIVE_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H */
#endif /* MLK_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H */
Loading

18 comments on commit b5ef959

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arm Cortex-A76 (Raspberry Pi 5) benchmarks

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 28965 cycles 28967 cycles 1.00
ML-KEM-512 encaps 34251 cycles 34254 cycles 1.00
ML-KEM-512 decaps 44731 cycles 44729 cycles 1.00
ML-KEM-768 keypair 49308 cycles 49309 cycles 1.00
ML-KEM-768 encaps 54570 cycles 54570 cycles 1
ML-KEM-768 decaps 69426 cycles 69425 cycles 1.00
ML-KEM-1024 keypair 71916 cycles 71915 cycles 1.00
ML-KEM-1024 encaps 80615 cycles 80611 cycles 1.00
ML-KEM-1024 decaps 100364 cycles 100359 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 4th gen (c7i)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 9301 cycles 9259 cycles 1.00
ML-KEM-512 encaps 10817 cycles 10725 cycles 1.01
ML-KEM-512 decaps 14751 cycles 14783 cycles 1.00
ML-KEM-768 keypair 15944 cycles 16026 cycles 0.99
ML-KEM-768 encaps 17284 cycles 17361 cycles 1.00
ML-KEM-768 decaps 22973 cycles 23076 cycles 1.00
ML-KEM-1024 keypair 21401 cycles 21379 cycles 1.00
ML-KEM-1024 encaps 23329 cycles 23268 cycles 1.00
ML-KEM-1024 decaps 30797 cycles 30774 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 4th gen (c7i) (no-opt)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 29666 cycles 29558 cycles 1.00
ML-KEM-512 encaps 35430 cycles 35237 cycles 1.01
ML-KEM-512 decaps 46054 cycles 45870 cycles 1.00
ML-KEM-768 keypair 47168 cycles 47148 cycles 1.00
ML-KEM-768 encaps 55575 cycles 55637 cycles 1.00
ML-KEM-768 decaps 67579 cycles 67578 cycles 1.00
ML-KEM-1024 keypair 71837 cycles 71740 cycles 1.00
ML-KEM-1024 encaps 82156 cycles 82104 cycles 1.00
ML-KEM-1024 decaps 99400 cycles 99329 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 4th gen (c7a)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 11287 cycles 11302 cycles 1.00
ML-KEM-512 encaps 12851 cycles 12858 cycles 1.00
ML-KEM-512 decaps 17703 cycles 17683 cycles 1.00
ML-KEM-768 keypair 19634 cycles 19646 cycles 1.00
ML-KEM-768 encaps 20584 cycles 20600 cycles 1.00
ML-KEM-768 decaps 27647 cycles 27661 cycles 1.00
ML-KEM-1024 keypair 26298 cycles 26305 cycles 1.00
ML-KEM-1024 encaps 28185 cycles 28193 cycles 1.00
ML-KEM-1024 decaps 37654 cycles 37845 cycles 0.99

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 3rd gen (c6a)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 16977 cycles 16963 cycles 1.00
ML-KEM-512 encaps 18681 cycles 18645 cycles 1.00
ML-KEM-512 decaps 24062 cycles 24038 cycles 1.00
ML-KEM-768 keypair 28713 cycles 28708 cycles 1.00
ML-KEM-768 encaps 29802 cycles 29791 cycles 1.00
ML-KEM-768 decaps 37589 cycles 37628 cycles 1.00
ML-KEM-1024 keypair 41719 cycles 41756 cycles 1.00
ML-KEM-1024 encaps 44091 cycles 43929 cycles 1.00
ML-KEM-1024 decaps 54312 cycles 54372 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 3rd gen (c6i)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 15928 cycles 15920 cycles 1.00
ML-KEM-512 encaps 18003 cycles 18006 cycles 1.00
ML-KEM-512 decaps 24545 cycles 24506 cycles 1.00
ML-KEM-768 keypair 27349 cycles 27361 cycles 1.00
ML-KEM-768 encaps 28916 cycles 28920 cycles 1.00
ML-KEM-768 decaps 38330 cycles 38332 cycles 1.00
ML-KEM-1024 keypair 36988 cycles 36946 cycles 1.00
ML-KEM-1024 encaps 39889 cycles 39874 cycles 1.00
ML-KEM-1024 decaps 52383 cycles 52381 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 4th gen (c7a) (no-opt)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 36084 cycles 36095 cycles 1.00
ML-KEM-512 encaps 42315 cycles 42326 cycles 1.00
ML-KEM-512 decaps 55462 cycles 55455 cycles 1.00
ML-KEM-768 keypair 58482 cycles 58537 cycles 1.00
ML-KEM-768 encaps 67004 cycles 66981 cycles 1.00
ML-KEM-768 decaps 84056 cycles 84106 cycles 1.00
ML-KEM-1024 keypair 86534 cycles 86562 cycles 1.00
ML-KEM-1024 encaps 97238 cycles 97246 cycles 1.00
ML-KEM-1024 decaps 118868 cycles 118885 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton3

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 18921 cycles 18921 cycles 1
ML-KEM-512 encaps 22420 cycles 22422 cycles 1.00
ML-KEM-512 decaps 29674 cycles 29673 cycles 1.00
ML-KEM-768 keypair 32315 cycles 32311 cycles 1.00
ML-KEM-768 encaps 35796 cycles 35788 cycles 1.00
ML-KEM-768 decaps 46184 cycles 46172 cycles 1.00
ML-KEM-1024 keypair 46633 cycles 46629 cycles 1.00
ML-KEM-1024 encaps 52344 cycles 52343 cycles 1.00
ML-KEM-1024 decaps 66374 cycles 66375 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton4

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 17721 cycles 17724 cycles 1.00
ML-KEM-512 encaps 20976 cycles 20977 cycles 1.00
ML-KEM-512 decaps 27657 cycles 27657 cycles 1
ML-KEM-768 keypair 30520 cycles 30512 cycles 1.00
ML-KEM-768 encaps 33418 cycles 33427 cycles 1.00
ML-KEM-768 decaps 42978 cycles 42981 cycles 1.00
ML-KEM-1024 keypair 44138 cycles 44138 cycles 1
ML-KEM-1024 encaps 49438 cycles 49447 cycles 1.00
ML-KEM-1024 decaps 62362 cycles 62376 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 3rd gen (c6a) (no-opt)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 39495 cycles 39465 cycles 1.00
ML-KEM-512 encaps 47534 cycles 47509 cycles 1.00
ML-KEM-512 decaps 61829 cycles 61779 cycles 1.00
ML-KEM-768 keypair 63896 cycles 63948 cycles 1.00
ML-KEM-768 encaps 75263 cycles 75288 cycles 1.00
ML-KEM-768 decaps 93820 cycles 93780 cycles 1.00
ML-KEM-1024 keypair 95500 cycles 95465 cycles 1.00
ML-KEM-1024 encaps 109013 cycles 108982 cycles 1.00
ML-KEM-1024 decaps 132492 cycles 132458 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton2

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 28945 cycles 28971 cycles 1.00
ML-KEM-512 encaps 34280 cycles 34256 cycles 1.00
ML-KEM-512 decaps 44784 cycles 44744 cycles 1.00
ML-KEM-768 keypair 49254 cycles 49315 cycles 1.00
ML-KEM-768 encaps 54614 cycles 54585 cycles 1.00
ML-KEM-768 decaps 69448 cycles 69411 cycles 1.00
ML-KEM-1024 keypair 71942 cycles 71940 cycles 1.00
ML-KEM-1024 encaps 80630 cycles 80609 cycles 1.00
ML-KEM-1024 decaps 100420 cycles 100409 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 3rd gen (c6i) (no-opt)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 46428 cycles 46483 cycles 1.00
ML-KEM-512 encaps 54593 cycles 54676 cycles 1.00
ML-KEM-512 decaps 70319 cycles 70405 cycles 1.00
ML-KEM-768 keypair 76752 cycles 76715 cycles 1.00
ML-KEM-768 encaps 87358 cycles 87316 cycles 1.00
ML-KEM-768 decaps 107739 cycles 107786 cycles 1.00
ML-KEM-1024 keypair 112122 cycles 112144 cycles 1.00
ML-KEM-1024 encaps 126277 cycles 126216 cycles 1.00
ML-KEM-1024 decaps 152314 cycles 152340 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton4 (no-opt)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 35500 cycles 35503 cycles 1.00
ML-KEM-512 encaps 40643 cycles 40647 cycles 1.00
ML-KEM-512 decaps 51642 cycles 51648 cycles 1.00
ML-KEM-768 keypair 58475 cycles 58477 cycles 1.00
ML-KEM-768 encaps 65242 cycles 65247 cycles 1.00
ML-KEM-768 decaps 80455 cycles 80462 cycles 1.00
ML-KEM-1024 keypair 88171 cycles 88178 cycles 1.00
ML-KEM-1024 encaps 96951 cycles 96961 cycles 1.00
ML-KEM-1024 decaps 116622 cycles 116641 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton3 (no-opt)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 38691 cycles 38697 cycles 1.00
ML-KEM-512 encaps 44312 cycles 44319 cycles 1.00
ML-KEM-512 decaps 56145 cycles 56152 cycles 1.00
ML-KEM-768 keypair 63849 cycles 63847 cycles 1.00
ML-KEM-768 encaps 70975 cycles 70979 cycles 1.00
ML-KEM-768 decaps 86940 cycles 86937 cycles 1.00
ML-KEM-1024 keypair 95387 cycles 95390 cycles 1.00
ML-KEM-1024 encaps 105309 cycles 105304 cycles 1.00
ML-KEM-1024 decaps 125681 cycles 125668 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton2 (no-opt)

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 58986 cycles 58986 cycles 1
ML-KEM-512 encaps 67439 cycles 67450 cycles 1.00
ML-KEM-512 decaps 86008 cycles 86012 cycles 1.00
ML-KEM-768 keypair 98182 cycles 98296 cycles 1.00
ML-KEM-768 encaps 109062 cycles 109147 cycles 1.00
ML-KEM-768 decaps 133814 cycles 133900 cycles 1.00
ML-KEM-1024 keypair 147166 cycles 146989 cycles 1.00
ML-KEM-1024 encaps 162157 cycles 162043 cycles 1.00
ML-KEM-1024 decaps 193744 cycles 193596 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SpacemiT K1 8 (Banana Pi F3) benchmarks

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 225125 cycles 225122 cycles 1.00
ML-KEM-512 encaps 269700 cycles 269688 cycles 1.00
ML-KEM-512 decaps 343347 cycles 343328 cycles 1.00
ML-KEM-768 keypair 371226 cycles 371271 cycles 1.00
ML-KEM-768 encaps 429971 cycles 430032 cycles 1.00
ML-KEM-768 decaps 527334 cycles 527841 cycles 1.00
ML-KEM-1024 keypair 555491 cycles 555480 cycles 1.00
ML-KEM-1024 encaps 631286 cycles 631163 cycles 1.00
ML-KEM-1024 decaps 752583 cycles 752367 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arm Cortex-A55 (Snapdragon 888) benchmarks

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 58123 cycles 58103 cycles 1.00
ML-KEM-512 encaps 64912 cycles 64952 cycles 1.00
ML-KEM-512 decaps 83730 cycles 83761 cycles 1.00
ML-KEM-768 keypair 98868 cycles 98810 cycles 1.00
ML-KEM-768 encaps 109760 cycles 109645 cycles 1.00
ML-KEM-768 decaps 136284 cycles 135910 cycles 1.00
ML-KEM-1024 keypair 149524 cycles 149652 cycles 1.00
ML-KEM-1024 encaps 166010 cycles 166172 cycles 1.00
ML-KEM-1024 decaps 201645 cycles 201905 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

@oqs-bot oqs-bot commented on b5ef959 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arm Cortex-A72 (Raspberry Pi 4) benchmarks

Benchmark suite Current: b5ef959 Previous: 2f08fb2 Ratio
ML-KEM-512 keypair 51927 cycles 52329 cycles 0.99
ML-KEM-512 encaps 59573 cycles 59730 cycles 1.00
ML-KEM-512 decaps 76635 cycles 77018 cycles 1.00
ML-KEM-768 keypair 88222 cycles 88025 cycles 1.00
ML-KEM-768 encaps 95752 cycles 95638 cycles 1.00
ML-KEM-768 decaps 119106 cycles 119964 cycles 0.99
ML-KEM-1024 keypair 132133 cycles 130892 cycles 1.01
ML-KEM-1024 encaps 144672 cycles 143701 cycles 1.01
ML-KEM-1024 decaps 177517 cycles 176435 cycles 1.01

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.