
Commit ac6b2d1

Merge pull request #1 from sayantn/risc-v
Implement RISC-V (32 and 64)
2 parents: 3b56c25 + 8176cd8

5 files changed: 615 additions, 22 deletions


README.md (+4, -1)
@@ -9,11 +9,14 @@ implementations, among which it automatically decides the best (most performant)
 - AES-NI (with Vector AES for 2- and 4- blocks) => requires a Nightly Compiler, the `nightly` feature to be enabled, and
   compiling for x86(64) with the `avx512f` and `vaes` target_feature flags set.
 - AES-NI (with Vector AES for 2-blocks) => requires a Nightly Compiler, the `nightly` feature to be enabled, and
-  compiling for x86(64) with the `vaes` target_feature flag set. (although `vaes` is a AVX-512 feature, some AlderLake
+  compiling for x86(64) with the `vaes` target_feature flag set. (although `vaes` is an AVX-512 feature, some AlderLake
   CPUs have `vaes` without AVX-512 support)
 - AES-NI => requires compiling for x86(64) with the `sse4.1` and `aes` target_feature flags set.
 - AES-Neon => requires compiling for AArch64 or ARM64EC or ARM-v8 with the `aes` target_feature flag set (ARM-v8
   requires a Nightly compiler and the `nightly` feature to be enabled) .
+- AES-RV => Requires a Nightly compiler, the `nightly` feature to be enabled and compiling for RISC-V RV64 or RV32 with
+  the `zkne` and `zknd` target-features enabled (performance considerably improves with the `unaligned-scalar-mem`
+  target-feature enabled)
 - Software AES => fallback implementation based on Rijmen and Daemen's `optimized` implementation (available
   on [their website](https://web.archive.org/web/20050828204927/http://www.iaik.tu-graz.ac.at/research/krypto/AES/old/%7Erijmen/rijndael/))
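To make the new AES-RV entry concrete: the target features listed above are the ones passed to the compiler, e.g. something like RUSTFLAGS="-C target-feature=+zkne,+zknd,+unaligned-scalar-mem" together with a nightly toolchain and the `nightly` cargo feature. The snippet below is only an illustrative sketch of the compile-time selection this implies; the `AES_BACKEND` constant and the exact `cfg` predicate are made up for the example, while the feature and target-feature names come from the README.

// Illustrative only: mirrors the README's AES-RV conditions as a cfg check.
#[cfg(all(
    feature = "nightly",
    any(target_arch = "riscv32", target_arch = "riscv64"),
    target_feature = "zkne",
    target_feature = "zknd"
))]
const AES_BACKEND: &str = "AES-RV";

#[cfg(not(all(
    feature = "nightly",
    any(target_arch = "riscv32", target_arch = "riscv64"),
    target_feature = "zkne",
    target_feature = "zknd"
)))]
const AES_BACKEND: &str = "one of the other backends";

fn main() {
    println!("selected backend: {AES_BACKEND}");
}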

src/aes_arm.rs (+16, -6)
@@ -111,37 +111,47 @@ impl AesBlock {
     }
 
     #[inline(always)]
-    pub(crate) fn aese(self, round_key: Self) -> Self {
+    pub(crate) fn pre_enc_last(self, round_key: Self) -> Self {
         Self(unsafe { vaeseq_u8(self.0, round_key.0) })
     }
 
+    #[inline(always)]
+    pub(crate) fn pre_enc(self, round_key: Self) -> Self {
+        self.pre_enc_last(round_key).mc()
+    }
+
     /// Performs one round of AES encryption function (ShiftRows->SubBytes->MixColumns->AddRoundKey)
     #[inline]
     pub fn enc(self, round_key: Self) -> Self {
-        self.aese(Self::zero()).mc() ^ round_key
+        self.pre_enc(Self::zero()) ^ round_key
     }
 
     #[inline(always)]
-    pub(crate) fn aesd(self, round_key: Self) -> Self {
+    pub(crate) fn pre_dec_last(self, round_key: Self) -> Self {
         Self(unsafe { vaesdq_u8(self.0, round_key.0) })
     }
 
+    #[inline(always)]
+    pub(crate) fn pre_dec(self, round_key: Self) -> Self {
+        self.pre_dec_last(round_key).imc()
+    }
+
     /// Performs one round of AES decryption function (InvShiftRows->InvSubBytes->InvMixColumns->AddRoundKey)
     #[inline]
     pub fn dec(self, round_key: Self) -> Self {
-        self.aesd(Self::zero()).imc() ^ round_key
+        self.pre_dec(Self::zero()) ^ round_key
     }
 
     /// Performs one round of AES encryption function without MixColumns (ShiftRows->SubBytes->AddRoundKey)
     #[inline]
     pub fn enc_last(self, round_key: Self) -> Self {
-        self.aese(Self::zero()) ^ round_key
+        self.pre_enc_last(Self::zero()) ^ round_key
     }
 
     /// Performs one round of AES decryption function without InvMixColumns (InvShiftRows->InvSubBytes->AddRoundKey)
     #[inline]
     pub fn dec_last(self, round_key: Self) -> Self {
-        self.aesd(Self::zero()) ^ round_key
+        self.pre_dec_last(Self::zero()) ^ round_key
     }
 
     /// Performs the MixColumns operation
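A note on the renames above: the NEON intrinsic `vaeseq_u8(state, key)` computes SubBytes(ShiftRows(state ^ key)), so its built-in AddRoundKey happens before the S-box rather than after it as in the canonical round. The crate therefore feeds the intrinsic an all-zero key and XORs the real round key in afterwards; `pre_enc`/`pre_enc_last` and their decrypt counterparts just give that intrinsic-ordered half-round a backend-neutral name. The standalone sketch below spells the identity out with raw intrinsics; the module and function names are illustrative and not part of the crate.

#[cfg(target_arch = "aarch64")]
mod neon_round_sketch {
    use core::arch::aarch64::*;

    /// Canonical AES round (ShiftRows -> SubBytes -> MixColumns -> AddRoundKey)
    /// built from `vaeseq_u8` with a zero key, matching what `enc` does.
    #[target_feature(enable = "aes")]
    pub unsafe fn canonical_enc_round(state: uint8x16_t, round_key: uint8x16_t) -> uint8x16_t {
        let sub_shift = vaeseq_u8(state, vdupq_n_u8(0)); // SubBytes + ShiftRows (zero key makes AddRoundKey a no-op)
        let mixed = vaesmcq_u8(sub_shift); // MixColumns
        veorq_u8(mixed, round_key) // AddRoundKey with the real round key
    }
}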

src/aes_riscv32.rs (new file, +268)
use core::arch::asm;
use core::ops::{BitAnd, BitOr, BitXor, Not};
use core::{mem, slice};

#[derive(Eq, PartialEq, Copy, Clone)]
#[repr(C, align(16))]
pub struct AesBlock(u32, u32, u32, u32);

// Emits a single `aes32*i` instruction ("<instruction>i" with byte-select
// immediate $idx), XOR-accumulating its contribution into $rsd
// (rd = rs1 = $rsd, rs2 = $rs).
macro_rules! _asm {
    ($instruction:expr, $idx:literal, $rsd:ident, $rs:expr) => {
        asm!(
            concat!($instruction, "i {},{},{},", $idx),
            lateout(reg) $rsd,
            in(reg) $rsd,
            in(reg) $rs,
            options(pure, nomem, nostack)
        )
    };
}

// Builds one round from the given `aes32*` instruction: each output column
// starts from its round-key word and gathers the four byte contributions from
// the source columns selected by (forward) ShiftRows.
macro_rules! outer {
    ($name:ident, $msg:ident, $rk:ident) => {{
        #[inline(always)]
        fn $name(t0: u32, t1: u32, t2: u32, t3: u32, rk: u32) -> u32 {
            let mut value = rk;
            unsafe {
                _asm!(stringify!($name), 0, value, t0);
                _asm!(stringify!($name), 1, value, t1);
                _asm!(stringify!($name), 2, value, t2);
                _asm!(stringify!($name), 3, value, t3);
            }
            value
        }
        AesBlock(
            $name($msg.0, $msg.1, $msg.2, $msg.3, $rk.0),
            $name($msg.1, $msg.2, $msg.3, $msg.0, $rk.1),
            $name($msg.2, $msg.3, $msg.0, $msg.1, $rk.2),
            $name($msg.3, $msg.0, $msg.1, $msg.2, $rk.3),
        )
    }};
}

// Same as `outer!`, but gathers the source columns according to InvShiftRows,
// as the decryption rounds require.
macro_rules! outer_inv {
    ($name:ident, $msg:ident, $rk:ident) => {{
        #[inline(always)]
        fn $name(t0: u32, t1: u32, t2: u32, t3: u32, rk: u32) -> u32 {
            let mut value = rk;
            unsafe {
                _asm!(stringify!($name), 0, value, t0);
                _asm!(stringify!($name), 1, value, t1);
                _asm!(stringify!($name), 2, value, t2);
                _asm!(stringify!($name), 3, value, t3);
            }
            value
        }
        AesBlock(
            $name($msg.0, $msg.3, $msg.2, $msg.1, $rk.0),
            $name($msg.1, $msg.0, $msg.3, $msg.2, $rk.1),
            $name($msg.2, $msg.1, $msg.0, $msg.3, $rk.2),
            $name($msg.3, $msg.2, $msg.1, $msg.0, $rk.3),
        )
    }};
}

impl From<[u8; 16]> for AesBlock {
    #[inline]
    fn from(value: [u8; 16]) -> Self {
        Self::new(value)
    }
}

impl BitAnd for AesBlock {
    type Output = Self;

    #[inline]
    fn bitand(self, rhs: Self) -> Self::Output {
        Self(
            self.0 & rhs.0,
            self.1 & rhs.1,
            self.2 & rhs.2,
            self.3 & rhs.3,
        )
    }
}

impl BitOr for AesBlock {
    type Output = Self;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        Self(
            self.0 | rhs.0,
            self.1 | rhs.1,
            self.2 | rhs.2,
            self.3 | rhs.3,
        )
    }
}

impl BitXor for AesBlock {
    type Output = Self;

    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        Self(
            self.0 ^ rhs.0,
            self.1 ^ rhs.1,
            self.2 ^ rhs.2,
            self.3 ^ rhs.3,
        )
    }
}

impl Not for AesBlock {
    type Output = Self;

    #[inline]
    fn not(self) -> Self::Output {
        Self(!self.0, !self.1, !self.2, !self.3)
    }
}

impl AesBlock {
    #[inline]
    pub const fn new(value: [u8; 16]) -> Self {
        unsafe { mem::transmute(value) }
    }

    #[inline]
    pub fn store_to(self, dst: &mut [u8]) {
        assert!(dst.len() >= 16);
        unsafe { *dst.as_mut_ptr().cast::<[u8; 16]>() = mem::transmute(self) }
    }

    #[inline]
    pub fn zero() -> Self {
        Self(0, 0, 0, 0)
    }

    #[inline]
    pub fn is_zero(self) -> bool {
        (self.0 | self.1 | self.2 | self.3) == 0
    }

    #[inline(always)]
    pub(crate) fn pre_enc(self, round_key: Self) -> Self {
        outer!(aes32esm, self, round_key)
    }

    /// Performs one round of AES encryption function (ShiftRows->SubBytes->MixColumns->AddRoundKey)
    #[inline]
    pub fn enc(self, round_key: Self) -> Self {
        self.pre_enc(Self::zero()) ^ round_key
    }

    #[inline(always)]
    pub(crate) fn pre_enc_last(self, round_key: Self) -> Self {
        outer!(aes32es, self, round_key)
    }

    /// Performs one round of AES encryption function without MixColumns (ShiftRows->SubBytes->AddRoundKey)
    #[inline]
    pub fn enc_last(self, round_key: Self) -> Self {
        self.pre_enc_last(Self::zero()) ^ round_key
    }

    #[inline(always)]
    pub(crate) fn pre_dec(self, round_key: Self) -> Self {
        outer_inv!(aes32dsm, self, round_key)
    }

    /// Performs one round of AES decryption function (InvShiftRows->InvSubBytes->InvMixColumns->AddRoundKey)
    #[inline]
    pub fn dec(self, round_key: Self) -> Self {
        self.pre_dec(Self::zero()) ^ round_key
    }

    #[inline(always)]
    pub(crate) fn pre_dec_last(self, round_key: Self) -> Self {
        outer_inv!(aes32ds, self, round_key)
    }

    /// Performs one round of AES decryption function without InvMixColumns (InvShiftRows->InvSubBytes->AddRoundKey)
    #[inline]
    pub fn dec_last(self, round_key: Self) -> Self {
        self.pre_dec_last(Self::zero()) ^ round_key
    }

    /// Performs the MixColumns operation
    #[inline]
    pub fn mc(self) -> Self {
        self.pre_dec_last(Self::zero()).enc(Self::zero())
    }

    /// Performs the InvMixColumns operation
    #[inline]
    pub fn imc(self) -> Self {
        self.pre_enc_last(Self::zero()).dec(Self::zero())
    }
}

const RCON: [u32; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36];

#[inline(always)]
fn sub_word(xor: u32, word: u32) -> u32 {
    let mut value = xor;
    unsafe {
        _asm!("aes32es", 0, value, word);
        _asm!("aes32es", 1, value, word);
        _asm!("aes32es", 2, value, word);
        _asm!("aes32es", 3, value, word);
    }
    value
}

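// Note for the key-expansion routines below: key words are little-endian u32
// columns, so FIPS-197's RotWord is a `rotate_right(8)`. `sub_word(x, w)` only
// performs SubWord (each aes32esi call substitutes byte `bs` of `w` and XORs
// it back in at the same byte position), so the RotWord rotation must be
// applied to its second argument explicitly wherever the schedule calls for it.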
pub(super) fn keygen_128(key: [u8; 16]) -> [AesBlock; 11] {
    let mut expanded_keys: [AesBlock; 11] = unsafe { mem::zeroed() };

    let keys_ptr: *mut u32 = expanded_keys.as_mut_ptr().cast();
    let columns = unsafe { slice::from_raw_parts_mut(keys_ptr, 44) };

    for (i, chunk) in key.chunks_exact(4).enumerate() {
        columns[i] = u32::from_ne_bytes(chunk.try_into().unwrap());
    }

    for i in (0..40).step_by(4) {
        // SubWord(RotWord(W[i+3])) ^ Rcon ^ W[i]
        columns[i + 4] = sub_word(columns[i + 0] ^ RCON[i / 4], columns[i + 3].rotate_right(8));
        columns[i + 5] = columns[i + 1] ^ columns[i + 4];
        columns[i + 6] = columns[i + 2] ^ columns[i + 5];
        columns[i + 7] = columns[i + 3] ^ columns[i + 6];
    }

    expanded_keys
}

pub(super) fn keygen_192(key: [u8; 24]) -> [AesBlock; 13] {
    let mut expanded_keys: [AesBlock; 13] = unsafe { mem::zeroed() };

    let keys_ptr: *mut u32 = expanded_keys.as_mut_ptr().cast();
    let columns = unsafe { slice::from_raw_parts_mut(keys_ptr, 52) };

    for (i, chunk) in key.chunks_exact(4).enumerate() {
        columns[i] = u32::from_ne_bytes(chunk.try_into().unwrap());
    }

    for i in (0..42).step_by(6) {
        columns[i + 6] = sub_word(columns[i + 0] ^ RCON[i / 6], columns[i + 5].rotate_right(8));
        columns[i + 7] = columns[i + 1] ^ columns[i + 6];
        columns[i + 8] = columns[i + 2] ^ columns[i + 7];
        columns[i + 9] = columns[i + 3] ^ columns[i + 8];
        columns[i + 10] = columns[i + 4] ^ columns[i + 9];
        columns[i + 11] = columns[i + 5] ^ columns[i + 10];
    }

    columns[48] = sub_word(columns[42] ^ RCON[7], columns[47].rotate_right(8));
    columns[49] = columns[43] ^ columns[48];
    columns[50] = columns[44] ^ columns[49];
    columns[51] = columns[45] ^ columns[50];

    expanded_keys
}

pub(super) fn keygen_256(key: [u8; 32]) -> [AesBlock; 15] {
    let mut expanded_keys: [AesBlock; 15] = unsafe { mem::zeroed() };

    let keys_ptr: *mut u32 = expanded_keys.as_mut_ptr().cast();
    let columns = unsafe { slice::from_raw_parts_mut(keys_ptr, 60) };

    for (i, chunk) in key.chunks_exact(4).enumerate() {
        columns[i] = u32::from_ne_bytes(chunk.try_into().unwrap());
    }

    for i in (0..48).step_by(8) {
        columns[i + 8] = sub_word(columns[i + 0] ^ RCON[i / 8], columns[i + 7].rotate_right(8));
        columns[i + 9] = columns[i + 1] ^ columns[i + 8];
        columns[i + 10] = columns[i + 2] ^ columns[i + 9];
        columns[i + 11] = columns[i + 3] ^ columns[i + 10];
        // The i % 8 == 4 position takes SubWord without RotWord
        columns[i + 12] = sub_word(columns[i + 4], columns[i + 11]);
        columns[i + 13] = columns[i + 5] ^ columns[i + 12];
        columns[i + 14] = columns[i + 6] ^ columns[i + 13];
        columns[i + 15] = columns[i + 7] ^ columns[i + 14];
    }

    columns[56] = sub_word(columns[48] ^ RCON[6], columns[55].rotate_right(8));
    columns[57] = columns[49] ^ columns[56];
    columns[58] = columns[50] ^ columns[57];
    columns[59] = columns[51] ^ columns[58];

    expanded_keys
}
