Skip to content

Commit 1a3ff0e

Browse files
committed
mayo: refine function names and explain the techniques better
1 parent 20ad6a5 commit 1a3ff0e

File tree

8 files changed

+144
-96
lines changed

8 files changed

+144
-96
lines changed

sign/mayo/mode1/internal/matrix.go

+27-22
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,13 @@ func upper(in []uint64, out []uint64, size int) {
154154
}
155155
}
156156

157-
func variableTime(sps []uint64, p1 []uint64, p2 []uint64, p3 []uint64, s []uint8) {
157+
// The variable time technique is described in the "Nibbling" paper (https://eprint.iacr.org/2023/1683.pdf)
158+
// Section 4 (and Figure 2).
159+
func calculatePStVarTime(sps []uint64, p1 []uint64, p2 []uint64, p3 []uint64, s []uint8) {
158160
var accumulator [K * N][P * 16]uint64
159161

160-
// compute P * S^t = [ P1 P2 ] * [S1] = [P1*S1 + P2*S2]
161-
// [ 0 P3 ] [S2] [ P3*S2]
162+
// compute P * S^t = [ P1 P2 ] * [S1^t] = [P1*S1^t + P2*S2^t]
163+
// [ 0 P3 ] [S2^t] [ P3*S2^t]
162164

163165
// Note that S = S1||S2 is strided at N=V+O
164166

@@ -167,7 +169,7 @@ func variableTime(sps []uint64, p1 []uint64, p2 []uint64, p3 []uint64, s []uint8
167169
for r := 0; r < V; r++ {
168170
for c := r; c < V; c++ {
169171
for k := 0; k < K; k++ {
170-
vecAddPacked(P, p1[P*pos:], accumulator[r*K+k][P*int(s[k*N+c]):])
172+
vecAddPacked(p1[P*pos:], accumulator[r*K+k][P*int(s[k*N+c]):])
171173
}
172174
pos++
173175
}
@@ -178,7 +180,7 @@ func variableTime(sps []uint64, p1 []uint64, p2 []uint64, p3 []uint64, s []uint8
178180
for r := 0; r < V; r++ {
179181
for c := 0; c < O; c++ {
180182
for k := 0; k < K; k++ {
181-
vecAddPacked(P, p2[P*pos:], accumulator[r*K+k][P*int(s[k*N+V+c]):])
183+
vecAddPacked(p2[P*pos:], accumulator[r*K+k][P*int(s[k*N+V+c]):])
182184
}
183185
pos++
184186
}
@@ -189,50 +191,51 @@ func variableTime(sps []uint64, p1 []uint64, p2 []uint64, p3 []uint64, s []uint8
189191
for r := 0; r < O; r++ {
190192
for c := r; c < O; c++ {
191193
for k := 0; k < K; k++ {
192-
vecAddPacked(P, p3[P*pos:], accumulator[(r+V)*K+k][P*int(s[k*N+V+c]):])
194+
vecAddPacked(p3[P*pos:], accumulator[(r+V)*K+k][P*int(s[k*N+V+c]):])
193195
}
194196
pos++
195197
}
196198
}
197199

198200
for i := 0; i < K*N; i++ {
199-
aggregate(P, accumulator[i], sps[P*i:])
201+
accumulate(P, accumulator[i], sps[P*i:])
200202
}
201203
}
202204

203-
func variableTime2(sps []uint64, s []uint8, pst []uint64) {
205+
func calculateSPstVarTime(sps []uint64, s []uint8, pst []uint64) {
204206
var accumulator [K * K][P * 16]uint64
205207

206208
// S * PST : KxN * NxK
207209
for r := 0; r < K; r++ {
208210
for c := 0; c < N; c++ {
209211
for k := 0; k < K; k++ {
210-
vecAddPacked(P, pst[P*(c*K+k):], accumulator[r*K+k][P*int(s[r*N+c]):])
212+
vecAddPacked(pst[P*(c*K+k):], accumulator[r*K+k][P*int(s[r*N+c]):])
211213
}
212214
}
213215
}
214216

215217
for i := 0; i < K*K; i++ {
216-
aggregate(P, accumulator[i], sps[P*i:])
218+
accumulate(P, accumulator[i], sps[P*i:])
217219
}
218220
}
219221

220-
// p is always P, but is still kept to be consistent with other functions
221-
//
222-
//nolint:unparam
223-
func vecAddPacked(p int, in []uint64, acc []uint64) {
224-
for i := 0; i < p; i++ {
222+
func vecAddPacked(in []uint64, acc []uint64) {
223+
for i := 0; i < P; i++ {
225224
acc[i] ^= in[i]
226225
}
227226
}
228227

229-
func aggregate(p int, bins [P * 16]uint64, out []uint64) {
230-
// The following two methods are mathematically equivalent, but the second one is slightly faster.
228+
func accumulate(p int, bins [P * 16]uint64, out []uint64) {
229+
// The following two approaches are mathematically equivalent, but the second one is slightly faster.
231230

232-
// the powers of x mod x^4+x+1, represented as integers, are 1,2,4,8,3,..,13,9
233-
// out = bins[9]*x^14 + bins[13]*x^13 + bins[15]*x^12 ... + bin[4]*x^2 + bins[2]*x + bins[1]
234-
// = ((bins[9]x+bins[13])x+bins[15])x + ... bins[4])x+bins[2])x+bins[1]
231+
// Here we chose to multiply by x^-1 all the way through,
232+
// unlike Method 3 in Figure 2 (see paper) which interleaves *x and *x^-1
233+
// which probably gives more parallelism on more complex CPUs.
234+
//
235+
// Also, on M1 Pro, Method 2 in Figure 2 is not faster than Approach 2 coded here.
235236

237+
// Approach 1. Multiplying by x all the way through:
238+
// the powers of x mod x^4+x+1, represented as integers, are 1,2,4,8,3,..,13,9
236239
// vecMulAddPackedByX(p, bins[P*9:], bins[P*13:])
237240
// vecMulAddPackedByX(p, bins[P*13:], bins[P*15:])
238241
// vecMulAddPackedByX(p, bins[P*15:], bins[P*14:])
@@ -249,8 +252,8 @@ func aggregate(p int, bins [P * 16]uint64, out []uint64) {
249252
// vecMulAddPackedByX(p, bins[P*2:], bins[P*1:])
250253
// copy(out[:P], bins[P*1:])
251254

252-
// In the reversed order of the above, because /x turns out to be slightly faster than *x.
253-
// out = ((bins[2]x^-1+bins[4])x^-1+bins[8])x^-1 + ... bins[13])x^-1+bins[9])x^-1+bins[1]
255+
// Approach 2. Multiplying by x^-1 all the way through:
256+
// In the reversed order of the first approach, because /x turns out to be slightly faster than *x.
254257
vecMulAddPackedByInvX(p, bins[P*2:], bins[P*4:])
255258
vecMulAddPackedByInvX(p, bins[P*4:], bins[P*8:])
256259
vecMulAddPackedByInvX(p, bins[P*8:], bins[P*3:])
@@ -278,7 +281,9 @@ func aggregate(p int, bins [P * 16]uint64, out []uint64) {
278281
// }
279282
// }
280283

284+
// It can be seen by comparison to the commented code above that this requires fewer instructions.
281285
func vecMulAddPackedByInvX(p int, in []uint64, acc []uint64) {
286+
// Equivalently:
282287
// vecMulAddPacked(p, in, 9, acc)
283288

284289
lsb := uint64(0x1111111111111111)

sign/mayo/mode1/internal/mayo.go

+9-2
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,11 @@ func extract(in []uint64, index int) byte {
371371
return byte((in[leg] >> (offset * 4)) & 0xF)
372372
}
373373

374+
// The following code to compute echelon form is taken from the reference code:
375+
// https://github.com/PQCMayo/MAYO-C/tree/nibbling-mayo/src
376+
//
377+
// At the time of writing, researchers are still working on a formally verified implementation.
378+
374379
// put matrix in row echelon form with ones on first nonzero entries *in constant time*
375380
func ef(A []byte, nrows, ncols int) {
376381
// ncols is actually always K*O + 1
@@ -635,15 +640,17 @@ func Verify(msg []byte, sig []byte, epk *ExpandedPublicKey) bool {
635640
// compute P * S^t = [ P1 P2 ] * [S1^t] = [P1*S1^t + P2*S2^t]
636641
// [ 0 P3 ] [S2^t] [ P3*S2^t]
637642
var pst [M * N * K / 16]uint64
643+
// Constant time approach:
638644
// mulAddMMatXMatTrans(pst[:], P1, s[:], V, V, K, N, true)
639645
// mulAddMMatXMatTrans(pst[:], P2, s[V:], V, O, K, N, false)
640646
// mulAddMMatXMatTrans(pst[M*V*K/16:], P3, s[V:], O, O, K, N, true)
641-
variableTime(pst[:], P1, P2, P3, s[:])
647+
// Variable time approach with table access where index depends on input:
648+
calculatePStVarTime(pst[:], P1, P2, P3, s[:])
642649

643650
// compute S * PST
644651
var sps [M * K * K / 16]uint64
645652
// mulAddMatXMMat(sps[:], s[:], pst[:], K, N, K)
646-
variableTime2(sps[:], s[:], pst[:])
653+
calculateSPstVarTime(sps[:], s[:], pst[:])
647654

648655
emulsifyInto(sps[:], t[:])
649656

sign/mayo/mode2/internal/matrix.go

+27-22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sign/mayo/mode2/internal/mayo.go

+9-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)