@@ -60,6 +60,39 @@ func (z *scalar64) cmov(x *scalar64, b uint64) {
}
}

+ // mul calculates z = x * y.
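+ // The product is accumulated one word of y at a time, starting from the
+ // most significant word: at each step the running value is shifted up by
+ // one word, folded back with reduceOneWord, and the next partial product
+ // x*y[i] is added, so intermediates never grow beyond _N+1 words.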
+ func (z *scalar64) mul(x, y *scalar64) *scalar64 {
+     var t scalar64
+     prod := (&[_N + 1]uint64{})[:]
+     mulWord(prod, x[:], y[_N-1])
+     copy(t[:], prod[:_N])
+     t.reduceOneWord(prod[_N])
+     for i := _N - 2; i >= 0; i-- {
+         h := t.leftShift(0)
+         t.reduceOneWord(h)
+         mulWord(prod, x[:], y[i])
+         c := add(t[:], t[:], prod[:_N])
+         t.reduceOneWord(prod[_N] + c)
+     }
+     *z = t
+     return z
+ }
+ 
+ // sqrn calculates z = x^(2^n).
+ func (z *scalar64) sqrn(x *scalar64, n uint) *scalar64 {
+     t := *x
+     for i := uint(0); i < n; i++ {
+         t.mul(&t, &t)
+     }
+     *z = t
+     return z
+ }
+ 
+ // sqrnmul calculates z = x^(2^n) * y.
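+ // It is the basic step of the addition chain used by Inv below: n
+ // squarings followed by a single multiplication.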
+ func (z *scalar64) sqrnmul(x *scalar64, n uint, y *scalar64) *scalar64 {
+     return z.mul(z.sqrn(x, n), y)
+ }
+ 
// add calculates z = x + y. Assumes len(z) > max(len(x),len(y)).
func add(z, x, y []uint64) uint64 {
    l, L, zz := len(x), len(y), y
@@ -203,49 +236,61 @@ func (z *Scalar) Mul(x, y *Scalar) {
    var z64, x64, y64 scalar64
    x64.fromScalar(x)
    y64.fromScalar(y)
-     coremul(&z64, &x64, &y64)
+     z64.mul(&x64, &y64)
    z64.modOrder()
    z64.toScalar(z)
}

- func coremul(z64, x64, y64 *scalar64) {
-     var p64 scalar64
-     prod := (&[_N + 1]uint64{})[:]
-     mulWord(prod, x64[:], y64[_N-1])
-     copy(p64[:], prod[:_N])
-     p64.reduceOneWord(prod[_N])
-     for i := _N - 2; i >= 0; i-- {
-         h := p64.leftShift(0)
-         p64.reduceOneWord(h)
-         mulWord(prod, x64[:], y64[i])
-         c := add(p64[:], p64[:], prod[:_N])
-         p64.reduceOneWord(prod[_N] + c)
-     }
-     *z64 = p64
- }
- 
- // Inv calculates z = 1/x mod order.
+ // Inv calculates z = 1/x mod order.
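+ // The inverse is computed as x^(order-2) with a fixed addition chain,
+ // replacing the previous 4-bit windowed exponentiation over orderMinusTwo.
+ // In the names below, xB is x raised to the binary exponent B (for example,
+ // x1011 = x^0b1011); the trailing comments track exponents additively:
+ // "<< n" stands for n squarings and "+" for a multiplication.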
func (z *Scalar) Inv(x *Scalar) {
    var x64 scalar64
    x64.fromScalar(x)

-     var T [16]scalar64
-     T[0] = scalar64{1}
-     for i := 1; i < 16; i++ {
-         coremul(&T[i], &T[i-1], &x64)
-     }
- 
-     t := &scalar64{1}
-     for i := 8*len(orderMinusTwo) - 4; i >= 0; i -= 4 {
-         b := (orderMinusTwo[i/8] >> uint(i%8)) & 0xF
-         coremul(t, t, t)
-         coremul(t, t, t)
-         coremul(t, t, t)
-         coremul(t, t, t)
-         if b != 0 {
-             coremul(t, t, &T[b])
-         }
-     }
-     t.modOrder()
-     t.toScalar(z)
+     x10 := (&scalar64{}).mul(&x64, &x64) // x10 = 2 * 1
+     x11 := (&scalar64{}).mul(x10, &x64) // x11 = 1 + x10
+     x100 := (&scalar64{}).mul(x11, &x64) // x100 = 1 + x11
+     x101 := (&scalar64{}).mul(x100, &x64) // x101 = 1 + x100
+     x1001 := (&scalar64{}).mul(x100, x101) // x1001 = x100 + x101
+     x1011 := (&scalar64{}).mul(x10, x1001) // x1011 = x10 + x1001
+     x1101 := (&scalar64{}).mul(x10, x1011) // x1101 = x10 + x1011
+     x1111 := (&scalar64{}).mul(x10, x1101) // x1111 = x10 + x1101
+     x10001 := (&scalar64{}).mul(x10, x1111) // x10001 = x10 + x1111
+     x10011 := (&scalar64{}).mul(x10, x10001) // x10011 = x10 + x10001
+     x10101 := (&scalar64{}).mul(x10, x10011) // x10101 = x10 + x10011
+     x10111 := (&scalar64{}).mul(x10, x10101) // x10111 = x10 + x10101
+     x11001 := (&scalar64{}).mul(x10, x10111) // x11001 = x10 + x10111
+     x11011 := (&scalar64{}).mul(x10, x11001) // x11011 = x10 + x11001
+     x11101 := (&scalar64{}).mul(x10, x11011) // x11101 = x10 + x11011
+     x11111 := (&scalar64{}).mul(x10, x11101) // x11111 = x10 + x11101
+     x111110 := (&scalar64{}).mul(x11111, x11111) // x111110 = 2 * x11111
+     x1111100 := (&scalar64{}).mul(x111110, x111110) // x1111100 = 2 * x111110
+     var i24, i41, i73, i129, t = &scalar64{}, &scalar64{}, &scalar64{},
+         &scalar64{}, &scalar64{}
+     var x222, i262, i279, i298, i312, i331, i343, i365,
+         i375, i396, i411, i431, i444, i464, i478, i498, iret *scalar64 = t, t,
+         t, t, t, t, t, t, t, t, t, t, t, t, t, t, t
+ 
+     i24.sqrnmul(x1111100, 5, x1111100) // i24 = x1111100 << 5 + x1111100
+     i41.sqrnmul(i41.sqrnmul(i24, 4, x111110), 11, i24) // i41 = (i24 << 4 + x111110) << 11 + i24
+     i73.sqrnmul(i73.sqrnmul(i41, 4, x111110), 26, i41) // i73 = (i41 << 4 + x111110) << 26 + i41
+     i129.sqrnmul(i73, 55, i73) // i129 = i73 << 55 + i73
+     x222.mul(x222.sqrnmul(i129, 110, x11), i129) // x222 = i129 << 110 + x11 + i129
+     i262.sqrn(i262.sqrnmul(i262.sqrnmul(x222, 6, x11111), 7, x11001), 6) // i262 = ((x222 << 6 + x11111) << 7 + x11001) << 6
+     i279.sqrnmul(i279.sqrnmul(i279.mul(x10001, i262), 8, x11111), 6, x10011) // i279 = ((x10001 + i262) << 8 + x11111) << 6 + x10011
+     i298.sqrn(i298.sqrnmul(i298.sqrnmul(i279, 5, x10001), 8, x10011), 4) // i298 = ((i279 << 5 + x10001) << 8 + x10011) << 4
+     i312.sqrnmul(i312.sqrnmul(i312.mul(x1011, i298), 6, x11011), 5, x1001) // i312 = ((x1011 + i298) << 6 + x11011) << 5 + x1001
+     i331.sqrn(i331.sqrnmul(i331.sqrnmul(i312, 6, x1101), 6, x11101), 5) // i331 = ((i312 << 6 + x1101) << 6 + x11101) << 5
+     i343.sqrnmul(i343.sqrnmul(i343.mul(x10101, i331), 5, x10001), 4, x1011) // i343 = ((x10101 + i331) << 5 + x10001) << 4 + x1011
+     i365.sqrn(i365.sqrnmul(i365.sqrnmul(i343, 5, x1001), 7, &x64), 8) // i365 = ((i343 << 5 + x1001) << 7 + 1) << 8
+     i375.sqrnmul(i375.sqrnmul(i375.mul(x1011, i365), 6, x11001), 1, &x64) // i375 = 2*((x1011 + i365) << 6 + x11001) + 1
+     i396.sqrn(i396.sqrnmul(i396.sqrnmul(i375, 9, x10011), 4, x1001), 6) // i396 = ((i375 << 9 + x10011) << 4 + x1001) << 6
+     i411.sqrnmul(i411.sqrnmul(i411.mul(x10001, i396), 5, x10111), 7, x1011) // i411 = ((x10001 + i396) << 5 + x10111) << 7 + x1011
+     i431.sqrn(i431.sqrnmul(i431.sqrnmul(i411, 7, x1111), 6, x10101), 5) // i431 = ((i411 << 7 + x1111) << 6 + x10101) << 5
+     i444.sqrnmul(i444.sqrnmul(i444.mul(x1001, i431), 8, x11011), 2, x11) // i444 = ((x1001 + i431) << 8 + x11011) << 2 + x11
+     i464.sqrn(i464.sqrnmul(i464.sqrnmul(i444, 5, x11), 7, x101), 6) // i464 = ((i444 << 5 + x11) << 7 + x101) << 6
+     i478.sqrnmul(i478.sqrnmul(i478.mul(x1001, i464), 6, x10101), 5, x1101) // i478 = ((x1001 + i464) << 6 + x10101) << 5 + x1101
+     i498.sqrn(i498.sqrnmul(i498.sqrnmul(i478, 3, x11), 9, x10001), 6) // i498 = ((i478 << 3 + x11) << 9 + x10001) << 6
+     iret.sqrnmul(iret.mul(x1111, i498), 4, &x64) // z = (x1111 + i498) << 4 + 1
+     iret.modOrder()
+     iret.toScalar(z)
}