Source file src/pkg/crypto/elliptic/p256_asm.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package elliptic
16
17 import (
18 "math/big"
19 "sync"
20 )
21
22 type (
23 p256Curve struct {
24 *CurveParams
25 }
26
27 p256Point struct {
28 xyz [12]uint64
29 }
30 )
31
32 var (
33 p256 p256Curve
34 p256Precomputed *[43][32 * 8]uint64
35 precomputeOnce sync.Once
36 )
37
38 func initP256() {
39
40 p256.CurveParams = &CurveParams{Name: "P-256"}
41 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
42 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
43 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
44 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
45 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
46 p256.BitSize = 256
47 }
48
49 func (curve p256Curve) Params() *CurveParams {
50 return curve.CurveParams
51 }
52
53
54
55
// The functions below are declared without a Go body, so their
// implementations live outside this file.
// NOTE(review): presumably in the matching assembly source — confirm. The
// notes on each declaration describe how this file uses the routine; the
// exact contracts must be checked against the implementation.

// p256Mul stores the field product of in1 and in2 in res (4 limbs each).
// NOTE(review): callers multiply inputs by rr and pass results through
// p256FromMont, so this is presumably a Montgomery multiplication modulo P —
// confirm.
func p256Mul(res, in1, in2 []uint64)

// p256Sqr stores in res the result of squaring in, n times in a row.
// p256Inverse relies on n meaning "number of successive squarings".
// NOTE(review): presumably Montgomery squaring modulo P — confirm.
func p256Sqr(res, in []uint64, n int)

// p256FromMont is applied to coordinates just before they are serialised in
// p256PointToAffine.
// NOTE(review): presumably converts out of the Montgomery domain — confirm.
func p256FromMont(res, in []uint64)

// p256NegCond is called on a Y coordinate with the sign flag from the Booth
// recoding.
// NOTE(review): presumably negates val when cond is 1 and leaves it
// untouched when cond is 0 — confirm.
func p256NegCond(val []uint64, cond int)

// p256MovCond writes either a or b into res depending on cond.
// NOTE(review): usage in p256ScalarMult suggests res = b when cond is 0 and
// res = a otherwise — confirm.
func p256MovCond(res, a, b []uint64, cond int)

// p256BigToLittle converts a byte slice into 64-bit limbs.
// NOTE(review): presumably 32 big-endian bytes to 4 little-endian limbs —
// confirm.
func p256BigToLittle(res []uint64, in []byte)

// p256LittleToBig converts 4 limbs into the 32-byte buffer that callers then
// hand to big.Int.SetBytes (which reads big-endian bytes).
func p256LittleToBig(res []byte, in []uint64)

// p256Select copies the table entry chosen by idx into point. It is used with
// the 16-entry, 12-limb-per-point table built in p256ScalarMult.
// NOTE(review): presumably constant-time, with idx 0 selecting no entry —
// confirm.
func p256Select(point, table []uint64, idx int)

// p256SelectBase copies the table entry chosen by idx into point. It is used
// with a row of p256Precomputed (8 limbs per point).
// NOTE(review): presumably constant-time, with idx 0 selecting no entry —
// confirm.
func p256SelectBase(point, table []uint64, idx int)

// p256OrdMul stores the product of in1 and in2 in res; used by Inverse.
// NOTE(review): presumably Montgomery multiplication modulo the group order
// N — confirm.
func p256OrdMul(res, in1, in2 []uint64)

// p256OrdSqr stores in res the result of squaring in, n times in a row; used
// by Inverse.
// NOTE(review): presumably Montgomery squaring modulo the group order N —
// confirm.
func p256OrdSqr(res, in []uint64, n int)

// p256PointAddAffineAsm adds the 8-limb (X, Y) point in2 to the 12-limb point
// in1 and stores the result in res. p256BaseMult passes the Booth sign as
// sign, the Booth digit as sel and the OR of all earlier digits as zero.
// NOTE(review): presumably sign == 1 negates in2, sel == 0 yields in1 and
// zero == 0 yields in2 — confirm.
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)

// p256PointAddAsm adds the 12-limb points in1 and in2 into res. CombinedMult
// treats the returned int as a "the two inputs were equal" flag and falls
// back to a doubling when it is set.
// NOTE(review): confirm the exact meaning of the return value.
func p256PointAddAsm(res, in1, in2 []uint64) int

// p256PointDoubleAsm stores the double of the 12-limb point in into res.
func p256PointDoubleAsm(res, in []uint64)
112
113 func (curve p256Curve) Inverse(k *big.Int) *big.Int {
114 if k.Sign() < 0 {
115
116 k = new(big.Int).Neg(k)
117 }
118
119 if k.Cmp(p256.N) >= 0 {
120
121 k = new(big.Int).Mod(k, p256.N)
122 }
123
124
125 var table [4 * 9]uint64
126 var (
127 _1 = table[4*0 : 4*1]
128 _11 = table[4*1 : 4*2]
129 _101 = table[4*2 : 4*3]
130 _111 = table[4*3 : 4*4]
131 _1111 = table[4*4 : 4*5]
132 _10101 = table[4*5 : 4*6]
133 _101111 = table[4*6 : 4*7]
134 x = table[4*7 : 4*8]
135 t = table[4*8 : 4*9]
136 )
137
138 fromBig(x[:], k)
139
140
141
142
143
144
145
146 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
147 p256OrdMul(_1, x, RR)
148 p256OrdSqr(x, _1, 1)
149 p256OrdMul(_11, x, _1)
150 p256OrdMul(_101, x, _11)
151 p256OrdMul(_111, x, _101)
152 p256OrdSqr(x, _101, 1)
153 p256OrdMul(_1111, _101, x)
154
155 p256OrdSqr(t, x, 1)
156 p256OrdMul(_10101, t, _1)
157 p256OrdSqr(x, _10101, 1)
158 p256OrdMul(_101111, _101, x)
159 p256OrdMul(x, _10101, x)
160 p256OrdSqr(t, x, 2)
161 p256OrdMul(t, t, _11)
162 p256OrdSqr(x, t, 8)
163 p256OrdMul(x, x, t)
164 p256OrdSqr(t, x, 16)
165 p256OrdMul(t, t, x)
166
167 p256OrdSqr(x, t, 64)
168 p256OrdMul(x, x, t)
169 p256OrdSqr(x, x, 32)
170 p256OrdMul(x, x, t)
171
172 sqrs := []uint8{
173 6, 5, 4, 5, 5,
174 4, 3, 3, 5, 9,
175 6, 2, 5, 6, 5,
176 4, 5, 5, 3, 10,
177 2, 5, 5, 3, 7, 6}
178 muls := [][]uint64{
179 _101111, _111, _11, _1111, _10101,
180 _101, _101, _101, _111, _101111,
181 _1111, _1, _1, _1111, _111,
182 _111, _111, _101, _11, _101111,
183 _11, _11, _11, _1, _10101, _1111}
184
185 for i, s := range sqrs {
186 p256OrdSqr(x, x, int(s))
187 p256OrdMul(x, x, muls[i])
188 }
189
190
191
192 one := []uint64{1, 0, 0, 0}
193 p256OrdMul(x, x, one)
194
195 xOut := make([]byte, 32)
196 p256LittleToBig(xOut, x)
197 return new(big.Int).SetBytes(xOut)
198 }
199
200
// fromBig writes the magnitude of big into out, least-significant word first,
// and zeroes every limb of out the value does not reach. It panics if the
// value has more words than out has limbs, so callers must reduce it first.
func fromBig(out []uint64, big *big.Int) {
	limbs := big.Bits()

	for i := 0; i < len(out); i++ {
		out[i] = 0
	}
	for i := 0; i < len(limbs); i++ {
		out[i] = uint64(limbs[i])
	}
}
210
211
212
213
214 func p256GetScalar(out []uint64, in []byte) {
215 n := new(big.Int).SetBytes(in)
216
217 if n.Cmp(p256.N) >= 0 {
218 n.Mod(n, p256.N)
219 }
220 fromBig(out, n)
221 }
222
223
224
225
// rr is 2^512 mod P (the field prime), least-significant limb first. Callers
// pass it to p256Mul together with a freshly loaded coordinate.
// NOTE(review): presumably this converts the coordinate into the Montgomery
// form used by the assembly routines (R = 2^256) — confirm.
var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
227
228 func maybeReduceModP(in *big.Int) *big.Int {
229 if in.Cmp(p256.P) < 0 {
230 return in
231 }
232 return new(big.Int).Mod(in, p256.P)
233 }
234
235 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
236 scalarReversed := make([]uint64, 4)
237 var r1, r2 p256Point
238 p256GetScalar(scalarReversed, baseScalar)
239 r1IsInfinity := scalarIsZero(scalarReversed)
240 r1.p256BaseMult(scalarReversed)
241
242 p256GetScalar(scalarReversed, scalar)
243 r2IsInfinity := scalarIsZero(scalarReversed)
244 fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
245 fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
246 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
247 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
248
249
250 r2.xyz[8] = 0x0000000000000001
251 r2.xyz[9] = 0xffffffff00000000
252 r2.xyz[10] = 0xffffffffffffffff
253 r2.xyz[11] = 0x00000000fffffffe
254
255 r2.p256ScalarMult(scalarReversed)
256
257 var sum, double p256Point
258 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
259 p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
260 sum.CopyConditional(&double, pointsEqual)
261 sum.CopyConditional(&r1, r2IsInfinity)
262 sum.CopyConditional(&r2, r1IsInfinity)
263
264 return sum.p256PointToAffine()
265 }
266
267 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
268 scalarReversed := make([]uint64, 4)
269 p256GetScalar(scalarReversed, scalar)
270
271 var r p256Point
272 r.p256BaseMult(scalarReversed)
273 return r.p256PointToAffine()
274 }
275
276 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
277 scalarReversed := make([]uint64, 4)
278 p256GetScalar(scalarReversed, scalar)
279
280 var r p256Point
281 fromBig(r.xyz[0:4], maybeReduceModP(bigX))
282 fromBig(r.xyz[4:8], maybeReduceModP(bigY))
283 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
284 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
285
286 r.xyz[8] = 0x0000000000000001
287 r.xyz[9] = 0xffffffff00000000
288 r.xyz[10] = 0xffffffffffffffff
289 r.xyz[11] = 0x00000000fffffffe
290
291 r.p256ScalarMult(scalarReversed)
292 return r.p256PointToAffine()
293 }
294
295
// uint64IsZero returns 1 if x is zero and 0 otherwise, without branching on
// the value of x.
func uint64IsZero(x uint64) int {
	// After inversion every bit is set exactly when x was zero. Folding the
	// word onto itself leaves the AND of all 64 bits in bit 0.
	folded := ^x
	for shift := uint(32); shift > 0; shift >>= 1 {
		folded &= folded >> shift
	}
	return int(folded & 1)
}
306
307
308
309 func scalarIsZero(scalar []uint64) int {
310 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
311 }
312
313 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
314 zInv := make([]uint64, 4)
315 zInvSq := make([]uint64, 4)
316 p256Inverse(zInv, p.xyz[8:12])
317 p256Sqr(zInvSq, zInv, 1)
318 p256Mul(zInv, zInv, zInvSq)
319
320 p256Mul(zInvSq, p.xyz[0:4], zInvSq)
321 p256Mul(zInv, p.xyz[4:8], zInv)
322
323 p256FromMont(zInvSq, zInvSq)
324 p256FromMont(zInv, zInv)
325
326 xOut := make([]byte, 32)
327 yOut := make([]byte, 32)
328 p256LittleToBig(xOut, zInvSq)
329 p256LittleToBig(yOut, zInv)
330
331 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
332 }
333
334
335
336 func (p *p256Point) CopyConditional(src *p256Point, v int) {
337 pMask := uint64(v) - 1
338 srcMask := ^pMask
339
340 for i, n := range p.xyz {
341 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
342 }
343 }
344
345
346 func p256Inverse(out, in []uint64) {
347 var stack [6 * 4]uint64
348 p2 := stack[4*0 : 4*0+4]
349 p4 := stack[4*1 : 4*1+4]
350 p8 := stack[4*2 : 4*2+4]
351 p16 := stack[4*3 : 4*3+4]
352 p32 := stack[4*4 : 4*4+4]
353
354 p256Sqr(out, in, 1)
355 p256Mul(p2, out, in)
356
357 p256Sqr(out, p2, 2)
358 p256Mul(p4, out, p2)
359
360 p256Sqr(out, p4, 4)
361 p256Mul(p8, out, p4)
362
363 p256Sqr(out, p8, 8)
364 p256Mul(p16, out, p8)
365
366 p256Sqr(out, p16, 16)
367 p256Mul(p32, out, p16)
368
369 p256Sqr(out, p32, 32)
370 p256Mul(out, out, in)
371
372 p256Sqr(out, out, 128)
373 p256Mul(out, out, p32)
374
375 p256Sqr(out, out, 32)
376 p256Mul(out, out, p32)
377
378 p256Sqr(out, out, 16)
379 p256Mul(out, out, p16)
380
381 p256Sqr(out, out, 8)
382 p256Mul(out, out, p8)
383
384 p256Sqr(out, out, 4)
385 p256Mul(out, out, p4)
386
387 p256Sqr(out, out, 2)
388 p256Mul(out, out, p2)
389
390 p256Sqr(out, out, 2)
391 p256Mul(out, out, in)
392 }
393
394 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
395 copy(r[index*12:], p.xyz[:])
396 }
397
// boothW5 recodes a 6-bit window value (five scalar bits plus the top bit of
// the window below) into a digit magnitude in [0, 16] and a sign flag. The
// flag is 1 when bit 5 of in is set, in which case the magnitude is derived
// from 63-in instead of in. No branch depends on in.
func boothW5(in uint) (int, int) {
	negMask := ^((in >> 5) - 1) // all ones when bit 5 is set, else zero
	mirrored := (1<<6 - 1) - in

	digit := (mirrored & negMask) | (in &^ negMask)
	digit = (digit >> 1) + (digit & 1) // halve, rounding up

	return int(digit), int(negMask & 1)
}
405
// boothW6 recodes a 7-bit window value (six scalar bits plus the top bit of
// the window below) into a digit magnitude in [0, 32] and a sign flag. The
// flag is 1 when bit 6 of in is set, in which case the magnitude is derived
// from 127-in instead of in. No branch depends on in.
func boothW6(in uint) (int, int) {
	negMask := ^((in >> 6) - 1) // all ones when bit 6 is set, else zero
	mirrored := (1<<7 - 1) - in

	digit := (mirrored & negMask) | (in &^ negMask)
	digit = (digit >> 1) + (digit & 1) // halve, rounding up

	return int(digit), int(negMask & 1)
}
413
414 func initTable() {
415 p256Precomputed = new([43][32 * 8]uint64)
416
417 basePoint := []uint64{
418 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
419 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
420 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
421 }
422 t1 := make([]uint64, 12)
423 t2 := make([]uint64, 12)
424 copy(t2, basePoint)
425
426 zInv := make([]uint64, 4)
427 zInvSq := make([]uint64, 4)
428 for j := 0; j < 32; j++ {
429 copy(t1, t2)
430 for i := 0; i < 43; i++ {
431
432 if i != 0 {
433 for k := 0; k < 6; k++ {
434 p256PointDoubleAsm(t1, t1)
435 }
436 }
437
438
439 p256Inverse(zInv, t1[8:12])
440 p256Sqr(zInvSq, zInv, 1)
441 p256Mul(zInv, zInv, zInvSq)
442
443 p256Mul(t1[:4], t1[:4], zInvSq)
444 p256Mul(t1[4:8], t1[4:8], zInv)
445
446 copy(t1[8:12], basePoint[8:12])
447
448 copy(p256Precomputed[i][j*8:], t1[:8])
449 }
450 if j == 0 {
451 p256PointDoubleAsm(t2, basePoint)
452 } else {
453 p256PointAddAsm(t2, t2, basePoint)
454 }
455 }
456 }
457
458 func (p *p256Point) p256BaseMult(scalar []uint64) {
459 precomputeOnce.Do(initTable)
460
461 wvalue := (scalar[0] << 1) & 0x7f
462 sel, sign := boothW6(uint(wvalue))
463 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
464 p256NegCond(p.xyz[4:8], sign)
465
466
467 p.xyz[8] = 0x0000000000000001
468 p.xyz[9] = 0xffffffff00000000
469 p.xyz[10] = 0xffffffffffffffff
470 p.xyz[11] = 0x00000000fffffffe
471
472 var t0 p256Point
473
474 t0.xyz[8] = 0x0000000000000001
475 t0.xyz[9] = 0xffffffff00000000
476 t0.xyz[10] = 0xffffffffffffffff
477 t0.xyz[11] = 0x00000000fffffffe
478
479 index := uint(5)
480 zero := sel
481
482 for i := 1; i < 43; i++ {
483 if index < 192 {
484 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
485 } else {
486 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
487 }
488 index += 6
489 sel, sign = boothW6(uint(wvalue))
490 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
491 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
492 zero |= sel
493 }
494 }
495
496 func (p *p256Point) p256ScalarMult(scalar []uint64) {
497
498
499 var precomp [16 * 4 * 3]uint64
500 var t0, t1, t2, t3 p256Point
501
502
503 p.p256StorePoint(&precomp, 0)
504
505 p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
506 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
507 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
508 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
509 t0.p256StorePoint(&precomp, 1)
510 t1.p256StorePoint(&precomp, 3)
511 t2.p256StorePoint(&precomp, 7)
512 t3.p256StorePoint(&precomp, 15)
513
514 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
515 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
516 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
517 t0.p256StorePoint(&precomp, 2)
518 t1.p256StorePoint(&precomp, 4)
519 t2.p256StorePoint(&precomp, 8)
520
521 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
522 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
523 t0.p256StorePoint(&precomp, 5)
524 t1.p256StorePoint(&precomp, 9)
525
526 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
527 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
528 t2.p256StorePoint(&precomp, 6)
529 t1.p256StorePoint(&precomp, 10)
530
531 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
532 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
533 t0.p256StorePoint(&precomp, 11)
534 t2.p256StorePoint(&precomp, 13)
535
536 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
537 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
538 t0.p256StorePoint(&precomp, 12)
539 t2.p256StorePoint(&precomp, 14)
540
541
542 index := uint(254)
543 var sel, sign int
544
545 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
546 sel, _ = boothW5(uint(wvalue))
547
548 p256Select(p.xyz[0:12], precomp[0:], sel)
549 zero := sel
550
551 for index > 4 {
552 index -= 5
553 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
554 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
555 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
556 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
557 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
558
559 if index < 192 {
560 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
561 } else {
562 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
563 }
564
565 sel, sign = boothW5(uint(wvalue))
566
567 p256Select(t0.xyz[0:], precomp[0:], sel)
568 p256NegCond(t0.xyz[4:8], sign)
569 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
570 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
571 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
572 zero |= sel
573 }
574
575 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
576 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
577 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
578 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
579 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
580
581 wvalue = (scalar[0] << 1) & 0x3f
582 sel, sign = boothW5(uint(wvalue))
583
584 p256Select(t0.xyz[0:], precomp[0:], sel)
585 p256NegCond(t0.xyz[4:8], sign)
586 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
587 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
588 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
589 }
590
View as plain text