...

Source file src/pkg/crypto/elliptic/p256_asm.go

     1	// Copyright 2015 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This file contains the Go wrapper for the constant-time, 64-bit assembly
     6	// implementation of P256. The optimizations performed here are described in
     7	// detail in:
     8	// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9	//                          256-bit primes"
    10	// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11	// https://eprint.iacr.org/2013/816.pdf
    12	
    13	// +build amd64 arm64
    14	
    15	package elliptic
    16	
    17	import (
    18		"math/big"
    19		"sync"
    20	)
    21	
    22	type (
    23		p256Curve struct {
    24			*CurveParams
    25		}
    26	
    27		p256Point struct {
    28			xyz [12]uint64
    29		}
    30	)
    31	
    32	var (
    33		p256            p256Curve
    34		p256Precomputed *[43][32 * 8]uint64
    35		precomputeOnce  sync.Once
    36	)
    37	
    38	func initP256() {
    39		// See FIPS 186-3, section D.2.3
    40		p256.CurveParams = &CurveParams{Name: "P-256"}
    41		p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
    42		p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
    43		p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
    44		p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
    45		p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
    46		p256.BitSize = 256
    47	}
    48	
    49	func (curve p256Curve) Params() *CurveParams {
    50		return curve.CurveParams
    51	}
    52	
// Functions implemented in p256_asm_*64.s

// p256Mul performs a Montgomery multiplication modulo P256: res receives the
// Montgomery product of in1 and in2.
//go:noescape
func p256Mul(res, in1, in2 []uint64)

// p256Sqr performs a Montgomery squaring modulo P256, repeated n times
// (n >= 1), reading from in and writing the result to res.
//go:noescape
func p256Sqr(res, in []uint64, n int)

// p256FromMont performs a Montgomery multiplication of in by 1 and stores the
// result in res.
//go:noescape
func p256FromMont(res, in []uint64)

// p256NegCond negates val in place if and only if cond == 1:
// iff cond == 1  val <- -val
//go:noescape
func p256NegCond(val []uint64, cond int)

// p256MovCond is a conditional move:
// if cond == 0 res <- b; else res <- a
//go:noescape
func p256MovCond(res, a, b []uint64, cond int)

// p256BigToLittle performs an endianness swap, reading bytes from in and
// writing 64-bit words to res.
//go:noescape
func p256BigToLittle(res []uint64, in []byte)

// p256LittleToBig performs the endianness swap in the other direction,
// reading 64-bit words from in and writing bytes to res.
//go:noescape
func p256LittleToBig(res []byte, in []uint64)

// p256Select is a constant time table access: it stores the entry of table
// selected by idx in point.
// NOTE(review): callers pass idx == d to fetch the entry stored at position
// d-1, so idx appears to be 1-based with 0 selecting no entry; confirm against
// the assembly.
//go:noescape
func p256Select(point, table []uint64, idx int)

// p256SelectBase is the constant time table access used for the precomputed
// base-point table rows (see p256BaseMult).
//go:noescape
func p256SelectBase(point, table []uint64, idx int)

// p256OrdMul performs a Montgomery multiplication modulo Ord(G).
//go:noescape
func p256OrdMul(res, in1, in2 []uint64)

// p256OrdSqr performs a Montgomery squaring modulo Ord(G), repeated n times.
//go:noescape
func p256OrdSqr(res, in []uint64, n int)

// p256PointAddAffineAsm is a point add with in2 being an affine point.
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// If zero == 0 -> res = in2
//go:noescape
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)

// p256PointAddAsm is a point add. It returns one if the two input points were
// equal and zero otherwise. (Note that, due to the way that the equations work
// out, some representations of ∞ are considered equal to everything by this
// function.)
//go:noescape
func p256PointAddAsm(res, in1, in2 []uint64) int

// p256PointDoubleAsm is a point double: res receives the double of in.
//go:noescape
func p256PointDoubleAsm(res, in []uint64)
   112	
// Inverse computes the inverse of k modulo the order of the base point
// (p256.N) by raising k to a fixed exponent, using the scalar-inversion
// addition chain referenced below. A negative k is first replaced by -k and a
// k that is not below N is first reduced modulo N. The result is returned as
// a new *big.Int; k itself is not modified.
func (curve p256Curve) Inverse(k *big.Int) *big.Int {
	if k.Sign() < 0 {
		// This should never happen.
		k = new(big.Int).Neg(k)
	}

	if k.Cmp(p256.N) >= 0 {
		// This should never happen.
		k = new(big.Int).Mod(k, p256.N)
	}

	// table will store precomputed powers of x. Each entry is a 4-limb
	// slice of the same backing array; the names give the exponent in
	// binary. x and t are scratch accumulators.
	var table [4 * 9]uint64
	var (
		_1      = table[4*0 : 4*1]
		_11     = table[4*1 : 4*2]
		_101    = table[4*2 : 4*3]
		_111    = table[4*3 : 4*4]
		_1111   = table[4*4 : 4*5]
		_10101  = table[4*5 : 4*6]
		_101111 = table[4*6 : 4*7]
		x       = table[4*7 : 4*8]
		t       = table[4*8 : 4*9]
	)

	fromBig(x[:], k)
	// This code operates in the Montgomery domain where R = 2^256 mod n
	// and n is the order of the scalar field. (See initP256 for the
	// value.) Elements in the Montgomery domain take the form a×R and
	// Montgomery multiplication of x and y calculates (x × y × R^-1) mod n.
	// RR is R×R mod n, thus the Montgomery multiplication of x and RR gives
	// x×R, i.e. converts x into the Montgomery domain.
	// Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
	p256OrdMul(_1, x, RR)      // _1
	p256OrdSqr(x, _1, 1)       // _10
	p256OrdMul(_11, x, _1)     // _11
	p256OrdMul(_101, x, _11)   // _101
	p256OrdMul(_111, x, _101)  // _111
	p256OrdSqr(x, _101, 1)     // _1010
	p256OrdMul(_1111, _101, x) // _1111

	p256OrdSqr(t, x, 1)          // _10100
	p256OrdMul(_10101, t, _1)    // _10101
	p256OrdSqr(x, _10101, 1)     // _101010
	p256OrdMul(_101111, _101, x) // _101111
	p256OrdMul(x, _10101, x)     // _111111 = x6
	p256OrdSqr(t, x, 2)          // _11111100
	p256OrdMul(t, t, _11)        // _11111111 = x8
	p256OrdSqr(x, t, 8)          // _ff00
	p256OrdMul(x, x, t)          // _ffff = x16
	p256OrdSqr(t, x, 16)         // _ffff0000
	p256OrdMul(t, t, x)          // _ffffffff = x32

	// Assemble the top 128 bits of the exponent from x32 (held in t).
	p256OrdSqr(x, t, 64) // _ffffffff0000000000000000
	p256OrdMul(x, x, t)  // _ffffffff00000000ffffffff
	p256OrdSqr(x, x, 32) // _ffffffff00000000ffffffff00000000
	p256OrdMul(x, x, t)  // _ffffffff00000000ffffffffffffffff

	// The remaining exponent bits are consumed window by window: step i
	// squares x sqrs[i] times and then multiplies by the table entry
	// muls[i]. The two lists must stay the same length and in step.
	sqrs := []uint8{
		6, 5, 4, 5, 5,
		4, 3, 3, 5, 9,
		6, 2, 5, 6, 5,
		4, 5, 5, 3, 10,
		2, 5, 5, 3, 7, 6}
	muls := [][]uint64{
		_101111, _111, _11, _1111, _10101,
		_101, _101, _101, _111, _101111,
		_1111, _1, _1, _1111, _111,
		_111, _111, _101, _11, _101111,
		_11, _11, _11, _1, _10101, _1111}

	for i, s := range sqrs {
		p256OrdSqr(x, x, int(s))
		p256OrdMul(x, x, muls[i])
	}

	// Multiplying by one in the Montgomery domain converts a Montgomery
	// value out of the domain.
	one := []uint64{1, 0, 0, 0}
	p256OrdMul(x, x, one)

	// Convert the little-endian limbs to the big-endian bytes that
	// big.Int.SetBytes expects.
	xOut := make([]byte, 32)
	p256LittleToBig(xOut, x)
	return new(big.Int).SetBytes(xOut)
}
   199	
   200	// fromBig converts a *big.Int into a format used by this code.
   201	func fromBig(out []uint64, big *big.Int) {
   202		for i := range out {
   203			out[i] = 0
   204		}
   205	
   206		for i, v := range big.Bits() {
   207			out[i] = uint64(v)
   208		}
   209	}
   210	
   211	// p256GetScalar endian-swaps the big-endian scalar value from in and writes it
   212	// to out. If the scalar is equal or greater than the order of the group, it's
   213	// reduced modulo that order.
   214	func p256GetScalar(out []uint64, in []byte) {
   215		n := new(big.Int).SetBytes(in)
   216	
   217		if n.Cmp(p256.N) >= 0 {
   218			n.Mod(n, p256.N)
   219		}
   220		fromBig(out, n)
   221	}
   222	
// rr is R×R mod p, where R = 2^256 mod p and p is the prime of the underlying
// field of the curve. (See initP256 for the value.) p256Mul operates in the
// Montgomery domain defined by that R, so a Montgomery multiplication of a
// value by rr converts the value into the domain. See the comment in Inverse
// about how this is used.
var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
   227	
   228	func maybeReduceModP(in *big.Int) *big.Int {
   229		if in.Cmp(p256.P) < 0 {
   230			return in
   231		}
   232		return new(big.Int).Mod(in, p256.P)
   233	}
   234	
   235	func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   236		scalarReversed := make([]uint64, 4)
   237		var r1, r2 p256Point
   238		p256GetScalar(scalarReversed, baseScalar)
   239		r1IsInfinity := scalarIsZero(scalarReversed)
   240		r1.p256BaseMult(scalarReversed)
   241	
   242		p256GetScalar(scalarReversed, scalar)
   243		r2IsInfinity := scalarIsZero(scalarReversed)
   244		fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
   245		fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
   246		p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
   247		p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
   248	
   249		// This sets r2's Z value to 1, in the Montgomery domain.
   250		r2.xyz[8] = 0x0000000000000001
   251		r2.xyz[9] = 0xffffffff00000000
   252		r2.xyz[10] = 0xffffffffffffffff
   253		r2.xyz[11] = 0x00000000fffffffe
   254	
   255		r2.p256ScalarMult(scalarReversed)
   256	
   257		var sum, double p256Point
   258		pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
   259		p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
   260		sum.CopyConditional(&double, pointsEqual)
   261		sum.CopyConditional(&r1, r2IsInfinity)
   262		sum.CopyConditional(&r2, r1IsInfinity)
   263	
   264		return sum.p256PointToAffine()
   265	}
   266	
   267	func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   268		scalarReversed := make([]uint64, 4)
   269		p256GetScalar(scalarReversed, scalar)
   270	
   271		var r p256Point
   272		r.p256BaseMult(scalarReversed)
   273		return r.p256PointToAffine()
   274	}
   275	
   276	func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   277		scalarReversed := make([]uint64, 4)
   278		p256GetScalar(scalarReversed, scalar)
   279	
   280		var r p256Point
   281		fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   282		fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   283		p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
   284		p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
   285		// This sets r2's Z value to 1, in the Montgomery domain.
   286		r.xyz[8] = 0x0000000000000001
   287		r.xyz[9] = 0xffffffff00000000
   288		r.xyz[10] = 0xffffffffffffffff
   289		r.xyz[11] = 0x00000000fffffffe
   290	
   291		r.p256ScalarMult(scalarReversed)
   292		return r.p256PointToAffine()
   293	}
   294	
   295	// uint64IsZero returns 1 if x is zero and zero otherwise.
   296	func uint64IsZero(x uint64) int {
   297		x = ^x
   298		x &= x >> 32
   299		x &= x >> 16
   300		x &= x >> 8
   301		x &= x >> 4
   302		x &= x >> 2
   303		x &= x >> 1
   304		return int(x & 1)
   305	}
   306	
   307	// scalarIsZero returns 1 if scalar represents the zero value, and zero
   308	// otherwise.
   309	func scalarIsZero(scalar []uint64) int {
   310		return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
   311	}
   312	
   313	func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   314		zInv := make([]uint64, 4)
   315		zInvSq := make([]uint64, 4)
   316		p256Inverse(zInv, p.xyz[8:12])
   317		p256Sqr(zInvSq, zInv, 1)
   318		p256Mul(zInv, zInv, zInvSq)
   319	
   320		p256Mul(zInvSq, p.xyz[0:4], zInvSq)
   321		p256Mul(zInv, p.xyz[4:8], zInv)
   322	
   323		p256FromMont(zInvSq, zInvSq)
   324		p256FromMont(zInv, zInv)
   325	
   326		xOut := make([]byte, 32)
   327		yOut := make([]byte, 32)
   328		p256LittleToBig(xOut, zInvSq)
   329		p256LittleToBig(yOut, zInv)
   330	
   331		return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   332	}
   333	
   334	// CopyConditional copies overwrites p with src if v == 1, and leaves p
   335	// unchanged if v == 0.
   336	func (p *p256Point) CopyConditional(src *p256Point, v int) {
   337		pMask := uint64(v) - 1
   338		srcMask := ^pMask
   339	
   340		for i, n := range p.xyz {
   341			p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
   342		}
   343	}
   344	
// p256Inverse sets out to in^-1 mod p, computed as in^(p-2) with a fixed
// sequence of Montgomery squarings and multiplications. out is used as the
// working accumulator while in is still being read, so out must not alias in.
func p256Inverse(out, in []uint64) {
	// Scratch space for the intermediate powers; only the first five
	// 4-limb slots are used.
	var stack [6 * 4]uint64
	p2 := stack[4*0 : 4*0+4]
	p4 := stack[4*1 : 4*1+4]
	p8 := stack[4*2 : 4*2+4]
	p16 := stack[4*3 : 4*3+4]
	p32 := stack[4*4 : 4*4+4]

	// Build in^(2^k - 1) for k = 2, 4, 8, 16, 32. The trailing comments give
	// the exponent of in in hexadecimal.
	p256Sqr(out, in, 1)
	p256Mul(p2, out, in) // in^0x3

	p256Sqr(out, p2, 2)
	p256Mul(p4, out, p2) // in^0xf

	p256Sqr(out, p4, 4)
	p256Mul(p8, out, p4) // in^0xff

	p256Sqr(out, p8, 8)
	p256Mul(p16, out, p8) // in^0xffff

	p256Sqr(out, p16, 16)
	p256Mul(p32, out, p16) // in^0xffffffff

	// Assemble the full exponent from the most significant end: each step
	// shifts the exponent left by the number of squarings and then fills in
	// the low bits with one of the powers computed above.
	p256Sqr(out, p32, 32)
	p256Mul(out, out, in) // exponent 0xffffffff00000001

	p256Sqr(out, out, 128)
	p256Mul(out, out, p32)

	p256Sqr(out, out, 32)
	p256Mul(out, out, p32)

	p256Sqr(out, out, 16)
	p256Mul(out, out, p16)

	p256Sqr(out, out, 8)
	p256Mul(out, out, p8)

	p256Sqr(out, out, 4)
	p256Mul(out, out, p4)

	p256Sqr(out, out, 2)
	p256Mul(out, out, p2)

	// The final two bits of the exponent are 01.
	p256Sqr(out, out, 2)
	p256Mul(out, out, in)
}
   393	
   394	func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
   395		copy(r[index*12:], p.xyz[:])
   396	}
   397	
   398	func boothW5(in uint) (int, int) {
   399		var s uint = ^((in >> 5) - 1)
   400		var d uint = (1 << 6) - in - 1
   401		d = (d & s) | (in & (^s))
   402		d = (d >> 1) + (d & 1)
   403		return int(d), int(s & 1)
   404	}
   405	
   406	func boothW6(in uint) (int, int) {
   407		var s uint = ^((in >> 6) - 1)
   408		var d uint = (1 << 7) - in - 1
   409		d = (d & s) | (in & (^s))
   410		d = (d >> 1) + (d & 1)
   411		return int(d), int(s & 1)
   412	}
   413	
// initTable allocates and fills p256Precomputed, the table used by
// p256BaseMult. Entry [i][j*8 : j*8+8] receives the affine X and Y limbs
// (still in Montgomery form) of basePoint multiplied by (j+1)×2^(6×i), for
// i in [0, 43) and j in [0, 32). p256BaseMult runs it through precomputeOnce.
func initTable() {
	p256Precomputed = new([43][32 * 8]uint64)

	// X, Y and Z limbs of the starting point. The Z limbs are the value one
	// in the Montgomery domain.
	// NOTE(review): presumably the curve generator in Montgomery form —
	// confirm against initP256's Gx/Gy.
	basePoint := []uint64{
		0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
		0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
		0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
	}
	// t2 holds (j+1)×basePoint for the current column j; t1 is the working
	// copy that gets doubled down the rows.
	t1 := make([]uint64, 12)
	t2 := make([]uint64, 12)
	copy(t2, basePoint)

	zInv := make([]uint64, 4)
	zInvSq := make([]uint64, 4)
	for j := 0; j < 32; j++ {
		copy(t1, t2)
		for i := 0; i < 43; i++ {
			// The window size is 6 so we need to double 6 times.
			if i != 0 {
				for k := 0; k < 6; k++ {
					p256PointDoubleAsm(t1, t1)
				}
			}
			// Convert the point to affine form. (Its values are
			// still in Montgomery form however.)
			p256Inverse(zInv, t1[8:12])
			p256Sqr(zInvSq, zInv, 1)    // Z^-2
			p256Mul(zInv, zInv, zInvSq) // Z^-3

			p256Mul(t1[:4], t1[:4], zInvSq)
			p256Mul(t1[4:8], t1[4:8], zInv)

			// The coordinates are affine now, so reset Z to one (in
			// the Montgomery domain) before the next round of doubling.
			copy(t1[8:12], basePoint[8:12])
			// Update the table entry
			copy(p256Precomputed[i][j*8:], t1[:8])
		}
		// Advance t2 to the next multiple of basePoint. For j == 0 both
		// operands would be the same point, so a doubling is used instead
		// of the general addition.
		if j == 0 {
			p256PointDoubleAsm(t2, basePoint)
		} else {
			p256PointAddAsm(t2, t2, basePoint)
		}
	}
}
   457	
// p256BaseMult sets p to scalar times the base point, using the table built
// by initTable. scalar holds four little-endian 64-bit limbs. The scalar is
// consumed in 43 windows, each recoded by boothW6 into a table index and a
// sign.
func (p *p256Point) p256BaseMult(scalar []uint64) {
	// Build the precomputed table on first use.
	precomputeOnce.Do(initTable)

	// First window: the low six bits of the scalar, shifted up by one so
	// that the lowest bit of the 7-bit window is zero.
	wvalue := (scalar[0] << 1) & 0x7f
	sel, sign := boothW6(uint(wvalue))
	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
	p256NegCond(p.xyz[4:8], sign)

	// (This is one, in the Montgomery domain.)
	p.xyz[8] = 0x0000000000000001
	p.xyz[9] = 0xffffffff00000000
	p.xyz[10] = 0xffffffffffffffff
	p.xyz[11] = 0x00000000fffffffe

	// t0 receives the affine table entry for each subsequent window.
	var t0 p256Point
	// (This is one, in the Montgomery domain.)
	t0.xyz[8] = 0x0000000000000001
	t0.xyz[9] = 0xffffffff00000000
	t0.xyz[10] = 0xffffffffffffffff
	t0.xyz[11] = 0x00000000fffffffe

	// index is the bit position where the next 7-bit window starts. It
	// advances by 6 per step, so consecutive windows overlap by one bit.
	index := uint(5)
	// zero stays 0 until some window has selected a non-zero table index.
	zero := sel

	for i := 1; i < 43; i++ {
		if index < 192 {
			// The window may straddle two limbs, so combine the top of
			// one limb with the bottom of the next.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
		} else {
			// The window lies entirely in the last limb.
			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
		}
		index += 6
		sel, sign = boothW6(uint(wvalue))
		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
		// sign, sel and zero let the assembly negate the addend, skip the
		// addition, or take the addend as the result (see the comment on
		// p256PointAddAffineAsm).
		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
		zero |= sel
	}
}
   495	
// p256ScalarMult replaces p with scalar times p. scalar holds four
// little-endian 64-bit limbs. A table of the multiples 1×p to 16×p is built
// first; the scalar is then scanned from the top in 5-bit steps, each window
// recoded by boothW5 into a table index and a sign.
func (p *p256Point) p256ScalarMult(scalar []uint64) {
	// precomp is a table of precomputed points that stores the multiples
	// of p from 1×p to 16×p. The multiple k×p lives in slot k-1.
	var precomp [16 * 4 * 3]uint64
	var t0, t1, t2, t3 p256Point

	// Prepare the table. The trailing comment on each store is the
	// multiple of p being stored.
	p.p256StorePoint(&precomp, 0) // 1

	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 1)  // 2
	t1.p256StorePoint(&precomp, 3)  // 4
	t2.p256StorePoint(&precomp, 7)  // 8
	t3.p256StorePoint(&precomp, 15) // 16

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 2) // 3
	t1.p256StorePoint(&precomp, 4) // 5
	t2.p256StorePoint(&precomp, 8) // 9

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
	t0.p256StorePoint(&precomp, 5) // 6
	t1.p256StorePoint(&precomp, 9) // 10

	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	t2.p256StorePoint(&precomp, 6)  // 7
	t1.p256StorePoint(&precomp, 10) // 11

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 11) // 12
	t2.p256StorePoint(&precomp, 13) // 14

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 12) // 13
	t2.p256StorePoint(&precomp, 14) // 15

	// Start scanning the window from top bit
	index := uint(254)
	var sel, sign int

	// The top window only contains the two highest bits of the scalar, so
	// its value is at most 3 and the Booth sign is always zero; the sign
	// is therefore discarded.
	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
	sel, _ = boothW5(uint(wvalue))

	p256Select(p.xyz[0:12], precomp[0:], sel)
	// zero stays 0 until some window has selected a non-zero table index.
	zero := sel

	for index > 4 {
		index -= 5
		// Multiply the accumulator by 2^5 before adding the next window.
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])

		if index < 192 {
			// The window may straddle two limbs, so combine the top of
			// one limb with the bottom of the next.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
		} else {
			// The window lies entirely in the last limb.
			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
		}

		sel, sign = boothW5(uint(wvalue))

		// Fetch the selected multiple, negate its Y coordinate when the
		// digit is negative, and add it to the accumulator.
		p256Select(t0.xyz[0:], precomp[0:], sel)
		p256NegCond(t0.xyz[4:8], sign)
		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
		// If the digit was zero, discard the sum and keep the accumulator.
		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
		// If no non-zero digit had been seen before this window, the
		// result is the selected point itself rather than the sum.
		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
		zero |= sel
	}

	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])

	// Last window: the low five bits of the scalar, shifted up by one so
	// that the lowest bit of the 6-bit window is zero.
	wvalue = (scalar[0] << 1) & 0x3f
	sel, sign = boothW5(uint(wvalue))

	// Same select / negate / add / conditional-move sequence as in the loop.
	p256Select(t0.xyz[0:], precomp[0:], sel)
	p256NegCond(t0.xyz[4:8], sign)
	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
}
   590	

View as plain text