...

Source file src/vendor/golang.org/x/crypto/curve25519/curve25519.go

     1	// Copyright 2013 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// We have an implementation in amd64 assembly so this code is only run on
     6	// non-amd64 platforms. The amd64 assembly does not support gccgo.
     7	// +build !amd64 gccgo appengine
     8	
     9	package curve25519
    10	
    11	import (
    12		"encoding/binary"
    13	)
    14	
    15	// This code is a port of the public domain, "ref10" implementation of
    16	// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
    17	
    18	// fieldElement represents an element of the field GF(2^255 - 19). An element
    19	// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
    20	// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
    21	// context.
    22	type fieldElement [10]int32
    23	
    24	func feZero(fe *fieldElement) {
    25		for i := range fe {
    26			fe[i] = 0
    27		}
    28	}
    29	
    30	func feOne(fe *fieldElement) {
    31		feZero(fe)
    32		fe[0] = 1
    33	}
    34	
    35	func feAdd(dst, a, b *fieldElement) {
    36		for i := range dst {
    37			dst[i] = a[i] + b[i]
    38		}
    39	}
    40	
    41	func feSub(dst, a, b *fieldElement) {
    42		for i := range dst {
    43			dst[i] = a[i] - b[i]
    44		}
    45	}
    46	
    47	func feCopy(dst, src *fieldElement) {
    48		for i := range dst {
    49			dst[i] = src[i]
    50		}
    51	}
    52	
    53	// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
    54	//
    55	// Preconditions: b in {0,1}.
    56	func feCSwap(f, g *fieldElement, b int32) {
    57		b = -b
    58		for i := range f {
    59			t := b & (f[i] ^ g[i])
    60			f[i] ^= t
    61			g[i] ^= t
    62		}
    63	}
    64	
    65	// load3 reads a 24-bit, little-endian value from in.
    66	func load3(in []byte) int64 {
    67		var r int64
    68		r = int64(in[0])
    69		r |= int64(in[1]) << 8
    70		r |= int64(in[2]) << 16
    71		return r
    72	}
    73	
    74	// load4 reads a 32-bit, little-endian value from in.
    75	func load4(in []byte) int64 {
    76		return int64(binary.LittleEndian.Uint32(in))
    77	}
    78	
    79	func feFromBytes(dst *fieldElement, src *[32]byte) {
    80		h0 := load4(src[:])
    81		h1 := load3(src[4:]) << 6
    82		h2 := load3(src[7:]) << 5
    83		h3 := load3(src[10:]) << 3
    84		h4 := load3(src[13:]) << 2
    85		h5 := load4(src[16:])
    86		h6 := load3(src[20:]) << 7
    87		h7 := load3(src[23:]) << 5
    88		h8 := load3(src[26:]) << 4
    89		h9 := (load3(src[29:]) & 0x7fffff) << 2
    90	
    91		var carry [10]int64
    92		carry[9] = (h9 + 1<<24) >> 25
    93		h0 += carry[9] * 19
    94		h9 -= carry[9] << 25
    95		carry[1] = (h1 + 1<<24) >> 25
    96		h2 += carry[1]
    97		h1 -= carry[1] << 25
    98		carry[3] = (h3 + 1<<24) >> 25
    99		h4 += carry[3]
   100		h3 -= carry[3] << 25
   101		carry[5] = (h5 + 1<<24) >> 25
   102		h6 += carry[5]
   103		h5 -= carry[5] << 25
   104		carry[7] = (h7 + 1<<24) >> 25
   105		h8 += carry[7]
   106		h7 -= carry[7] << 25
   107	
   108		carry[0] = (h0 + 1<<25) >> 26
   109		h1 += carry[0]
   110		h0 -= carry[0] << 26
   111		carry[2] = (h2 + 1<<25) >> 26
   112		h3 += carry[2]
   113		h2 -= carry[2] << 26
   114		carry[4] = (h4 + 1<<25) >> 26
   115		h5 += carry[4]
   116		h4 -= carry[4] << 26
   117		carry[6] = (h6 + 1<<25) >> 26
   118		h7 += carry[6]
   119		h6 -= carry[6] << 26
   120		carry[8] = (h8 + 1<<25) >> 26
   121		h9 += carry[8]
   122		h8 -= carry[8] << 26
   123	
   124		dst[0] = int32(h0)
   125		dst[1] = int32(h1)
   126		dst[2] = int32(h2)
   127		dst[3] = int32(h3)
   128		dst[4] = int32(h4)
   129		dst[5] = int32(h5)
   130		dst[6] = int32(h6)
   131		dst[7] = int32(h7)
   132		dst[8] = int32(h8)
   133		dst[9] = int32(h9)
   134	}
   135	
   136	// feToBytes marshals h to s.
   137	// Preconditions:
   138	//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   139	//
   140	// Write p=2^255-19; q=floor(h/p).
   141	// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
   142	//
   143	// Proof:
   144	//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
   145	//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
   146	//
   147	//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
   148	//   Then 0<y<1.
   149	//
   150	//   Write r=h-pq.
   151	//   Have 0<=r<=p-1=2^255-20.
   152	//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
   153	//
   154	//   Write x=r+19(2^-255)r+y.
   155	//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
   156	//
   157	//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
   158	//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
   159	func feToBytes(s *[32]byte, h *fieldElement) {
   160		var carry [10]int32
   161	
   162		q := (19*h[9] + (1 << 24)) >> 25
   163		q = (h[0] + q) >> 26
   164		q = (h[1] + q) >> 25
   165		q = (h[2] + q) >> 26
   166		q = (h[3] + q) >> 25
   167		q = (h[4] + q) >> 26
   168		q = (h[5] + q) >> 25
   169		q = (h[6] + q) >> 26
   170		q = (h[7] + q) >> 25
   171		q = (h[8] + q) >> 26
   172		q = (h[9] + q) >> 25
   173	
   174		// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
   175		h[0] += 19 * q
   176		// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
   177	
   178		carry[0] = h[0] >> 26
   179		h[1] += carry[0]
   180		h[0] -= carry[0] << 26
   181		carry[1] = h[1] >> 25
   182		h[2] += carry[1]
   183		h[1] -= carry[1] << 25
   184		carry[2] = h[2] >> 26
   185		h[3] += carry[2]
   186		h[2] -= carry[2] << 26
   187		carry[3] = h[3] >> 25
   188		h[4] += carry[3]
   189		h[3] -= carry[3] << 25
   190		carry[4] = h[4] >> 26
   191		h[5] += carry[4]
   192		h[4] -= carry[4] << 26
   193		carry[5] = h[5] >> 25
   194		h[6] += carry[5]
   195		h[5] -= carry[5] << 25
   196		carry[6] = h[6] >> 26
   197		h[7] += carry[6]
   198		h[6] -= carry[6] << 26
   199		carry[7] = h[7] >> 25
   200		h[8] += carry[7]
   201		h[7] -= carry[7] << 25
   202		carry[8] = h[8] >> 26
   203		h[9] += carry[8]
   204		h[8] -= carry[8] << 26
   205		carry[9] = h[9] >> 25
   206		h[9] -= carry[9] << 25
   207		// h10 = carry9
   208	
   209		// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
   210		// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
   211		// evidently 2^255 h10-2^255 q = 0.
   212		// Goal: Output h[0]+...+2^230 h[9].
   213	
   214		s[0] = byte(h[0] >> 0)
   215		s[1] = byte(h[0] >> 8)
   216		s[2] = byte(h[0] >> 16)
   217		s[3] = byte((h[0] >> 24) | (h[1] << 2))
   218		s[4] = byte(h[1] >> 6)
   219		s[5] = byte(h[1] >> 14)
   220		s[6] = byte((h[1] >> 22) | (h[2] << 3))
   221		s[7] = byte(h[2] >> 5)
   222		s[8] = byte(h[2] >> 13)
   223		s[9] = byte((h[2] >> 21) | (h[3] << 5))
   224		s[10] = byte(h[3] >> 3)
   225		s[11] = byte(h[3] >> 11)
   226		s[12] = byte((h[3] >> 19) | (h[4] << 6))
   227		s[13] = byte(h[4] >> 2)
   228		s[14] = byte(h[4] >> 10)
   229		s[15] = byte(h[4] >> 18)
   230		s[16] = byte(h[5] >> 0)
   231		s[17] = byte(h[5] >> 8)
   232		s[18] = byte(h[5] >> 16)
   233		s[19] = byte((h[5] >> 24) | (h[6] << 1))
   234		s[20] = byte(h[6] >> 7)
   235		s[21] = byte(h[6] >> 15)
   236		s[22] = byte((h[6] >> 23) | (h[7] << 3))
   237		s[23] = byte(h[7] >> 5)
   238		s[24] = byte(h[7] >> 13)
   239		s[25] = byte((h[7] >> 21) | (h[8] << 4))
   240		s[26] = byte(h[8] >> 4)
   241		s[27] = byte(h[8] >> 12)
   242		s[28] = byte((h[8] >> 20) | (h[9] << 6))
   243		s[29] = byte(h[9] >> 2)
   244		s[30] = byte(h[9] >> 10)
   245		s[31] = byte(h[9] >> 18)
   246	}
   247	
   248	// feMul calculates h = f * g
   249	// Can overlap h with f or g.
   250	//
   251	// Preconditions:
   252	//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   253	//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   254	//
   255	// Postconditions:
   256	//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   257	//
   258	// Notes on implementation strategy:
   259	//
   260	// Using schoolbook multiplication.
   261	// Karatsuba would save a little in some cost models.
   262	//
   263	// Most multiplications by 2 and 19 are 32-bit precomputations;
   264	// cheaper than 64-bit postcomputations.
   265	//
   266	// There is one remaining multiplication by 19 in the carry chain;
   267	// one *19 precomputation can be merged into this,
   268	// but the resulting data flow is considerably less clean.
   269	//
   270	// There are 12 carries below.
   271	// 10 of them are 2-way parallelizable and vectorizable.
   272	// Can get away with 11 carries, but then data flow is much deeper.
   273	//
   274	// With tighter constraints on inputs can squeeze carries into int32.
   275	func feMul(h, f, g *fieldElement) {
   276		f0 := f[0]
   277		f1 := f[1]
   278		f2 := f[2]
   279		f3 := f[3]
   280		f4 := f[4]
   281		f5 := f[5]
   282		f6 := f[6]
   283		f7 := f[7]
   284		f8 := f[8]
   285		f9 := f[9]
   286		g0 := g[0]
   287		g1 := g[1]
   288		g2 := g[2]
   289		g3 := g[3]
   290		g4 := g[4]
   291		g5 := g[5]
   292		g6 := g[6]
   293		g7 := g[7]
   294		g8 := g[8]
   295		g9 := g[9]
   296		g1_19 := 19 * g1 // 1.4*2^29
   297		g2_19 := 19 * g2 // 1.4*2^30; still ok
   298		g3_19 := 19 * g3
   299		g4_19 := 19 * g4
   300		g5_19 := 19 * g5
   301		g6_19 := 19 * g6
   302		g7_19 := 19 * g7
   303		g8_19 := 19 * g8
   304		g9_19 := 19 * g9
   305		f1_2 := 2 * f1
   306		f3_2 := 2 * f3
   307		f5_2 := 2 * f5
   308		f7_2 := 2 * f7
   309		f9_2 := 2 * f9
   310		f0g0 := int64(f0) * int64(g0)
   311		f0g1 := int64(f0) * int64(g1)
   312		f0g2 := int64(f0) * int64(g2)
   313		f0g3 := int64(f0) * int64(g3)
   314		f0g4 := int64(f0) * int64(g4)
   315		f0g5 := int64(f0) * int64(g5)
   316		f0g6 := int64(f0) * int64(g6)
   317		f0g7 := int64(f0) * int64(g7)
   318		f0g8 := int64(f0) * int64(g8)
   319		f0g9 := int64(f0) * int64(g9)
   320		f1g0 := int64(f1) * int64(g0)
   321		f1g1_2 := int64(f1_2) * int64(g1)
   322		f1g2 := int64(f1) * int64(g2)
   323		f1g3_2 := int64(f1_2) * int64(g3)
   324		f1g4 := int64(f1) * int64(g4)
   325		f1g5_2 := int64(f1_2) * int64(g5)
   326		f1g6 := int64(f1) * int64(g6)
   327		f1g7_2 := int64(f1_2) * int64(g7)
   328		f1g8 := int64(f1) * int64(g8)
   329		f1g9_38 := int64(f1_2) * int64(g9_19)
   330		f2g0 := int64(f2) * int64(g0)
   331		f2g1 := int64(f2) * int64(g1)
   332		f2g2 := int64(f2) * int64(g2)
   333		f2g3 := int64(f2) * int64(g3)
   334		f2g4 := int64(f2) * int64(g4)
   335		f2g5 := int64(f2) * int64(g5)
   336		f2g6 := int64(f2) * int64(g6)
   337		f2g7 := int64(f2) * int64(g7)
   338		f2g8_19 := int64(f2) * int64(g8_19)
   339		f2g9_19 := int64(f2) * int64(g9_19)
   340		f3g0 := int64(f3) * int64(g0)
   341		f3g1_2 := int64(f3_2) * int64(g1)
   342		f3g2 := int64(f3) * int64(g2)
   343		f3g3_2 := int64(f3_2) * int64(g3)
   344		f3g4 := int64(f3) * int64(g4)
   345		f3g5_2 := int64(f3_2) * int64(g5)
   346		f3g6 := int64(f3) * int64(g6)
   347		f3g7_38 := int64(f3_2) * int64(g7_19)
   348		f3g8_19 := int64(f3) * int64(g8_19)
   349		f3g9_38 := int64(f3_2) * int64(g9_19)
   350		f4g0 := int64(f4) * int64(g0)
   351		f4g1 := int64(f4) * int64(g1)
   352		f4g2 := int64(f4) * int64(g2)
   353		f4g3 := int64(f4) * int64(g3)
   354		f4g4 := int64(f4) * int64(g4)
   355		f4g5 := int64(f4) * int64(g5)
   356		f4g6_19 := int64(f4) * int64(g6_19)
   357		f4g7_19 := int64(f4) * int64(g7_19)
   358		f4g8_19 := int64(f4) * int64(g8_19)
   359		f4g9_19 := int64(f4) * int64(g9_19)
   360		f5g0 := int64(f5) * int64(g0)
   361		f5g1_2 := int64(f5_2) * int64(g1)
   362		f5g2 := int64(f5) * int64(g2)
   363		f5g3_2 := int64(f5_2) * int64(g3)
   364		f5g4 := int64(f5) * int64(g4)
   365		f5g5_38 := int64(f5_2) * int64(g5_19)
   366		f5g6_19 := int64(f5) * int64(g6_19)
   367		f5g7_38 := int64(f5_2) * int64(g7_19)
   368		f5g8_19 := int64(f5) * int64(g8_19)
   369		f5g9_38 := int64(f5_2) * int64(g9_19)
   370		f6g0 := int64(f6) * int64(g0)
   371		f6g1 := int64(f6) * int64(g1)
   372		f6g2 := int64(f6) * int64(g2)
   373		f6g3 := int64(f6) * int64(g3)
   374		f6g4_19 := int64(f6) * int64(g4_19)
   375		f6g5_19 := int64(f6) * int64(g5_19)
   376		f6g6_19 := int64(f6) * int64(g6_19)
   377		f6g7_19 := int64(f6) * int64(g7_19)
   378		f6g8_19 := int64(f6) * int64(g8_19)
   379		f6g9_19 := int64(f6) * int64(g9_19)
   380		f7g0 := int64(f7) * int64(g0)
   381		f7g1_2 := int64(f7_2) * int64(g1)
   382		f7g2 := int64(f7) * int64(g2)
   383		f7g3_38 := int64(f7_2) * int64(g3_19)
   384		f7g4_19 := int64(f7) * int64(g4_19)
   385		f7g5_38 := int64(f7_2) * int64(g5_19)
   386		f7g6_19 := int64(f7) * int64(g6_19)
   387		f7g7_38 := int64(f7_2) * int64(g7_19)
   388		f7g8_19 := int64(f7) * int64(g8_19)
   389		f7g9_38 := int64(f7_2) * int64(g9_19)
   390		f8g0 := int64(f8) * int64(g0)
   391		f8g1 := int64(f8) * int64(g1)
   392		f8g2_19 := int64(f8) * int64(g2_19)
   393		f8g3_19 := int64(f8) * int64(g3_19)
   394		f8g4_19 := int64(f8) * int64(g4_19)
   395		f8g5_19 := int64(f8) * int64(g5_19)
   396		f8g6_19 := int64(f8) * int64(g6_19)
   397		f8g7_19 := int64(f8) * int64(g7_19)
   398		f8g8_19 := int64(f8) * int64(g8_19)
   399		f8g9_19 := int64(f8) * int64(g9_19)
   400		f9g0 := int64(f9) * int64(g0)
   401		f9g1_38 := int64(f9_2) * int64(g1_19)
   402		f9g2_19 := int64(f9) * int64(g2_19)
   403		f9g3_38 := int64(f9_2) * int64(g3_19)
   404		f9g4_19 := int64(f9) * int64(g4_19)
   405		f9g5_38 := int64(f9_2) * int64(g5_19)
   406		f9g6_19 := int64(f9) * int64(g6_19)
   407		f9g7_38 := int64(f9_2) * int64(g7_19)
   408		f9g8_19 := int64(f9) * int64(g8_19)
   409		f9g9_38 := int64(f9_2) * int64(g9_19)
   410		h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
   411		h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
   412		h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
   413		h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
   414		h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
   415		h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
   416		h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
   417		h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
   418		h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
   419		h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
   420		var carry [10]int64
   421	
   422		// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
   423		//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
   424		// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
   425		//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
   426	
   427		carry[0] = (h0 + (1 << 25)) >> 26
   428		h1 += carry[0]
   429		h0 -= carry[0] << 26
   430		carry[4] = (h4 + (1 << 25)) >> 26
   431		h5 += carry[4]
   432		h4 -= carry[4] << 26
   433		// |h0| <= 2^25
   434		// |h4| <= 2^25
   435		// |h1| <= 1.51*2^58
   436		// |h5| <= 1.51*2^58
   437	
   438		carry[1] = (h1 + (1 << 24)) >> 25
   439		h2 += carry[1]
   440		h1 -= carry[1] << 25
   441		carry[5] = (h5 + (1 << 24)) >> 25
   442		h6 += carry[5]
   443		h5 -= carry[5] << 25
   444		// |h1| <= 2^24; from now on fits into int32
   445		// |h5| <= 2^24; from now on fits into int32
   446		// |h2| <= 1.21*2^59
   447		// |h6| <= 1.21*2^59
   448	
   449		carry[2] = (h2 + (1 << 25)) >> 26
   450		h3 += carry[2]
   451		h2 -= carry[2] << 26
   452		carry[6] = (h6 + (1 << 25)) >> 26
   453		h7 += carry[6]
   454		h6 -= carry[6] << 26
   455		// |h2| <= 2^25; from now on fits into int32 unchanged
   456		// |h6| <= 2^25; from now on fits into int32 unchanged
   457		// |h3| <= 1.51*2^58
   458		// |h7| <= 1.51*2^58
   459	
   460		carry[3] = (h3 + (1 << 24)) >> 25
   461		h4 += carry[3]
   462		h3 -= carry[3] << 25
   463		carry[7] = (h7 + (1 << 24)) >> 25
   464		h8 += carry[7]
   465		h7 -= carry[7] << 25
   466		// |h3| <= 2^24; from now on fits into int32 unchanged
   467		// |h7| <= 2^24; from now on fits into int32 unchanged
   468		// |h4| <= 1.52*2^33
   469		// |h8| <= 1.52*2^33
   470	
   471		carry[4] = (h4 + (1 << 25)) >> 26
   472		h5 += carry[4]
   473		h4 -= carry[4] << 26
   474		carry[8] = (h8 + (1 << 25)) >> 26
   475		h9 += carry[8]
   476		h8 -= carry[8] << 26
   477		// |h4| <= 2^25; from now on fits into int32 unchanged
   478		// |h8| <= 2^25; from now on fits into int32 unchanged
   479		// |h5| <= 1.01*2^24
   480		// |h9| <= 1.51*2^58
   481	
   482		carry[9] = (h9 + (1 << 24)) >> 25
   483		h0 += carry[9] * 19
   484		h9 -= carry[9] << 25
   485		// |h9| <= 2^24; from now on fits into int32 unchanged
   486		// |h0| <= 1.8*2^37
   487	
   488		carry[0] = (h0 + (1 << 25)) >> 26
   489		h1 += carry[0]
   490		h0 -= carry[0] << 26
   491		// |h0| <= 2^25; from now on fits into int32 unchanged
   492		// |h1| <= 1.01*2^24
   493	
   494		h[0] = int32(h0)
   495		h[1] = int32(h1)
   496		h[2] = int32(h2)
   497		h[3] = int32(h3)
   498		h[4] = int32(h4)
   499		h[5] = int32(h5)
   500		h[6] = int32(h6)
   501		h[7] = int32(h7)
   502		h[8] = int32(h8)
   503		h[9] = int32(h9)
   504	}
   505	
   506	// feSquare calculates h = f*f. Can overlap h with f.
   507	//
   508	// Preconditions:
   509	//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   510	//
   511	// Postconditions:
   512	//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   513	func feSquare(h, f *fieldElement) {
   514		f0 := f[0]
   515		f1 := f[1]
   516		f2 := f[2]
   517		f3 := f[3]
   518		f4 := f[4]
   519		f5 := f[5]
   520		f6 := f[6]
   521		f7 := f[7]
   522		f8 := f[8]
   523		f9 := f[9]
   524		f0_2 := 2 * f0
   525		f1_2 := 2 * f1
   526		f2_2 := 2 * f2
   527		f3_2 := 2 * f3
   528		f4_2 := 2 * f4
   529		f5_2 := 2 * f5
   530		f6_2 := 2 * f6
   531		f7_2 := 2 * f7
   532		f5_38 := 38 * f5 // 1.31*2^30
   533		f6_19 := 19 * f6 // 1.31*2^30
   534		f7_38 := 38 * f7 // 1.31*2^30
   535		f8_19 := 19 * f8 // 1.31*2^30
   536		f9_38 := 38 * f9 // 1.31*2^30
   537		f0f0 := int64(f0) * int64(f0)
   538		f0f1_2 := int64(f0_2) * int64(f1)
   539		f0f2_2 := int64(f0_2) * int64(f2)
   540		f0f3_2 := int64(f0_2) * int64(f3)
   541		f0f4_2 := int64(f0_2) * int64(f4)
   542		f0f5_2 := int64(f0_2) * int64(f5)
   543		f0f6_2 := int64(f0_2) * int64(f6)
   544		f0f7_2 := int64(f0_2) * int64(f7)
   545		f0f8_2 := int64(f0_2) * int64(f8)
   546		f0f9_2 := int64(f0_2) * int64(f9)
   547		f1f1_2 := int64(f1_2) * int64(f1)
   548		f1f2_2 := int64(f1_2) * int64(f2)
   549		f1f3_4 := int64(f1_2) * int64(f3_2)
   550		f1f4_2 := int64(f1_2) * int64(f4)
   551		f1f5_4 := int64(f1_2) * int64(f5_2)
   552		f1f6_2 := int64(f1_2) * int64(f6)
   553		f1f7_4 := int64(f1_2) * int64(f7_2)
   554		f1f8_2 := int64(f1_2) * int64(f8)
   555		f1f9_76 := int64(f1_2) * int64(f9_38)
   556		f2f2 := int64(f2) * int64(f2)
   557		f2f3_2 := int64(f2_2) * int64(f3)
   558		f2f4_2 := int64(f2_2) * int64(f4)
   559		f2f5_2 := int64(f2_2) * int64(f5)
   560		f2f6_2 := int64(f2_2) * int64(f6)
   561		f2f7_2 := int64(f2_2) * int64(f7)
   562		f2f8_38 := int64(f2_2) * int64(f8_19)
   563		f2f9_38 := int64(f2) * int64(f9_38)
   564		f3f3_2 := int64(f3_2) * int64(f3)
   565		f3f4_2 := int64(f3_2) * int64(f4)
   566		f3f5_4 := int64(f3_2) * int64(f5_2)
   567		f3f6_2 := int64(f3_2) * int64(f6)
   568		f3f7_76 := int64(f3_2) * int64(f7_38)
   569		f3f8_38 := int64(f3_2) * int64(f8_19)
   570		f3f9_76 := int64(f3_2) * int64(f9_38)
   571		f4f4 := int64(f4) * int64(f4)
   572		f4f5_2 := int64(f4_2) * int64(f5)
   573		f4f6_38 := int64(f4_2) * int64(f6_19)
   574		f4f7_38 := int64(f4) * int64(f7_38)
   575		f4f8_38 := int64(f4_2) * int64(f8_19)
   576		f4f9_38 := int64(f4) * int64(f9_38)
   577		f5f5_38 := int64(f5) * int64(f5_38)
   578		f5f6_38 := int64(f5_2) * int64(f6_19)
   579		f5f7_76 := int64(f5_2) * int64(f7_38)
   580		f5f8_38 := int64(f5_2) * int64(f8_19)
   581		f5f9_76 := int64(f5_2) * int64(f9_38)
   582		f6f6_19 := int64(f6) * int64(f6_19)
   583		f6f7_38 := int64(f6) * int64(f7_38)
   584		f6f8_38 := int64(f6_2) * int64(f8_19)
   585		f6f9_38 := int64(f6) * int64(f9_38)
   586		f7f7_38 := int64(f7) * int64(f7_38)
   587		f7f8_38 := int64(f7_2) * int64(f8_19)
   588		f7f9_76 := int64(f7_2) * int64(f9_38)
   589		f8f8_19 := int64(f8) * int64(f8_19)
   590		f8f9_38 := int64(f8) * int64(f9_38)
   591		f9f9_38 := int64(f9) * int64(f9_38)
   592		h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
   593		h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
   594		h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
   595		h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
   596		h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
   597		h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
   598		h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
   599		h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
   600		h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
   601		h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
   602		var carry [10]int64
   603	
   604		carry[0] = (h0 + (1 << 25)) >> 26
   605		h1 += carry[0]
   606		h0 -= carry[0] << 26
   607		carry[4] = (h4 + (1 << 25)) >> 26
   608		h5 += carry[4]
   609		h4 -= carry[4] << 26
   610	
   611		carry[1] = (h1 + (1 << 24)) >> 25
   612		h2 += carry[1]
   613		h1 -= carry[1] << 25
   614		carry[5] = (h5 + (1 << 24)) >> 25
   615		h6 += carry[5]
   616		h5 -= carry[5] << 25
   617	
   618		carry[2] = (h2 + (1 << 25)) >> 26
   619		h3 += carry[2]
   620		h2 -= carry[2] << 26
   621		carry[6] = (h6 + (1 << 25)) >> 26
   622		h7 += carry[6]
   623		h6 -= carry[6] << 26
   624	
   625		carry[3] = (h3 + (1 << 24)) >> 25
   626		h4 += carry[3]
   627		h3 -= carry[3] << 25
   628		carry[7] = (h7 + (1 << 24)) >> 25
   629		h8 += carry[7]
   630		h7 -= carry[7] << 25
   631	
   632		carry[4] = (h4 + (1 << 25)) >> 26
   633		h5 += carry[4]
   634		h4 -= carry[4] << 26
   635		carry[8] = (h8 + (1 << 25)) >> 26
   636		h9 += carry[8]
   637		h8 -= carry[8] << 26
   638	
   639		carry[9] = (h9 + (1 << 24)) >> 25
   640		h0 += carry[9] * 19
   641		h9 -= carry[9] << 25
   642	
   643		carry[0] = (h0 + (1 << 25)) >> 26
   644		h1 += carry[0]
   645		h0 -= carry[0] << 26
   646	
   647		h[0] = int32(h0)
   648		h[1] = int32(h1)
   649		h[2] = int32(h2)
   650		h[3] = int32(h3)
   651		h[4] = int32(h4)
   652		h[5] = int32(h5)
   653		h[6] = int32(h6)
   654		h[7] = int32(h7)
   655		h[8] = int32(h8)
   656		h[9] = int32(h9)
   657	}
   658	
   659	// feMul121666 calculates h = f * 121666. Can overlap h with f.
   660	//
   661	// Preconditions:
   662	//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   663	//
   664	// Postconditions:
   665	//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   666	func feMul121666(h, f *fieldElement) {
   667		h0 := int64(f[0]) * 121666
   668		h1 := int64(f[1]) * 121666
   669		h2 := int64(f[2]) * 121666
   670		h3 := int64(f[3]) * 121666
   671		h4 := int64(f[4]) * 121666
   672		h5 := int64(f[5]) * 121666
   673		h6 := int64(f[6]) * 121666
   674		h7 := int64(f[7]) * 121666
   675		h8 := int64(f[8]) * 121666
   676		h9 := int64(f[9]) * 121666
   677		var carry [10]int64
   678	
   679		carry[9] = (h9 + (1 << 24)) >> 25
   680		h0 += carry[9] * 19
   681		h9 -= carry[9] << 25
   682		carry[1] = (h1 + (1 << 24)) >> 25
   683		h2 += carry[1]
   684		h1 -= carry[1] << 25
   685		carry[3] = (h3 + (1 << 24)) >> 25
   686		h4 += carry[3]
   687		h3 -= carry[3] << 25
   688		carry[5] = (h5 + (1 << 24)) >> 25
   689		h6 += carry[5]
   690		h5 -= carry[5] << 25
   691		carry[7] = (h7 + (1 << 24)) >> 25
   692		h8 += carry[7]
   693		h7 -= carry[7] << 25
   694	
   695		carry[0] = (h0 + (1 << 25)) >> 26
   696		h1 += carry[0]
   697		h0 -= carry[0] << 26
   698		carry[2] = (h2 + (1 << 25)) >> 26
   699		h3 += carry[2]
   700		h2 -= carry[2] << 26
   701		carry[4] = (h4 + (1 << 25)) >> 26
   702		h5 += carry[4]
   703		h4 -= carry[4] << 26
   704		carry[6] = (h6 + (1 << 25)) >> 26
   705		h7 += carry[6]
   706		h6 -= carry[6] << 26
   707		carry[8] = (h8 + (1 << 25)) >> 26
   708		h9 += carry[8]
   709		h8 -= carry[8] << 26
   710	
   711		h[0] = int32(h0)
   712		h[1] = int32(h1)
   713		h[2] = int32(h2)
   714		h[3] = int32(h3)
   715		h[4] = int32(h4)
   716		h[5] = int32(h5)
   717		h[6] = int32(h6)
   718		h[7] = int32(h7)
   719		h[8] = int32(h8)
   720		h[9] = int32(h9)
   721	}
   722	
   723	// feInvert sets out = z^-1.
   724	func feInvert(out, z *fieldElement) {
   725		var t0, t1, t2, t3 fieldElement
   726		var i int
   727	
   728		feSquare(&t0, z)
   729		for i = 1; i < 1; i++ {
   730			feSquare(&t0, &t0)
   731		}
   732		feSquare(&t1, &t0)
   733		for i = 1; i < 2; i++ {
   734			feSquare(&t1, &t1)
   735		}
   736		feMul(&t1, z, &t1)
   737		feMul(&t0, &t0, &t1)
   738		feSquare(&t2, &t0)
   739		for i = 1; i < 1; i++ {
   740			feSquare(&t2, &t2)
   741		}
   742		feMul(&t1, &t1, &t2)
   743		feSquare(&t2, &t1)
   744		for i = 1; i < 5; i++ {
   745			feSquare(&t2, &t2)
   746		}
   747		feMul(&t1, &t2, &t1)
   748		feSquare(&t2, &t1)
   749		for i = 1; i < 10; i++ {
   750			feSquare(&t2, &t2)
   751		}
   752		feMul(&t2, &t2, &t1)
   753		feSquare(&t3, &t2)
   754		for i = 1; i < 20; i++ {
   755			feSquare(&t3, &t3)
   756		}
   757		feMul(&t2, &t3, &t2)
   758		feSquare(&t2, &t2)
   759		for i = 1; i < 10; i++ {
   760			feSquare(&t2, &t2)
   761		}
   762		feMul(&t1, &t2, &t1)
   763		feSquare(&t2, &t1)
   764		for i = 1; i < 50; i++ {
   765			feSquare(&t2, &t2)
   766		}
   767		feMul(&t2, &t2, &t1)
   768		feSquare(&t3, &t2)
   769		for i = 1; i < 100; i++ {
   770			feSquare(&t3, &t3)
   771		}
   772		feMul(&t2, &t3, &t2)
   773		feSquare(&t2, &t2)
   774		for i = 1; i < 50; i++ {
   775			feSquare(&t2, &t2)
   776		}
   777		feMul(&t1, &t2, &t1)
   778		feSquare(&t1, &t1)
   779		for i = 1; i < 5; i++ {
   780			feSquare(&t1, &t1)
   781		}
   782		feMul(out, &t1, &t0)
   783	}
   784	
   785	func scalarMult(out, in, base *[32]byte) {
   786		var e [32]byte
   787	
   788		copy(e[:], in[:])
   789		e[0] &= 248
   790		e[31] &= 127
   791		e[31] |= 64
   792	
   793		var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
   794		feFromBytes(&x1, base)
   795		feOne(&x2)
   796		feCopy(&x3, &x1)
   797		feOne(&z3)
   798	
   799		swap := int32(0)
   800		for pos := 254; pos >= 0; pos-- {
   801			b := e[pos/8] >> uint(pos&7)
   802			b &= 1
   803			swap ^= int32(b)
   804			feCSwap(&x2, &x3, swap)
   805			feCSwap(&z2, &z3, swap)
   806			swap = int32(b)
   807	
   808			feSub(&tmp0, &x3, &z3)
   809			feSub(&tmp1, &x2, &z2)
   810			feAdd(&x2, &x2, &z2)
   811			feAdd(&z2, &x3, &z3)
   812			feMul(&z3, &tmp0, &x2)
   813			feMul(&z2, &z2, &tmp1)
   814			feSquare(&tmp0, &tmp1)
   815			feSquare(&tmp1, &x2)
   816			feAdd(&x3, &z3, &z2)
   817			feSub(&z2, &z3, &z2)
   818			feMul(&x2, &tmp1, &tmp0)
   819			feSub(&tmp1, &tmp1, &tmp0)
   820			feSquare(&z2, &z2)
   821			feMul121666(&z3, &tmp1)
   822			feSquare(&x3, &x3)
   823			feAdd(&tmp0, &tmp0, &z3)
   824			feMul(&z3, &x1, &z2)
   825			feMul(&z2, &tmp1, &tmp0)
   826		}
   827	
   828		feCSwap(&x2, &x3, swap)
   829		feCSwap(&z2, &z3, swap)
   830	
   831		feInvert(&z2, &z2)
   832		feMul(&x2, &x2, &z2)
   833		feToBytes(out, &x2)
   834	}
   835	

View as plain text