src/crypto/sha1/sha1block_amd64p32.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// SHA-1 block routine. See sha1block.go for Go equivalent.
//
// There are 80 rounds of 4 types:
//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
//
// Each round loads or shuffles the data, then computes a per-round
// function of b, c, d, and then mixes the result into and rotates the
// five registers a, b, c, d, e holding the intermediate results.
//
// The register rotation is implemented by rotating the arguments to
// the round macros instead of by explicit move instructions.
//
// amd64p32 version.
// To ensure safety for Native Client, avoids use of BP and R15
// as well as two-register addressing modes.
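//
// For reference, one round of the compression function in Go-like
// pseudocode (a sketch of what the macros below implement, with w the
// 16-word schedule window on the stack and K the per-type constant):
//
//	f := func_of(b, c, d)            // FUNC1, FUNC2, FUNC3, or FUNC4
//	t := rotl(a, 5) + f + e + w[i&0xf] + K
//	e, d, c, b, a = d, c, rotl(b, 30), a, t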
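// LOAD fetches message word `index` from the block at SI, converts it
// from big-endian to host order, and stores it in the 16-word window
// on the stack. R10 carries the word into the following MIX.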
#define LOAD(index) \
	MOVL	(index*4)(SI), R10; \
	BSWAPL	R10; \
	MOVL	R10, (index*4)(SP)

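// SHUFFLE computes the message schedule in place: the window slot for
// index still holds w[index-16], so this is
// w[index] = rotl(w[index-3] ^ w[index-8] ^ w[index-14] ^ w[index-16], 1),
// with all indices reduced mod 16.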
#define SHUFFLE(index) \
	MOVL	(((index)&0xf)*4)(SP), R10; \
	XORL	(((index-3)&0xf)*4)(SP), R10; \
	XORL	(((index-8)&0xf)*4)(SP), R10; \
	XORL	(((index-14)&0xf)*4)(SP), R10; \
	ROLL	$1, R10; \
	MOVL	R10, (((index)&0xf)*4)(SP)

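// FUNC1 is the "choose" function Ch(b, c, d) = (b AND c) OR (NOT b AND d),
// computed with one fewer operation as ((c XOR d) AND b) XOR d.
// The result is left in R9 for MIX.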
#define FUNC1(a, b, c, d, e) \
	MOVL	d, R9; \
	XORL	c, R9; \
	ANDL	b, R9; \
	XORL	d, R9

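// FUNC2 is the parity function b XOR c XOR d.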
#define FUNC2(a, b, c, d, e) \
	MOVL	b, R9; \
	XORL	c, R9; \
	XORL	d, R9

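// FUNC3 is the majority function Maj(b, c, d) = (b AND c) OR (b AND d)
// OR (c AND d), computed here as (b AND c) OR ((b OR c) AND d).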
#define FUNC3(a, b, c, d, e) \
	MOVL	b, R8; \
	ORL	c, R8; \
	ANDL	d, R8; \
	MOVL	b, R9; \
	ANDL	c, R9; \
	ORL	R8, R9

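// Rounds 60-79 use the same parity function as rounds 20-39.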
#define FUNC4 FUNC2

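// MIX folds the round function result (R9) and the message word (R10)
// into the state: e += rotl(a, 5) + f + w + const, and b is rotated
// left by 30. The a/b/c/d/e renaming happens in the macro arguments.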
#define MIX(a, b, c, d, e, const) \
	ROLL	$30, b; \
	ADDL	R9, e; \
	MOVL	a, R8; \
	ROLL	$5, R8; \
	LEAL	const(e)(R10*1), e; \
	ADDL	R8, e

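// The four round constants below are the standard SHA-1 values
// floor(2^30 * sqrt(k)) for k = 2, 3, 5, 10.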
#define ROUND1(a, b, c, d, e, index) \
	LOAD(index); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x5A827999)

#define ROUND1x(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x5A827999)

#define ROUND2(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC2(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x6ED9EBA1)

#define ROUND3(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC3(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x8F1BBCDC)

#define ROUND4(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC4(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0xCA62C1D6)

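// Declared in Go as block(dig *digest, p []byte). The 64-byte frame
// holds the 16-word schedule window. On amd64p32, pointers and ints
// are 4 bytes, so the 16 bytes of arguments are the dig pointer
// (4 bytes) followed by the p slice header (12 bytes).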
TEXT ·block(SB),NOSPLIT,$64-16
	MOVL	dig+0(FP),	R14
	MOVL	p_base+4(FP),	SI
	MOVL	p_len+8(FP),	DX
	SHRQ	$6,		DX
	SHLQ	$6,		DX	// DX = len(p) rounded down to a multiple of the 64-byte block size

	LEAQ	(SI)(DX*1),	DI	// DI = end of the last full block
	MOVL	(0*4)(R14),	AX	// load the current digest h0-h4
	MOVL	(1*4)(R14),	BX
	MOVL	(2*4)(R14),	CX
	MOVL	(3*4)(R14),	DX
	MOVL	(4*4)(R14),	R13

	CMPQ	SI,		DI	// no full blocks: nothing to do
	JEQ	end

loop:
#define BP R13 /* keep diff from sha1block_amd64.s small */
	ROUND1(AX, BX, CX, DX, BP, 0)
	ROUND1(BP, AX, BX, CX, DX, 1)
	ROUND1(DX, BP, AX, BX, CX, 2)
	ROUND1(CX, DX, BP, AX, BX, 3)
	ROUND1(BX, CX, DX, BP, AX, 4)
	ROUND1(AX, BX, CX, DX, BP, 5)
	ROUND1(BP, AX, BX, CX, DX, 6)
	ROUND1(DX, BP, AX, BX, CX, 7)
	ROUND1(CX, DX, BP, AX, BX, 8)
	ROUND1(BX, CX, DX, BP, AX, 9)
	ROUND1(AX, BX, CX, DX, BP, 10)
	ROUND1(BP, AX, BX, CX, DX, 11)
	ROUND1(DX, BP, AX, BX, CX, 12)
	ROUND1(CX, DX, BP, AX, BX, 13)
	ROUND1(BX, CX, DX, BP, AX, 14)
	ROUND1(AX, BX, CX, DX, BP, 15)

	ROUND1x(BP, AX, BX, CX, DX, 16)
	ROUND1x(DX, BP, AX, BX, CX, 17)
	ROUND1x(CX, DX, BP, AX, BX, 18)
	ROUND1x(BX, CX, DX, BP, AX, 19)

	ROUND2(AX, BX, CX, DX, BP, 20)
	ROUND2(BP, AX, BX, CX, DX, 21)
	ROUND2(DX, BP, AX, BX, CX, 22)
	ROUND2(CX, DX, BP, AX, BX, 23)
	ROUND2(BX, CX, DX, BP, AX, 24)
	ROUND2(AX, BX, CX, DX, BP, 25)
	ROUND2(BP, AX, BX, CX, DX, 26)
	ROUND2(DX, BP, AX, BX, CX, 27)
	ROUND2(CX, DX, BP, AX, BX, 28)
	ROUND2(BX, CX, DX, BP, AX, 29)
	ROUND2(AX, BX, CX, DX, BP, 30)
	ROUND2(BP, AX, BX, CX, DX, 31)
	ROUND2(DX, BP, AX, BX, CX, 32)
	ROUND2(CX, DX, BP, AX, BX, 33)
	ROUND2(BX, CX, DX, BP, AX, 34)
	ROUND2(AX, BX, CX, DX, BP, 35)
	ROUND2(BP, AX, BX, CX, DX, 36)
	ROUND2(DX, BP, AX, BX, CX, 37)
	ROUND2(CX, DX, BP, AX, BX, 38)
	ROUND2(BX, CX, DX, BP, AX, 39)

	ROUND3(AX, BX, CX, DX, BP, 40)
	ROUND3(BP, AX, BX, CX, DX, 41)
	ROUND3(DX, BP, AX, BX, CX, 42)
	ROUND3(CX, DX, BP, AX, BX, 43)
	ROUND3(BX, CX, DX, BP, AX, 44)
	ROUND3(AX, BX, CX, DX, BP, 45)
	ROUND3(BP, AX, BX, CX, DX, 46)
	ROUND3(DX, BP, AX, BX, CX, 47)
	ROUND3(CX, DX, BP, AX, BX, 48)
	ROUND3(BX, CX, DX, BP, AX, 49)
	ROUND3(AX, BX, CX, DX, BP, 50)
	ROUND3(BP, AX, BX, CX, DX, 51)
	ROUND3(DX, BP, AX, BX, CX, 52)
	ROUND3(CX, DX, BP, AX, BX, 53)
	ROUND3(BX, CX, DX, BP, AX, 54)
	ROUND3(AX, BX, CX, DX, BP, 55)
	ROUND3(BP, AX, BX, CX, DX, 56)
	ROUND3(DX, BP, AX, BX, CX, 57)
	ROUND3(CX, DX, BP, AX, BX, 58)
	ROUND3(BX, CX, DX, BP, AX, 59)

	ROUND4(AX, BX, CX, DX, BP, 60)
	ROUND4(BP, AX, BX, CX, DX, 61)
	ROUND4(DX, BP, AX, BX, CX, 62)
	ROUND4(CX, DX, BP, AX, BX, 63)
	ROUND4(BX, CX, DX, BP, AX, 64)
	ROUND4(AX, BX, CX, DX, BP, 65)
	ROUND4(BP, AX, BX, CX, DX, 66)
	ROUND4(DX, BP, AX, BX, CX, 67)
	ROUND4(CX, DX, BP, AX, BX, 68)
	ROUND4(BX, CX, DX, BP, AX, 69)
	ROUND4(AX, BX, CX, DX, BP, 70)
	ROUND4(BP, AX, BX, CX, DX, 71)
	ROUND4(DX, BP, AX, BX, CX, 72)
	ROUND4(CX, DX, BP, AX, BX, 73)
	ROUND4(BX, CX, DX, BP, AX, 74)
	ROUND4(AX, BX, CX, DX, BP, 75)
	ROUND4(BP, AX, BX, CX, DX, 76)
	ROUND4(DX, BP, AX, BX, CX, 77)
	ROUND4(CX, DX, BP, AX, BX, 78)
	ROUND4(BX, CX, DX, BP, AX, 79)
#undef BP

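	// Add this block's result into the saved digest and store it back.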
	ADDL	(0*4)(R14), AX
	ADDL	(1*4)(R14), BX
	ADDL	(2*4)(R14), CX
	ADDL	(3*4)(R14), DX
	ADDL	(4*4)(R14), R13

	MOVL	AX, (0*4)(R14)
	MOVL	BX, (1*4)(R14)
	MOVL	CX, (2*4)(R14)
	MOVL	DX, (3*4)(R14)
	MOVL	R13, (4*4)(R14)

	ADDQ	$64, SI	// advance to the next 64-byte block
	CMPQ	SI, DI
	JB	loop

end:
	RET
