...

Text file src/pkg/crypto/sha1/sha1block_arm.s

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	//
     5	// ARM version of sha1block.go
     6	
     7	#include "textflag.h"
     8	
     9	// SHA-1 block routine. See sha1block.go for Go equivalent.
    10	//
    11	// There are 80 rounds of 4 types:
    12	//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    13	//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    14	//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    15	//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    16	//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    17	//
    18	// Each round loads or shuffles the data, then computes a per-round
    19	// function of b, c, d, and then mixes the result into and rotates the
    20	// five registers a, b, c, d, e holding the intermediate results.
    21	//
    22	// The register rotation is implemented by rotating the arguments to
    23	// the round macros instead of by explicit move instructions.
    24	
    25	// Register definitions
    26	#define Rdata	R0	// Pointer to incoming data; each LOAD advances it by 4 bytes
    27	#define Rconst	R1	// Current constant for SHA round; reloaded before each group of 20 rounds
    28	#define Ra	R2		// SHA-1 accumulator
    29	#define Rb	R3		// SHA-1 accumulator
    30	#define Rc	R4		// SHA-1 accumulator
    31	#define Rd	R5		// SHA-1 accumulator
    32	#define Re	R6		// SHA-1 accumulator
    33	#define Rt0	R7		// Temporary
    34	#define Rt1	R8		// Temporary; FUNC1-FUNC4 leave their result here for MIX to consume
    35	// r9, r10 are forbidden (presumably reserved by the Go toolchain - confirm)
    36	// r11 is OK provided you check the assembler that no synthetic instructions use it
    37	#define Rt2	R11		// Temporary
    38	#define Rctr	R12	// loop counter; reused as scratch when the saved a..e are reloaded after round 79
    39	#define Rw	R14		// point to w buffer; reused as scratch when the saved a..e are reloaded after round 79
    40	
    41	// func block(dig *digest, p []byte)
    42	// 0(FP) is *digest
    43	// 4(FP) is p.array (struct Slice)
    44	// 8(FP) is p.len
    45	//12(FP) is p.cap
    46	//
    47	// Stack frame (name-offset(SP) operands; offsets count down from the top of the frame)
    48	#define p_end	end-4(SP)		// pointer to the end of data
    49	#define p_data	data-8(SP)	// current data pointer; not referenced by the code shown in this file
    50	#define w_buf	buf-(8+4*80)(SP)	// 80 words temporary buffer w uint32[80]
    51	#define saved	abcde-(8+4*80+4*5)(SP)	// saved sha1 registers a,b,c,d,e - these must be last; never referenced by name, block spills/reloads them with MOVM.IB on R13 instead
    52	// Total size: 8 + 4*80 + 4*5 = 348, +4 for saved LR is 352 (the $352 in the TEXT directive)
    53	
    54		// w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]  (big-endian word built from four byte loads; Rdata += 4; w[i] stored at (Rw), Rw += 4)
    55		// e += w[i]  (the macro argument is the register acting as e this round; clobbers Rt0, Rt1, Rt2)
    56	#define LOAD(Re) \
    57		MOVBU	2(Rdata), Rt0 ; \
    58		MOVBU	3(Rdata), Rt1 ; \
    59		MOVBU	1(Rdata), Rt2 ; \
    60		ORR	Rt0<<8, Rt1, Rt0	    ; \
    61		MOVBU.P	4(Rdata), Rt1 ; \
    62		ORR	Rt2<<16, Rt0, Rt0	    ; \
    63		ORR	Rt1<<24, Rt0, Rt0	    ; \
    64		MOVW.P	Rt0, 4(Rw)		    ; \
    65		ADD	Rt0, Re, Re
    66	
    67		// tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]  (Rw points at w[i]; w is a linear 80-word buffer here, so no &0xf wrap-around is used)
    68		// w[i] = tmp<<1 | tmp>>(32-1)  (done as a rotate right by 32-1; stored at (Rw), then Rw += 4)
    69		// e += w[i]  (the macro argument is the register acting as e this round; clobbers Rt0, Rt1, Rt2)
    70	#define SHUFFLE(Re) \
    71		MOVW	(-16*4)(Rw), Rt0 ; \
    72		MOVW	(-14*4)(Rw), Rt1 ; \
    73		MOVW	(-8*4)(Rw), Rt2  ; \
    74		EOR	Rt0, Rt1, Rt0  ; \
    75		MOVW	(-3*4)(Rw), Rt1  ; \
    76		EOR	Rt2, Rt0, Rt0  ; \
    77		EOR	Rt0, Rt1, Rt0  ; \
    78		MOVW	Rt0@>(32-1), Rt0  ; \
    79		MOVW.P	Rt0, 4(Rw)	  ; \
    80		ADD	Rt0, Re, Re
    81	
    82		// t1 = (b & c) | ((~b) & d)  -- result left in Rt1, Rt0 is scratch; the Ra and Re arguments are not used
    83	#define FUNC1(Ra, Rb, Rc, Rd, Re) \
    84		MVN	Rb, Rt1	   ; \
    85		AND	Rb, Rc, Rt0  ; \
    86		AND	Rd, Rt1, Rt1 ; \
    87		ORR	Rt0, Rt1, Rt1
    88	
    89		// t1 = b ^ c ^ d  -- result left in Rt1; Rt0 is not touched; the Ra and Re arguments are not used
    90	#define FUNC2(Ra, Rb, Rc, Rd, Re) \
    91		EOR	Rb, Rc, Rt1 ; \
    92		EOR	Rd, Rt1, Rt1
    93	
    94		// t1 = (b & c) | (b & d) | (c & d) =
    95		// t1 = (b & c) | ((b | c) & d)  -- the form computed below; result left in Rt1, Rt0 is scratch
    96	#define FUNC3(Ra, Rb, Rc, Rd, Re) \
    97		ORR	Rb, Rc, Rt0  ; \
    98		AND	Rb, Rc, Rt1  ; \
    99		AND	Rd, Rt0, Rt0 ; \
   100		ORR	Rt0, Rt1, Rt1
   101	
   102	#define FUNC4 FUNC2	// type 4 rounds reuse the type 2 function, t1 = b ^ c ^ d
   103	
   104		// a5 := a<<5 | a>>(32-5)     (written as a rotate right by 32-5, folded into the ADD)
   105		// b = b<<30 | b>>(32-30)     (written as a rotate right by 32-30, updating b's register in place)
   106		// e = a5 + t1 + e + const    (t1 is whatever the preceding FUNCn left in Rt1; const is Rconst)
   107	#define MIX(Ra, Rb, Rc, Rd, Re) \
   108		ADD	Rt1, Re, Re	 ; \
   109		MOVW	Rb@>(32-30), Rb	 ; \
   110		ADD	Ra@>(32-5), Re, Re ; \
   111		ADD	Rconst, Re, Re
   112	// One round = add w[i] into e (LOAD reads it from the input, SHUFFLE derives it from earlier w), compute FUNCn into Rt1, then MIX.
   113	#define ROUND1(Ra, Rb, Rc, Rd, Re) \
   114		LOAD(Re)		; \
   115		FUNC1(Ra, Rb, Rc, Rd, Re)	; \
   116		MIX(Ra, Rb, Rc, Rd, Re)
   117	// ROUND1x: same function as ROUND1, but w[i] comes from SHUFFLE instead of the input data.
   118	#define ROUND1x(Ra, Rb, Rc, Rd, Re) \
   119		SHUFFLE(Re)	; \
   120		FUNC1(Ra, Rb, Rc, Rd, Re)	; \
   121		MIX(Ra, Rb, Rc, Rd, Re)
   122	// ROUND2: SHUFFLE + FUNC2 (b ^ c ^ d).
   123	#define ROUND2(Ra, Rb, Rc, Rd, Re) \
   124		SHUFFLE(Re)	; \
   125		FUNC2(Ra, Rb, Rc, Rd, Re)	; \
   126		MIX(Ra, Rb, Rc, Rd, Re)
   127	// ROUND3: SHUFFLE + FUNC3 (majority of b, c, d).
   128	#define ROUND3(Ra, Rb, Rc, Rd, Re) \
   129		SHUFFLE(Re)	; \
   130		FUNC3(Ra, Rb, Rc, Rd, Re)	; \
   131		MIX(Ra, Rb, Rc, Rd, Re)
   132	// ROUND4: SHUFFLE + FUNC4, which is defined as FUNC2.
   133	#define ROUND4(Ra, Rb, Rc, Rd, Re) \
   134		SHUFFLE(Re)	; \
   135		FUNC4(Ra, Rb, Rc, Rd, Re)	; \
   136		MIX(Ra, Rb, Rc, Rd, Re)
   137	// block runs the 80 SHA-1 rounds over each 64-byte chunk of p (16 LOADs of 4 bytes) and adds the result into the five words at *dig.
   138	// NOTE(review): the end-of-data test is at the bottom of the loop, so one chunk is always read; presumably callers pass a non-zero multiple of 64 bytes - confirm.
   139	// func block(dig *digest, p []byte)
   140	TEXT	·block(SB), 0, $352-16
   141		MOVW	p+4(FP), Rdata	// pointer to the data
   142		MOVW	p_len+8(FP), Rt0	// number of bytes
   143		ADD	Rdata, Rt0	// Rt0 = data pointer + length
   144		MOVW	Rt0, p_end	// pointer to end of data
   145	
   146		// Load up initial SHA-1 accumulator (five consecutive words starting at *dig)
   147		MOVW	dig+0(FP), Rt0
   148		MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]
   149	
   150	loop:
   151		// Save registers at SP+4 onwards (.IB increments before each store, so a..e land at 4..20(R13))
   152		MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
   153	
   154		MOVW	$w_buf, Rw	// Rw = &w[0]; LOAD and SHUFFLE advance it by one word per round
   155		MOVW	$0x5A827999, Rconst	// constant for rounds 0-19
   156		MOVW	$3, Rctr	// 3 iterations x 5 rounds = rounds 0-14
   157	loop1:	ROUND1(Ra, Rb, Rc, Rd, Re)
   158		ROUND1(Re, Ra, Rb, Rc, Rd)
   159		ROUND1(Rd, Re, Ra, Rb, Rc)
   160		ROUND1(Rc, Rd, Re, Ra, Rb)
   161		ROUND1(Rb, Rc, Rd, Re, Ra)	// after 5 rotated rounds the registers are back in their a,b,c,d,e roles
   162		SUB.S	$1, Rctr	// .S updates the flags tested by BNE
   163		BNE	loop1
   164	
   165		ROUND1(Ra, Rb, Rc, Rd, Re)	// round 15: last word loaded from the input
   166		ROUND1x(Re, Ra, Rb, Rc, Rd)	// rounds 16-19: type 1 function on shuffled words
   167		ROUND1x(Rd, Re, Ra, Rb, Rc)
   168		ROUND1x(Rc, Rd, Re, Ra, Rb)
   169		ROUND1x(Rb, Rc, Rd, Re, Ra)
   170	
   171		MOVW	$0x6ED9EBA1, Rconst	// constant for rounds 20-39
   172		MOVW	$4, Rctr	// 4 iterations x 5 rounds
   173	loop2:	ROUND2(Ra, Rb, Rc, Rd, Re)
   174		ROUND2(Re, Ra, Rb, Rc, Rd)
   175		ROUND2(Rd, Re, Ra, Rb, Rc)
   176		ROUND2(Rc, Rd, Re, Ra, Rb)
   177		ROUND2(Rb, Rc, Rd, Re, Ra)
   178		SUB.S	$1, Rctr
   179		BNE	loop2
   180	
   181		MOVW	$0x8F1BBCDC, Rconst	// constant for rounds 40-59
   182		MOVW	$4, Rctr	// 4 iterations x 5 rounds
   183	loop3:	ROUND3(Ra, Rb, Rc, Rd, Re)
   184		ROUND3(Re, Ra, Rb, Rc, Rd)
   185		ROUND3(Rd, Re, Ra, Rb, Rc)
   186		ROUND3(Rc, Rd, Re, Ra, Rb)
   187		ROUND3(Rb, Rc, Rd, Re, Ra)
   188		SUB.S	$1, Rctr
   189		BNE	loop3
   190	
   191		MOVW	$0xCA62C1D6, Rconst	// constant for rounds 60-79
   192		MOVW	$4, Rctr	// 4 iterations x 5 rounds
   193	loop4:	ROUND4(Ra, Rb, Rc, Rd, Re)
   194		ROUND4(Re, Ra, Rb, Rc, Rd)
   195		ROUND4(Rd, Re, Ra, Rb, Rc)
   196		ROUND4(Rc, Rd, Re, Ra, Rb)
   197		ROUND4(Rb, Rc, Rd, Re, Ra)
   198		SUB.S	$1, Rctr
   199		BNE	loop4
   200	
   201		// Accumulate - reload the a..e saved at SP+4 into scratch registers (Rctr and Rw are free now) and add them in
   202		MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
   203		ADD	Rt0, Ra
   204		ADD	Rt1, Rb
   205		ADD	Rt2, Rc
   206		ADD	Rctr, Rd
   207		ADD	Rw, Re
   208	
   209		MOVW	p_end, Rt0
   210		CMP	Rt0, Rdata
   211		BLO	loop	// unsigned compare: another chunk while Rdata < p_end
   212	
   213		// Save final SHA-1 accumulator back to the five words at *dig
   214		MOVW	dig+0(FP), Rt0
   215		MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)
   216	
   217		RET

View as plain text