...

Text file src/crypto/md5/md5block_arm.s

     1	// Copyright 2013 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	//
     5	// ARM version of md5block.go
     6	
     7	#include "textflag.h"
     8	
     9	// Register definitions (symbolic names used by the block routine and its ROUNDn macros)
    10	#define Rtable	R0	// Pointer to MD5 constants table; each MOVM.IA.W on it writes the pointer back
    11	#define Rdata	R1	// Pointer to data to hash (the source block, or the aligned copy in buf)
    12	#define Ra	R2	// MD5 accumulator a (1st word loaded from / stored to dig)
    13	#define Rb	R3	// MD5 accumulator b (2nd word loaded from / stored to dig)
    14	#define Rc	R4	// MD5 accumulator c (3rd word loaded from / stored to dig)
    15	#define Rd	R5	// MD5 accumulator d (4th word loaded from / stored to dig)
    16	#define Rc0	R6	// MD5 constant, 1st of each group of 4 (also scratch: memmove n, dig pointer)
    17	#define Rc1	R7	// MD5 constant, 2nd of each group of 4
    18	#define Rc2	R8	// MD5 constant, 3rd of each group of 4
    19	// r9, r10 are forbidden
    20	// r11 is OK provided you check the assembler that no synthetic instructions use it
    21	#define Rc3	R11	// MD5 constant, 4th of each group of 4
    22	#define Rt0	R12	// temporary: round-function value and running sum inside ROUNDn
    23	#define Rt1	R14	// temporary: message word X[index] inside ROUNDn
    24	
    25	// func block(dig *digest, p []byte) - hashes p in 64-byte blocks, updating the 4 words at dig in place
    26	// 0(FP) is *digest
    27	// 4(FP) is p.array (struct Slice)
    28	// 8(FP) is p.len
    29	//12(FP) is p.cap
    30	// NOTE(review): the end-of-data test sits at the bottom of the loop, so one block is hashed even if p.len is 0 - presumably callers always pass at least 64 bytes; confirm.
    31	// Stack frame (84 bytes = 8 for the two saved pointers + 64 for buf + 12 for called routine parameters)
    32	#define p_end	end-4(SP)	// pointer to the end of data
    33	#define p_data	data-8(SP)	// current data pointer
    34	#define buf	buffer-(8+4*16)(SP)	//16 words temporary buffer
    35			// 3 words at 4..12(R13) for called routine parameters
    36	
    37	TEXT	·block(SB), NOSPLIT, $84-16	// 84-byte frame, 16 bytes of arguments
    38		MOVW	p+4(FP), Rdata	// pointer to the data
    39		MOVW	p_len+8(FP), Rt0	// number of bytes
    40		ADD	Rdata, Rt0	// Rt0 = data pointer + number of bytes
    41		MOVW	Rt0, p_end	// pointer to end of data
    42	
    43	loop:	// one iteration per 64-byte block
    44		MOVW	Rdata, p_data	// Save Rdata (memmove corrupts it and the unaligned path repoints it at buf)
    45		AND.S	$3, Rdata, Rt0	// sets Z when Rdata is 4-byte aligned (TST $3, Rdata not working see issue 5921)
    46		BEQ	aligned			// aligned detected - skip copy
    47	
    48		// Copy the unaligned source data into the aligned temporary buffer
    49		// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
    50		MOVW	$buf, Rtable	// to
    51		MOVW	$64, Rc0		// n
    52		MOVM.IB	[Rtable,Rdata,Rc0], (R13)	// store to, from, n at 4(R13), 8(R13), 12(R13)
    53		BL	runtime·memmove(SB)	// copy one 64-byte block into buf
    54	
    55		// Point to the local aligned copy of the data
    56		MOVW	$buf, Rdata
    57	
    58	aligned:
    59		// Point to the table of constants
    60		// A PC relative add would be cheaper than this
    61		MOVW	$·table(SB), Rtable	// reset to the start of the table for every block
    62	
    63		// Load up initial MD5 accumulator
    64		MOVW	dig+0(FP), Rc0	// Rc0 = dig (Rc0 is free until the first constants load)
    65		MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]	// a, b, c, d = the 4 words at dig
    66	
    67	// Round 1 step: a += (((c^d)&b)^d) + X[index] + const
    68	// then a = (a<<shift | a>>(32-shift)) + b, done as one ADD with a rotated operand
    69	#define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    70		EOR	Rc, Rd, Rt0		; \
    71		AND	Rb, Rt0			; \
    72		EOR	Rd, Rt0			; \
    73		MOVW	(index<<2)(Rdata), Rt1	; \
    74		ADD	Rt1, Rt0			; \
    75		ADD	Rconst, Rt0			; \
    76		ADD	Rt0, Ra			; \
    77		ADD	Ra@>(32-shift), Rb, Ra	;
    78	
    79		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants; .W writes the advanced pointer back to Rtable
    80		ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
    81		ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
    82		ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
    83		ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)
    84	
    85		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    86		ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
    87		ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
    88		ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
    89		ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)
    90	
    91		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    92		ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
    93		ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
    94		ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
    95		ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
    96	
    97		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    98		ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
    99		ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
   100		ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
   101		ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
   102	
   103	// Round 2 step: a += (((b^c)&d)^c) + X[index] + const
   104	// then a = (a<<shift | a>>(32-shift)) + b, done as one ADD with a rotated operand
   105	#define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   106		EOR	Rb, Rc, Rt0		; \
   107		AND	Rd, Rt0			; \
   108		EOR	Rc, Rt0			; \
   109		MOVW	(index<<2)(Rdata), Rt1	; \
   110		ADD	Rt1, Rt0			; \
   111		ADD	Rconst, Rt0			; \
   112		ADD	Rt0, Ra			; \
   113		ADD	Ra@>(32-shift), Rb, Ra	;
   114	
   115		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   116		ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
   117		ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
   118		ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
   119		ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)
   120	
   121		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   122		ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
   123		ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
   124		ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
   125		ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)
   126	
   127		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   128		ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
   129		ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
   130		ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
   131		ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)
   132	
   133		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   134		ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
   135		ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
   136		ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
   137		ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
   138	
   139	// Round 3 step: a += (b^c^d) + X[index] + const
   140	// then a = (a<<shift | a>>(32-shift)) + b, done as one ADD with a rotated operand
   141	#define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   142		EOR	Rb, Rc, Rt0		; \
   143		EOR	Rd, Rt0			; \
   144		MOVW	(index<<2)(Rdata), Rt1	; \
   145		ADD	Rt1, Rt0			; \
   146		ADD	Rconst, Rt0			; \
   147		ADD	Rt0, Ra			; \
   148		ADD	Ra@>(32-shift), Rb, Ra	;
   149	
   150		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   151		ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
   152		ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
   153		ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
   154		ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
   155	
   156		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   157		ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
   158		ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
   159		ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
   160		ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
   161	
   162		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   163		ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
   164		ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
   165		ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
   166		ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)
   167	
   168		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   169		ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
   170		ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
   171		ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
   172		ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)
   173	
   174	// Round 4 step: a += (c^(b|^d)) + X[index] + const, where ^d is the bitwise complement of d (MVN)
   175	// then a = (a<<shift | a>>(32-shift)) + b, done as one ADD with a rotated operand
   176	#define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   177		MVN	Rd, Rt0			; \
   178		ORR	Rb, Rt0			; \
   179		EOR	Rc, Rt0			; \
   180		MOVW	(index<<2)(Rdata), Rt1	; \
   181		ADD	Rt1, Rt0			; \
   182		ADD	Rconst, Rt0			; \
   183		ADD	Rt0, Ra			; \
   184		ADD	Ra@>(32-shift), Rb, Ra	;
   185	
   186		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   187		ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
   188		ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
   189		ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
   190		ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)
   191	
   192		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   193		ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
   194		ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
   195		ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
   196		ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)
   197	
   198		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   199		ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
   200		ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
   201		ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
   202		ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
   203	
   204		MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   205		ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
   206		ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
   207		ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
   208		ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)
   209	
   210		MOVW	dig+0(FP), Rt0	// Rt0 = dig
   211		MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]	// reload the 4 digest words as they were before this block
   212	
   213		ADD	Rc0, Ra	// add the previous digest words into the new a, b, c, d
   214		ADD	Rc1, Rb
   215		ADD	Rc2, Rc
   216		ADD	Rc3, Rd
   217	
   218		MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)	// store the updated digest words back at dig
   219	
   220		MOVW	p_data, Rdata	// restore the source pointer saved at the top of the loop
   221		MOVW	p_end, Rt0
   222		ADD	$64, Rdata	// advance to the next 64-byte block
   223		CMP	Rt0, Rdata
   224		BLO	loop	// repeat while Rdata < p_end (unsigned)
   225	
   226		RET
   227	
   228	// MD5 constants table: 64 32-bit words (256 bytes), 16 per round, stored in the order block consumes them
   229	
   230		// Round 1
   231		DATA	·table+0x00(SB)/4, $0xd76aa478
   232		DATA	·table+0x04(SB)/4, $0xe8c7b756
   233		DATA	·table+0x08(SB)/4, $0x242070db
   234		DATA	·table+0x0c(SB)/4, $0xc1bdceee
   235		DATA	·table+0x10(SB)/4, $0xf57c0faf
   236		DATA	·table+0x14(SB)/4, $0x4787c62a
   237		DATA	·table+0x18(SB)/4, $0xa8304613
   238		DATA	·table+0x1c(SB)/4, $0xfd469501
   239		DATA	·table+0x20(SB)/4, $0x698098d8
   240		DATA	·table+0x24(SB)/4, $0x8b44f7af
   241		DATA	·table+0x28(SB)/4, $0xffff5bb1
   242		DATA	·table+0x2c(SB)/4, $0x895cd7be
   243		DATA	·table+0x30(SB)/4, $0x6b901122
   244		DATA	·table+0x34(SB)/4, $0xfd987193
   245		DATA	·table+0x38(SB)/4, $0xa679438e
   246		DATA	·table+0x3c(SB)/4, $0x49b40821
   247		// Round 2
   248		DATA	·table+0x40(SB)/4, $0xf61e2562
   249		DATA	·table+0x44(SB)/4, $0xc040b340
   250		DATA	·table+0x48(SB)/4, $0x265e5a51
   251		DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
   252		DATA	·table+0x50(SB)/4, $0xd62f105d
   253		DATA	·table+0x54(SB)/4, $0x02441453
   254		DATA	·table+0x58(SB)/4, $0xd8a1e681
   255		DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
   256		DATA	·table+0x60(SB)/4, $0x21e1cde6
   257		DATA	·table+0x64(SB)/4, $0xc33707d6
   258		DATA	·table+0x68(SB)/4, $0xf4d50d87
   259		DATA	·table+0x6c(SB)/4, $0x455a14ed
   260		DATA	·table+0x70(SB)/4, $0xa9e3e905
   261		DATA	·table+0x74(SB)/4, $0xfcefa3f8
   262		DATA	·table+0x78(SB)/4, $0x676f02d9
   263		DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
   264		// Round 3
   265		DATA	·table+0x80(SB)/4, $0xfffa3942
   266		DATA	·table+0x84(SB)/4, $0x8771f681
   267		DATA	·table+0x88(SB)/4, $0x6d9d6122
   268		DATA	·table+0x8c(SB)/4, $0xfde5380c
   269		DATA	·table+0x90(SB)/4, $0xa4beea44
   270		DATA	·table+0x94(SB)/4, $0x4bdecfa9
   271		DATA	·table+0x98(SB)/4, $0xf6bb4b60
   272		DATA	·table+0x9c(SB)/4, $0xbebfbc70
   273		DATA	·table+0xa0(SB)/4, $0x289b7ec6
   274		DATA	·table+0xa4(SB)/4, $0xeaa127fa
   275		DATA	·table+0xa8(SB)/4, $0xd4ef3085
   276		DATA	·table+0xac(SB)/4, $0x04881d05
   277		DATA	·table+0xb0(SB)/4, $0xd9d4d039
   278		DATA	·table+0xb4(SB)/4, $0xe6db99e5
   279		DATA	·table+0xb8(SB)/4, $0x1fa27cf8
   280		DATA	·table+0xbc(SB)/4, $0xc4ac5665
   281		// Round 4
   282		DATA	·table+0xc0(SB)/4, $0xf4292244
   283		DATA	·table+0xc4(SB)/4, $0x432aff97
   284		DATA	·table+0xc8(SB)/4, $0xab9423a7
   285		DATA	·table+0xcc(SB)/4, $0xfc93a039
   286		DATA	·table+0xd0(SB)/4, $0x655b59c3
   287		DATA	·table+0xd4(SB)/4, $0x8f0ccc92
   288		DATA	·table+0xd8(SB)/4, $0xffeff47d
   289		DATA	·table+0xdc(SB)/4, $0x85845dd1
   290		DATA	·table+0xe0(SB)/4, $0x6fa87e4f
   291		DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
   292		DATA	·table+0xe8(SB)/4, $0xa3014314
   293		DATA	·table+0xec(SB)/4, $0x4e0811a1
   294		DATA	·table+0xf0(SB)/4, $0xf7537e82
   295		DATA	·table+0xf4(SB)/4, $0xbd3af235
   296		DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
   297		DATA	·table+0xfc(SB)/4, $0xeb86d391
   298		// Global definition: 256 bytes, flagged RODATA (the textflag.h name for the value 8 used previously)
   299		GLOBL	·table(SB),RODATA,$256

View as plain text