
Text file src/pkg/crypto/md5/md5block_ppc64x.s

// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler, based on
// the md5block_amd64.s implementation by the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

// +build ppc64 ppc64le

#include "textflag.h"

// ENDIAN_MOVE generates the appropriate
// 4 byte load for big or little endian.
// The 4 bytes at ptr+off are loaded into dst.
// The idx reg is only needed for big endian
// and is clobbered when used.
#ifdef GOARCH_ppc64le
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVWZ	off(ptr),dst
#else
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVD	$off,idx; \
	MOVWBR	(idx)(ptr), dst
#endif
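// For example, ENDIAN_MOVE(0,R6,R8,R21) expands to
//	MOVWZ	0(R6), R8
// on little endian, and to
//	MOVD	$0, R21
//	MOVWBR	(R21)(R6), R8
// on big endian.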
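// func block(dig *digest, p []byte)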
TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	SLD	$6, R5
	SRD	$6, R5
	ADD	R6, R5, R7

	MOVWZ	0(R10), R22
	MOVWZ	4(R10), R3
	MOVWZ	8(R10), R4
	MOVWZ	12(R10), R5
	CMP	R6, R7
	BEQ	end

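// Process the message one 64-byte block per iteration. R22, R3, R4 and R5
// hold the state words a, b, c and d, R6 walks the message, and R7 marks
// its end. R14-R17 save the state at the top of each block so it can be
// added back in at the bottom.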
loop:
	MOVWZ	R22, R14
	MOVWZ	R3, R15
	MOVWZ	R4, R16
	MOVWZ	R5, R17

	ENDIAN_MOVE(0,R6,R8,R21)
	MOVWZ	R5, R9

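// ROUND1 performs one step of the first MD5 round:
//	a = b + ((a + F(b,c,d) + X + const) <<< shift)
// where X is the message word already in R8 and
// F(b,c,d) = (b & c) | (~b & d), computed here as ((c ^ d) & b) ^ d.
// On entry R9 holds d; on exit R9 holds c (the next step's d) and R8 has
// been prefetched with message word index for the step that follows.
// RLWMI with mask $0xffffffff is a plain 32-bit left rotate.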
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	ENDIAN_MOVE(1*4,R6,R8,R21)
	MOVWZ	R5, R9
	MOVWZ	R5, R10

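// ROUND2 has the same step shape as ROUND1 but uses
// G(b,c,d) = (b & d) | (c & ~d). R9 and R10 both hold d on entry;
// R9 becomes c & ~d, R10 becomes b & d, and their OR is added to a.
// On exit both are reloaded with c, the next step's d.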
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	ENDIAN_MOVE(5*4,R6,R8,R21)
	MOVWZ	R4, R9

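// ROUND3 uses H(b,c,d) = b ^ c ^ d. R9 holds c on entry and b (the next
// step's c) on exit.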
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	ENDIAN_MOVE(0,R6,R8,R21)
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9

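// ROUND4 uses I(b,c,d) = c ^ (b | ~d). R9 holds ~d on entry and ~c (the
// next step's ~d) on exit.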
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);

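	// Feed the original state forward into the new state and
	// advance to the next 64-byte block.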
	ADD	R14, R22
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6
	CMP	R6, R7
	BLT	loop

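// Store the final state back into the digest.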
end:
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET
