// Original source:
// http://www.zorinaq.com/papers/md5-amd64.html
// http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

// +build ppc64 ppc64le

#include "textflag.h"

// ENDIAN_MOVE generates the appropriate
// 4 byte load for big or little endian.
// The 4 bytes at ptr+off are loaded into dst.
// The idx reg is only needed for big endian
// and is clobbered when used.
#ifdef GOARCH_ppc64le
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVWZ	off(ptr),dst
#else
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVD	$off,idx; \
	MOVWBR	(idx)(ptr), dst
#endif
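
// For example, ENDIAN_MOVE(8,R6,R8,R21) loads the 4 bytes at 8(R6)
// into R8 as a little-endian uint32: a plain MOVWZ on ppc64le, and a
// byte-reversed load (MOVWBR through the index register R21) on
// big-endian ppc64.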
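
// block updates the digest state in dig with the 64-byte blocks in p.
// Register usage below:
//	R6  - pointer into p, advanced by 64 bytes per loop iteration
//	R7  - end of the data
//	R10 - digest pointer on entry; ROUND2 reuses it as a temporary,
//	      so it is reloaded from dig+0(FP) at end
//	R22, R3, R4, R5 - the working state a, b, c, d
//	R14, R15, R16, R17 - copies of a, b, c, d saved at the top of loop
//	R8  - the current message word
//	R9, R21 - temporaries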
TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	SLD	$6, R5
	SRD	$6, R5
	ADD	R6, R5, R7		// R7 = end of the data

	MOVWZ	0(R10), R22		// a
	MOVWZ	4(R10), R3		// b
	MOVWZ	8(R10), R4		// c
	MOVWZ	12(R10), R5		// d
	CMP	R6, R7
	BEQ	end

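// loop processes one 64-byte block of p per iteration.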
loop:
	MOVWZ	R22, R14	// save a
	MOVWZ	R3, R15		// save b
	MOVWZ	R4, R16		// save c
	MOVWZ	R5, R17		// save d

	ENDIAN_MOVE(0,R6,R8,R21)	// R8 = x[0], the word for the first ROUND1 step
	MOVWZ	R5, R9			// R9 = d

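// ROUND1 performs one step of the first MD5 round:
//	a = b + rotateLeft32(a + F(b,c,d) + x[k] + const, shift)
// with F(b,c,d) = ((c ^ d) & b) ^ d. The message word x[k] for the
// current step is already in R8 and d is in R9; the macro prefetches
// x[index], the word for the next step, into R8 and leaves c (the
// next step's d) in R9. In the last step of each round the index
// argument only serves as a dummy prefetch. The trailing MOVWZ a, a
// zero-extends a, discarding the bits above 32 produced by the
// 64-bit adds.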
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	ENDIAN_MOVE(1*4,R6,R8,R21)	// R8 = x[1], the word for the first ROUND2 step
	MOVWZ	R5, R9			// R9 = d
	MOVWZ	R5, R10			// R10 = d

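// ROUND2 performs one step of the second MD5 round:
//	a = b + rotateLeft32(a + G(b,c,d) + x[k] + const, shift)
// with G(b,c,d) = (b & d) | (c & ^d), built from R9 = d and R10 = d
// set up above. As in ROUND1, x[k] is already in R8, x[index] is the
// prefetch for the next step, and R9/R10 are left holding c, the
// next step's d.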
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	ENDIAN_MOVE(5*4,R6,R8,R21)	// R8 = x[5], the word for the first ROUND3 step
	MOVWZ	R4, R9			// R9 = c

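// ROUND3 performs one step of the third MD5 round:
//	a = b + rotateLeft32(a + H(b,c,d) + x[k] + const, shift)
// with H(b,c,d) = b ^ c ^ d. R9 enters holding c and leaves holding
// b, which becomes c in the next step; x[index] is again the
// prefetch for the following step.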
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	ENDIAN_MOVE(0,R6,R8,R21)	// R8 = x[0], the word for the first ROUND4 step
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9			// R9 = ^d

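// ROUND4 performs one step of the fourth MD5 round:
//	a = b + rotateLeft32(a + I(b,c,d) + x[k] + const, shift)
// with I(b,c,d) = c ^ (b | ^d). R9 enters holding ^d and leaves
// holding ^c, the complement of the next step's d; x[index] is the
// prefetch for the following step.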
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);

	ADD	R14, R22	// a += saved a
	ADD	R15, R3		// b += saved b
	ADD	R16, R4		// c += saved c
	ADD	R17, R5		// d += saved d
	ADD	$64, R6		// advance to the next 64-byte block
	CMP	R6, R7
	BLT	loop

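// Store the updated state back into the digest. dig is reloaded
// because R10 was overwritten as a temporary in ROUND2.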
end:
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET