Text file src/internal/bytealg/compare_ppc64x.s
1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ppc64 ppc64le
6
7 #include "go_asm.h"
8 #include "textflag.h"
9
10 TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
11 // Loads R3/R4 = lengths, R5/R6 = base addresses, R7 = address of ret,
12 // then either settles the result here or branches to the cmpbody routine.
13 MOVD a_base+0(FP), R5
14 MOVD a_len+8(FP), R3
15 MOVD b_base+24(FP), R6
16 MOVD b_len+32(FP), R4
17 MOVD $ret+48(FP), R7
18 CMP R5,R6,CR7 // CR7: do a and b start at the same address?
19 CMP R3,R4,CR6 // CR6: a_len against b_len
20 BEQ CR7,sameptr // same address: only the lengths matter
21
22 #ifdef GOARCH_ppc64le
23 BR cmpbodyLE<>(SB)
24 #else
25 BR cmpbodyBE<>(SB)
26 #endif
27
28 sameptr:
29 BNE CR6,difflen
30 MOVD $0, (R7) // same address, same length: result 0
31 RET
32 difflen:
33 MOVD $1, R8 // result if a_len > b_len
34 BGT CR6,store
35 NEG R8 // a_len < b_len: result -1
36 store:
37 MOVD R8, (R7)
38 RET
39
40 TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
41 // Loads R3/R4 = lengths, R5/R6 = base addresses, R7 = address of ret,
42 // then either settles the result here or branches to the cmpbody routine.
43 MOVD a_base+0(FP), R5
44 MOVD a_len+8(FP), R3
45 MOVD b_base+16(FP), R6
46 MOVD b_len+24(FP), R4
47 MOVD $ret+32(FP), R7
48 CMP R5,R6,CR7 // CR7: do a and b start at the same address?
49 CMP R3,R4,CR6 // CR6: a_len against b_len
50 BEQ CR7,sameptr // same address: only the lengths matter
51
52 #ifdef GOARCH_ppc64le
53 BR cmpbodyLE<>(SB)
54 #else
55 BR cmpbodyBE<>(SB)
56 #endif
57
58 sameptr:
59 BNE CR6,difflen
60 MOVD $0, (R7) // same address, same length: result 0
61 RET
62 difflen:
63 MOVD $1, R8 // result if a_len > b_len
64 BGT CR6,store
65 NEG R8 // a_len < b_len: result -1
66 store:
67 MOVD R8, (R7)
68 RET
69
70 // cmpbodyLE: memcmp-style compare used on ppc64le; stores -1, 0 or +1 at (R7).
71 // R3 = a len
72 // R4 = b len
73 // R5 = a addr
74 // R6 = b addr
75 // R7 = addr of return value
76 TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0 // also writes R3, R5, R6, R8-R10, R14-R16, CTR, CR0, CR2
77 MOVD R3,R8 // R8 = a len, the provisional compare length
78 CMP R3,R4,CR2 // CR2 = a len vs b len, kept for the final length tie-break
79 BC 12,8,setuplen // BLT CR2: a is the shorter, keep R8
80 MOVD R4,R8 // otherwise use b len (R4) as the compare length
81 setuplen:
82 MOVD R8,CTR // CTR = byte count, used by the byte loop at simple
83 CMP R8,$8 // only optimize >=8
84 BLT simplecheck // fewer than 8 bytes: byte-at-a-time path
85 DCBT (R5) // cache hint
86 DCBT (R6)
87 CMP R8,$32 // optimize >= 32
88 MOVD R8,R9 // R9 = compare length, shifted by setup8a on the < 32 path
89 BLT setup8a // 8 byte moves only
90 setup32a:
91 SRADCC $5,R8,R9 // number of 32 byte chunks
92 MOVD R9,CTR // CTR = 32 byte chunk count
93
94 // Special processing for 32 bytes or longer.
95 // Loading with plain MOVD is faster and correct as long as the
96 // doublewords being compared are equal. Once they
97 // are found unequal, reload them in proper byte order
98 // to determine greater or less than.
99 loop32a:
100 MOVD 0(R5),R9 // doublewords to compare
101 MOVD 0(R6),R10 // get 4 doublewords
102 MOVD 8(R5),R14 // second pair, compared after the first
103 MOVD 8(R6),R15
104 CMPU R9,R10 // bytes equal?
105 MOVD $0,R16 // R16 = offset of the pair just compared, for cmpne
106 BNE cmpne // further compare for LT or GT
107 MOVD 16(R5),R9 // get next pair of doublewords
108 MOVD 16(R6),R10
109 CMPU R14,R15 // bytes match?
110 MOVD $8,R16 // R16 = offset of the pair just compared, for cmpne
111 BNE cmpne // further compare for LT or GT
112 MOVD 24(R5),R14 // get next pair of doublewords
113 MOVD 24(R6),R15
114 CMPU R9,R10 // bytes match?
115 MOVD $16,R16 // R16 = offset of the pair just compared, for cmpne
116 BNE cmpne // further compare for LT or GT
117 MOVD $-8,R16 // for cmpne: last pair is at -8 once R5,R6 are inc by 32
118 ADD $32,R5 // bump up to next 32
119 ADD $32,R6
120 CMPU R14,R15 // bytes match?
121 BC 8,2,loop32a // decrement CTR; loop while CTR != 0 and CR0 EQ
122 BNE cmpne // loop ended on a mismatch in the last pair
123 ANDCC $24,R8,R9 // Any 8 byte chunks?
124 BEQ leftover // and result is 0
125 setup8a:
126 SRADCC $3,R9,R9 // get the 8 byte count
127 BEQ leftover // shifted value is 0
128 MOVD R9,CTR // loop count for doublewords
129 loop8:
130 MOVDBR (R5+R0),R9 // doublewords to compare
131 MOVDBR (R6+R0),R10 // byte-reversed loads give the LE compare order
132 ADD $8,R5
133 ADD $8,R6
134 CMPU R9,R10 // match?
135 BC 8,2,loop8 // decrement CTR; loop while CTR != 0 and CR0 EQ
136 BGT greater
137 BLT less
138 leftover:
139 ANDCC $7,R8,R9 // check for leftover bytes
140 MOVD R9,CTR // CTR = leftover byte count
141 BNE simple // leftover bytes
142 BC 12,10,equal // CR2 EQ: all bytes matched and lengths are equal
143 BC 12,8,less // CR2 LT: 1st len < 2nd len, result less
144 BR greater // 1st len > 2nd len must be greater
145 simplecheck:
146 CMP R8,$0 // remaining compare length 0
147 BNE simple // do simple compare
148 BC 12,10,equal // test CR2 for length comparison
149 BC 12,8,less // 1st len < 2nd len, result less
150 BR greater // 1st len > 2nd len must be greater
151 simple:
152 MOVBZ 0(R5), R9 // get byte from 1st operand
153 ADD $1,R5
154 MOVBZ 0(R6), R10 // get byte from 2nd operand
155 ADD $1,R6
156 CMPU R9, R10
157 BC 8,2,simple // decrement CTR; loop while CTR != 0 and CR0 EQ
158 BGT greater // 1st > 2nd
159 BLT less // 1st < 2nd
160 BC 12,10,equal // test CR2 for length comparison
161 BC 12,9,greater // CR2 GT: 1st len > 2nd len
162 BR less // must be less
163 cmpne: // reached only when a doubleword pair differs
164 MOVDBR (R5+R16),R8 // reload in reverse order
165 MOVDBR (R6+R16),R9
166 CMPU R8,R9 // compare correct endianness
167 BGT greater // here only if NE
168 less:
169 MOVD $-1,R3
170 MOVD R3,(R7) // return value if A < B
171 RET
172 equal:
173 MOVD $0,(R7) // return value if A == B
174 RET
175 greater:
176 MOVD $1,R3
177 MOVD R3,(R7) // return value if A > B
178 RET
179
180 // cmpbodyBE: memcmp-style compare used on big-endian ppc64; stores -1, 0 or +1 at (R7).
181 // R3 = a len
182 // R4 = b len
183 // R5 = a addr
184 // R6 = b addr
185 // R7 = addr of return value
186 TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0 // also writes R3, R5, R6, R8-R10, R14, R15, CTR, CR0, CR2
187 MOVD R3,R8 // R8 = a len, the provisional compare length
188 CMP R3,R4,CR2 // CR2 = a len vs b len, kept for the final length tie-break
189 BC 12,8,setuplen // BLT CR2: a is the shorter, keep R8
190 MOVD R4,R8 // otherwise use b len (R4) as the compare length
191 setuplen:
192 MOVD R8,CTR // CTR = byte count, used by the byte loop at simple
193 CMP R8,$8 // only optimize >=8
194 BLT simplecheck // fewer than 8 bytes: byte-at-a-time path
195 DCBT (R5) // cache hint
196 DCBT (R6)
197 CMP R8,$32 // optimize >= 32
198 MOVD R8,R9 // R9 = compare length, shifted by setup8a on the < 32 path
199 BLT setup8a // 8 byte moves only
200
201 setup32a:
202 SRADCC $5,R8,R9 // number of 32 byte chunks
203 MOVD R9,CTR // CTR = 32 byte chunk count
204 loop32a:
205 MOVD 0(R5),R9 // doublewords to compare
206 MOVD 0(R6),R10 // get 4 doublewords
207 MOVD 8(R5),R14 // second pair, compared after the first
208 MOVD 8(R6),R15
209 CMPU R9,R10 // bytes equal?
210 BLT less // found to be less
211 BGT greater // found to be greater
212 MOVD 16(R5),R9 // get next pair of doublewords
213 MOVD 16(R6),R10
214 CMPU R14,R15 // bytes match?
215 BLT less // found less
216 BGT greater // found greater
217 MOVD 24(R5),R14 // get next pair of doublewords
218 MOVD 24(R6),R15
219 CMPU R9,R10 // bytes match?
220 BLT less // found to be less
221 BGT greater // found to be greater
222 ADD $32,R5 // bump up to next 32
223 ADD $32,R6
224 CMPU R14,R15 // bytes match?
225 BC 8,2,loop32a // decrement CTR; loop while CTR != 0 and CR0 EQ
226 BLT less // with BE, byte ordering is
227 BGT greater // good for compare
228 ANDCC $24,R8,R9 // Any 8 byte chunks?
229 BEQ leftover // and result is 0
230 setup8a:
231 SRADCC $3,R9,R9 // get the 8 byte count
232 BEQ leftover // shifted value is 0
233 MOVD R9,CTR // loop count for doublewords
234 loop8:
235 MOVD (R5),R9 // doublewords to compare
236 MOVD (R6),R10
237 ADD $8,R5
238 ADD $8,R6
239 CMPU R9,R10 // match?
240 BC 8,2,loop8 // decrement CTR; loop while CTR != 0 and CR0 EQ
241 BGT greater
242 BLT less
243 leftover:
244 ANDCC $7,R8,R9 // check for leftover bytes
245 MOVD R9,CTR // CTR = leftover byte count
246 BNE simple // leftover bytes
247 BC 12,10,equal // CR2 EQ: all bytes matched and lengths are equal
248 BC 12,8,less // CR2 LT: 1st len < 2nd len, result less
249 BR greater // 1st len > 2nd len must be greater
250 simplecheck:
251 CMP R8,$0 // remaining compare length 0
252 BNE simple // do simple compare
253 BC 12,10,equal // test CR2 for length comparison
254 BC 12,8,less // 1st len < 2nd len, result less
255 BR greater // 1st len > 2nd len must be greater
256 simple:
257 MOVBZ 0(R5),R9 // get byte from 1st operand
258 ADD $1,R5
259 MOVBZ 0(R6),R10 // get byte from 2nd operand
260 ADD $1,R6
261 CMPU R9,R10
262 BC 8,2,simple // decrement CTR; loop while CTR != 0 and CR0 EQ
263 BGT greater // 1st > 2nd
264 BLT less // 1st < 2nd
265 BC 12,10,equal // test CR2 for length comparison
266 BC 12,9,greater // CR2 GT: 1st len > 2nd len
267 less: // also reached by fall-through when 1st len < 2nd len
268 MOVD $-1,R3
269 MOVD R3,(R7) // return value if A < B
270 RET
271 equal:
272 MOVD $0,(R7) // return value if A == B
273 RET
274 greater:
275 MOVD $1,R3
276 MOVD R3,(R7) // return value if A > B
277 RET
View as plain text