Text file src/runtime/memclr_ppc64x.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ppc64 ppc64le
6
7 #include "textflag.h"
8
9 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Zeroes the n bytes starting at ptr.
//
// Strategy, chosen by size and alignment:
//   n < 8:        byte stores only (nozerolarge / zerotail).
//   8 <= n < 512: 32 byte chunks written as two 16 byte vector stores,
//                 then doublewords, then tail bytes (under512).
//   n >= 512:     bring ptr up to 128 byte alignment, clear with DCBZ in
//                 groups of four (512 bytes) and then singly (128 bytes),
//                 and finish the remainder through the 32 byte /
//                 doubleword / byte paths.
//
// Register usage:
//   R0        source of zero for every scalar store and the zero index in
//             indexed stores; assumed to hold 0 on entry
//   R3        current write pointer
//   R4        bytes still to clear (not decremented by the final byte loop)
//   R5        tail byte count (R4 & 7)
//   R6        doubleword count
//   R7        32 byte chunk count or doubleword count, depending on path
//   R8-R11    scratch: alignment test results, loop counts, index offsets
//   R14, R15  scratch: alignment arithmetic
//   VS32      zeroed vector register stored by STXVD2X
//   CTR       loop counter for every "BC 16, 0" loop
10 TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT|NOFRAME, $0-16
11 MOVD ptr+0(FP), R3 // R3 = ptr
12 MOVD n+8(FP), R4 // R4 = n
13
14 // Determine if there are doublewords to clear
15 check:
16 ANDCC $7, R4, R5 // R5: leftover bytes to clear
17 SRD $3, R4, R6 // R6: double words to clear
18 CMP R6, $0, CR1 // CR1[EQ] set if no double words
19
20 BC 12, 6, nozerolarge // CR1[EQ] set: fewer than 8 bytes, only single bytes
21 CMP R4, $512
22 BLT under512 // special case for < 512
23 ANDCC $127, R3, R8 // check for 128 alignment of address
24 BEQ zero512setup // already 128 byte aligned: go straight to the DCBZ setup
25
26 ANDCC $7, R3, R15
27 BEQ zero512xsetup // at least 8 byte aligned
28
29 // zero bytes up to 8 byte alignment
// (n >= 512 on this path, so the at most 7 bytes stored here are in range)
30
31 ANDCC $1, R3, R15 // check for byte alignment
32 BEQ byte2
33 MOVB R0, 0(R3) // zero 1 byte
34 ADD $1, R3 // bump ptr by 1
35 ADD $-1, R4 // one byte fewer to clear
36
37 byte2:
38 ANDCC $2, R3, R15 // check for 2 byte alignment
39 BEQ byte4
40 MOVH R0, 0(R3) // zero 2 bytes
41 ADD $2, R3 // bump ptr by 2
42 ADD $-2, R4 // two bytes fewer to clear
43
44 byte4:
45 ANDCC $4, R3, R15 // check for 4 byte alignment
46 BEQ zero512xsetup
47 MOVW R0, 0(R3) // zero 4 bytes
48 ADD $4, R3 // bump ptr by 4
49 ADD $-4, R4 // four bytes fewer to clear
50 BR zero512xsetup // ptr should now be 8 byte aligned
51
// 8 <= n < 512: no alignment work, just 32 byte chunks and doublewords.
52 under512:
53 MOVD R6, CTR // R6 = number of double words
54 SRDCC $2, R6, R7 // R7 = number of 32 byte chunks (doublewords / 4)
55 BNE zero32setup // at least one 32 byte chunk
// otherwise fall through to zero8 with CTR = R6 (1 to 3 doublewords)
56
57 // Clear double words
// Every entry to zero8 arrives with CTR holding a non-zero doubleword count.
58
59 zero8:
60 MOVD R0, 0(R3) // double word
61 ADD $8, R3
62 ADD $-8, R4
63 BC 16, 0, zero8 // dec ctr, br zero8 if ctr not 0
64 BR nozerolarge // handle leftovers
65
66 // Prepare to clear 32 bytes at a time.
// Expects R7 = number of 32 byte chunks (non-zero).
67
68 zero32setup:
69 DCBTST (R3) // prepare data cache
70 XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
71 MOVD R7, CTR // number of 32 byte chunks
72 MOVD $16, R8 // index of the second 16 byte store
73
74 zero32:
75 STXVD2X VS32, (R3+R0) // store 16 bytes
76 STXVD2X VS32, (R3+R8) // store the next 16 bytes
77 ADD $32, R3
78 ADD $-32, R4
79 BC 16, 0, zero32 // dec ctr, br zero32 if ctr not 0
80 RLDCLCC $61, R4, $3, R6 // remaining doublewords: R6 = R4 / 8, sets CR0
81 BEQ nozerolarge // no whole doublewords left
82 MOVD R6, CTR // set up the CTR for doublewords
83 BR zero8
84
// Tail handling: at most 7 bytes remain once the doubleword work is done.
85 nozerolarge:
86 ANDCC $7, R4, R5 // any remaining bytes
87 BC 4, 1, LR // ble lr: return if no tail bytes remain
88
89 zerotail:
90 MOVD R5, CTR // set up to clear tail bytes
91
92 zerotailloop:
93 MOVB R0, 0(R3) // clear single bytes
94 ADD $1, R3
95 BC 16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
96 RET
97
// n >= 512 and ptr is 8 byte aligned but not known to be 128 byte aligned.
98 zero512xsetup: // 512 chunk with extra needed
99 ANDCC $8, R3, R11 // is ptr also 16 byte aligned?
100 BEQ zero512setup16
101 MOVD R0, 0(R3) // clear 8 bytes
102 ADD $8, R3 // update ptr to next 8
103 ADD $-8, R4 // dec count by 8
104
// ptr is 16 byte aligned here.
105 zero512setup16:
106 ANDCC $127, R3, R14 // < 128 byte alignment
107 BEQ zero512setup // handle 128 byte alignment
108 MOVD $128, R15
109 SUB R14, R15, R14 // find increment to 128 alignment: R14 = 128 - (ptr & 127)
110 SRD $4, R14, R15 // number of 16 byte chunks
111
112 zero512presetup:
113 MOVD R15, CTR // loop counter of 16 bytes
114 XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
115
116 zero512preloop: // clear up to 128 alignment
117 STXVD2X VS32, (R3+R0) // clear 16 bytes
118 ADD $16, R3 // update ptr
119 ADD $-16, R4 // dec count
120 BC 16, 0, zero512preloop
121
// ptr is 128 byte aligned here; R4 may have dropped below 512 during alignment.
122 zero512setup: // setup for dcbz loop
123 CMP R4, $512 // check if at least 512
124 BLT remain
125 SRD $9, R4, R8 // loop count for 512 chunks
126 MOVD R8, CTR // set up counter
127 MOVD $128, R9 // index regs for 128 bytes
128 MOVD $256, R10
129 MOVD $384, R11
130
// Each iteration issues four DCBZ at 128 byte strides and advances by 512.
131 zero512:
132 DCBZ (R3+R0) // clear first chunk
133 DCBZ (R3+R9) // clear second chunk
134 DCBZ (R3+R10) // clear third chunk
135 DCBZ (R3+R11) // clear fourth chunk
136 ADD $512, R3
137 ADD $-512, R4
138 BC 16, 0, zero512
139
// Fewer than 512 bytes left: clear one 128 byte block per iteration.
140 remain:
141 CMP R4, $128 // check if 128 byte chunks left
142 BLT smaller
143 DCBZ (R3+R0) // clear 128
144 ADD $128, R3
145 ADD $-128, R4
146 BR remain
147
// Fewer than 128 bytes left.
148 smaller:
149 ANDCC $127, R4, R7 // find leftovers
150 BEQ done
151 CMP R7, $64 // 64 or more, do 32 at a time
152 BLT zero8setup // less than 64, do 8 at a time
153 SRD $5, R7, R7 // set up counter for 32
154 BR zero32setup
155
156 zero8setup:
157 SRDCC $3, R7, R7 // R7 = doublewords left; zero means less than 8 bytes
158 BEQ nozerolarge
159 MOVD R7, CTR
160 BR zero8
161
162 done:
163 RET
View as plain text