Text file src/runtime/memmove_arm.s
1 // Inferno's libkern/memmove-arm.s
2 // https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s
3 //
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
6 // Portions Copyright 2009 The Go Authors. All rights reserved.
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 // THE SOFTWARE.
25
26 #include "textflag.h"
27
28 // TS is the destination pointer (to) and TE is its end (to+n); whichever one a bulk MOVM loop would overwrite is spilled to the stack first.
29 #define TS R0 /* destination pointer, loaded from to+0(FP) */
30 #define TE R8 /* destination end pointer, computed as to + n */
31
32 // Warning: the linker will use R11 to synthesize certain instructions. Please
33 // take care and double check with objdump.
34 #define FROM R11 /* source pointer, loaded from from+4(FP) */
35 #define N R12 /* byte count, loaded from n+8(FP) */
36 #define TMP R12 /* scratch; N and TMP don't overlap (N is last read before TMP is first written) */
37 #define TMP1 R5 /* word scratch in _b4loop/_f4loop; same register as RSHIFT, which only the unaligned paths use */
38 // Shift counts and source fix-up used by the unaligned paths (_bunaligned, _funaligned).
39 #define RSHIFT R5 /* right-shift count in bits: 8, 16 or 24 */
40 #define LSHIFT R6 /* left-shift count in bits: always 32 - RSHIFT */
41 #define OFFSET R7 /* bytes added to (backward) or subtracted from (forward) FROM after the 16-byte loop */
42 // Backward unaligned loop: BRn are the source words read, BWn the merged words written.
43 #define BR0 R0 /* shared with TS */
44 #define BW0 R1
45 #define BR1 R1
46 #define BW1 R2
47 #define BR2 R2
48 #define BW2 R3
49 #define BR3 R3
50 #define BW3 R4
51 // Forward unaligned loop: FRn are the source words read, FWn the merged words written.
52 #define FW0 R1
53 #define FR0 R2
54 #define FW1 R2
55 #define FR1 R3
56 #define FW2 R3
57 #define FR2 R4
58 #define FW3 R4
59 #define FR3 R8 /* shared with TE */
60
61 // func memmove(to, from unsafe.Pointer, n uintptr)
62 TEXT runtime·memmove(SB), NOSPLIT, $4-12 /* $4: one 4-byte local, the savedts/savedte spill slot; 12: bytes of arguments */
63 _memmove: /* copies n bytes from 'from' to 'to' */
64 MOVW to+0(FP), TS
65 MOVW from+4(FP), FROM
66 MOVW n+8(FP), N
67
68 ADD N, TS, TE /* to end pointer */
69 // Choose the direction: forward when to <= from (unsigned compare), otherwise backward from the ends.
70 CMP FROM, TS
71 BLS _forward
72
73 _back: /* backward copy: FROM and TE walk down from the ends, TS marks where to stop */
74 ADD N, FROM /* from end pointer */
75 CMP $4, N /* need at least 4 bytes to copy */
76 BLT _b1tail /* fewer than 4 bytes: go straight to the byte loop */
77
78 _b4align: /* align destination on 4 */
79 AND.S $3, TE, TMP
80 BEQ _b4aligned
81 // TE is not word aligned yet: move a single byte and test again.
82 MOVBU.W -1(FROM), TMP /* pre-indexed */
83 MOVBU.W TMP, -1(TE) /* pre-indexed */
84 B _b4align
85
86 _b4aligned: /* is source now aligned? */
87 AND.S $3, FROM, TMP /* TMP = FROM & 3; _bunaligned consumes this value */
88 BNE _bunaligned
89 // Source and destination are both word aligned from here on.
90 ADD $31, TS, TMP /* do 32-byte chunks if possible */
91 MOVW TS, savedts-4(SP) /* TS is R0, which the MOVM below overwrites */
92 _b32loop:
93 CMP TMP, TE
94 BLS _b4tail /* stop once TE <= TS+31, i.e. fewer than 32 bytes remain */
95
96 MOVM.DB.W (FROM), [R0-R7] /* load 8 words, decrement-before with writeback */
97 MOVM.DB.W [R0-R7], (TE) /* store them the same way below TE */
98 B _b32loop
99
100 _b4tail: /* do remaining words if possible */
101 MOVW savedts-4(SP), TS /* restore TS, clobbered by the 32-byte loop */
102 ADD $3, TS, TMP
103 _b4loop:
104 CMP TMP, TE
105 BLS _b1tail /* stop once TE <= TS+3, i.e. fewer than 4 bytes remain */
106
107 MOVW.W -4(FROM), TMP1 /* pre-indexed */
108 MOVW.W TMP1, -4(TE) /* pre-indexed */
109 B _b4loop
110
111 _b1tail: /* remaining bytes */
112 CMP TE, TS
113 BEQ _return /* done when TE has come down to TS */
114
115 MOVBU.W -1(FROM), TMP /* pre-indexed */
116 MOVBU.W TMP, -1(TE) /* pre-indexed */
117 B _b1tail
118
119 _forward: /* forward copy: FROM and TS walk up, TE marks where to stop */
120 CMP $4, N /* need at least 4 bytes to copy */
121 BLT _f1tail /* fewer than 4 bytes: go straight to the byte loop */
122
123 _f4align: /* align destination on 4 */
124 AND.S $3, TS, TMP
125 BEQ _f4aligned
126 // TS is not word aligned yet: move a single byte and test again.
127 MOVBU.P 1(FROM), TMP /* implicit write back */
128 MOVBU.P TMP, 1(TS) /* implicit write back */
129 B _f4align
130
131 _f4aligned: /* is source now aligned? */
132 AND.S $3, FROM, TMP /* TMP = FROM & 3; _funaligned consumes this value */
133 BNE _funaligned
134 // Source and destination are both word aligned from here on.
135 SUB $31, TE, TMP /* do 32-byte chunks if possible */
136 MOVW TE, savedte-4(SP) /* TE is R8, which the MOVM below overwrites */
137 _f32loop:
138 CMP TMP, TS
139 BHS _f4tail /* stop once TS >= TE-31, i.e. fewer than 32 bytes remain */
140
141 MOVM.IA.W (FROM), [R1-R8] /* load 8 words, increment-after with writeback */
142 MOVM.IA.W [R1-R8], (TS) /* store them the same way at TS */
143 B _f32loop
144
145 _f4tail:
146 MOVW savedte-4(SP), TE /* restore TE, clobbered by the 32-byte loop */
147 SUB $3, TE, TMP /* do remaining words if possible */
148 _f4loop:
149 CMP TMP, TS
150 BHS _f1tail /* stop once TS >= TE-3, i.e. fewer than 4 bytes remain */
151
152 MOVW.P 4(FROM), TMP1 /* implicit write back */
153 MOVW.P TMP1, 4(TS) /* implicit write back */
154 B _f4loop
155
156 _f1tail: /* remaining bytes */
157 CMP TS, TE
158 BEQ _return /* done when TS has reached TE */
159
160 MOVBU.P 1(FROM), TMP /* implicit write back */
161 MOVBU.P TMP, 1(TS) /* implicit write back */
162 B _f1tail
163
164 _return:
165 MOVW to+0(FP), R0 /* reload the 'to' argument into R0 */
166 RET
167
168 _bunaligned: /* backward copy, destination word aligned, source not; TMP = FROM & 3 (1, 2 or 3) */
169 CMP $2, TMP /* is TMP < 2 ? */
170 // FROM & 3 == 1:
171 MOVW.LT $8, RSHIFT /* (R(n)<<24)|(R(n-1)>>8) */
172 MOVW.LT $24, LSHIFT
173 MOVW.LT $1, OFFSET
174 // FROM & 3 == 2:
175 MOVW.EQ $16, RSHIFT /* (R(n)<<16)|(R(n-1)>>16) */
176 MOVW.EQ $16, LSHIFT
177 MOVW.EQ $2, OFFSET
178 // FROM & 3 == 3:
179 MOVW.GT $24, RSHIFT /* (R(n)<<8)|(R(n-1)>>24) */
180 MOVW.GT $8, LSHIFT
181 MOVW.GT $3, OFFSET
182
183 ADD $16, TS, TMP /* do 16-byte chunks if possible */
184 CMP TMP, TE
185 BLS _b1tail /* 16 bytes or fewer remain; FROM is still unrounded, so the byte loop can take over directly */
186
187 BIC $3, FROM /* align source (round FROM down to a word boundary) */
188 MOVW TS, savedts-4(SP) /* TS is R0 = BR0, which the loop overwrites */
189 MOVW (FROM), BR0 /* prime first block register */
190
191 _bu16loop:
192 CMP TMP, TE
193 BLS _bu1tail /* stop once TE <= TS+16 */
194 // Each stored word is (higher-addressed source word << LSHIFT) | (next lower source word >> RSHIFT).
195 MOVW BR0<<LSHIFT, BW3 /* take the lowest word of the previous batch before BR0 is reloaded */
196 MOVM.DB.W (FROM), [BR0-BR3] /* read the next 4 source words below FROM */
197 ORR BR3>>RSHIFT, BW3
198 // BW2/BW1/BW0 reuse the registers of BR3/BR2/BR1, so each BRn is shifted right into the word above it before it is overwritten.
199 MOVW BR3<<LSHIFT, BW2
200 ORR BR2>>RSHIFT, BW2
201
202 MOVW BR2<<LSHIFT, BW1
203 ORR BR1>>RSHIFT, BW1
204
205 MOVW BR1<<LSHIFT, BW0
206 ORR BR0>>RSHIFT, BW0
207
208 MOVM.DB.W [BW0-BW3], (TE) /* store the 4 merged words below TE */
209 B _bu16loop
210
211 _bu1tail:
212 MOVW savedts-4(SP), TS /* restore TS, clobbered as BR0 */
213 ADD OFFSET, FROM /* undo the BIC rounding: OFFSET equals the original FROM & 3 */
214 B _b1tail
215
216 _funaligned: /* forward copy, destination word aligned, source not; TMP = FROM & 3 (1, 2 or 3) */
217 CMP $2, TMP
218 // FROM & 3 == 1:
219 MOVW.LT $8, RSHIFT /* (R(n+1)<<24)|(R(n)>>8) */
220 MOVW.LT $24, LSHIFT
221 MOVW.LT $3, OFFSET
222 // FROM & 3 == 2:
223 MOVW.EQ $16, RSHIFT /* (R(n+1)<<16)|(R(n)>>16) */
224 MOVW.EQ $16, LSHIFT
225 MOVW.EQ $2, OFFSET
226 // FROM & 3 == 3:
227 MOVW.GT $24, RSHIFT /* (R(n+1)<<8)|(R(n)>>24) */
228 MOVW.GT $8, LSHIFT
229 MOVW.GT $1, OFFSET
230
231 SUB $16, TE, TMP /* do 16-byte chunks if possible */
232 CMP TMP, TS
233 BHS _f1tail /* 16 bytes or fewer remain; FROM is still unrounded, so the byte loop can take over directly */
234
235 BIC $3, FROM /* align source (round FROM down to a word boundary) */
236 MOVW TE, savedte-4(SP) /* TE is R8 = FR3, which the loop overwrites */
237 MOVW.P 4(FROM), FR3 /* prime last block register, implicit write back */
238
239 _fu16loop:
240 CMP TMP, TS
241 BHS _fu1tail /* stop once TS >= TE-16 */
242 // Each stored word is (lower-addressed source word >> RSHIFT) | (next higher source word << LSHIFT).
243 MOVW FR3>>RSHIFT, FW0 /* take the highest word of the previous batch before FR3 is reloaded */
244 MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3] /* read the next 4 source words at FROM */
245 ORR FR0<<LSHIFT, FW0
246 // FW1/FW2/FW3 reuse the registers of FR0/FR1/FR2, so each FRn is shifted left into the word below it before it is overwritten.
247 MOVW FR0>>RSHIFT, FW1
248 ORR FR1<<LSHIFT, FW1
249
250 MOVW FR1>>RSHIFT, FW2
251 ORR FR2<<LSHIFT, FW2
252
253 MOVW FR2>>RSHIFT, FW3
254 ORR FR3<<LSHIFT, FW3
255
256 MOVM.IA.W [FW0,FW1,FW2,FW3], (TS) /* store the 4 merged words at TS */
257 B _fu16loop
258
259 _fu1tail:
260 MOVW savedte-4(SP), TE /* restore TE, clobbered as FR3 */
261 SUB OFFSET, FROM /* FROM sits one word past the rounded-down position; OFFSET = 4 - (original FROM & 3) restores the true source pointer */
262 B _f1tail
View as plain text