// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func memmove(to, from unsafe.Pointer, n uintptr)
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5
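	// Nothing to copy if n == 0.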
	CBNZ	R5, check
	RET

check:
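	// Copies of up to 16 bytes are handled without a loop below.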
	CMP	$16, R5
	BLE	copy16

	AND	$~31, R5, R7 // R7 is N&~31
	SUB	R7, R5, R6   // R6 is N&31

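	// If from < to, a forward copy could overwrite source bytes that have
	// not yet been read when the buffers overlap, so copy backward instead.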
	CMP	R3, R4
	BLT	backward

	// Copying forward proceeds by copying R7 bytes in 32-byte chunks,
	// then the R6 <= 31 tail bytes.
	// R3 and R4 are advanced as we copy.

	// (There may be implementations of armv8 where copying by bytes until
	// at least one of source or dest is word aligned is a worthwhile
	// optimization, but on the one tested so far (xgene) it did not
	// make a significant difference.)

	CBZ	R7, noforwardlarge // Do we need to do any 32-byte chunk copying?

	ADD	R3, R7, R9 // R9 points just past the region copied in 32-byte chunks

forwardlargeloop:
	// Copy 32 bytes at a time.
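	// The post-indexed LDP/STP advance R4 and R3 by 32, so the second
	// pair is addressed at -16 from the updated pointers.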
	LDP.P	32(R4), (R8, R10)
	STP.P	(R8, R10), 32(R3)
	LDP	-16(R4), (R11, R12)
	STP	(R11, R12), -16(R3)
	SUB	$32, R7, R7
	CBNZ	R7, forwardlargeloop

noforwardlarge:
	CBNZ	R6, forwardtail // Do we need to copy any tail bytes?
	RET

forwardtail:
	// There are R6 <= 31 bytes remaining to copy.
	// This is large enough to still contain pointers,
	// which must be copied atomically.
	// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
	TBZ	$4, R6, 3(PC) // write 16 bytes if R6&16 != 0
	LDP.P	16(R4), (R8, R10)
	STP.P	(R8, R10), 16(R3)

	TBZ	$3, R6, 3(PC) // write 8 bytes if R6&8 != 0
	MOVD.P	8(R4), R8
	MOVD.P	R8, 8(R3)

	AND	$7, R6
	CBNZ	R6, 2(PC)
	RET

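	// Copy the remaining R6 (1..7) bytes one at a time.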
	ADD	R3, R6, R9 // R9 points just past the destination memory

forwardtailloop:
	MOVBU.P	1(R4), R8
	MOVBU.P	R8, 1(R3)
	CMP	R3, R9
	BNE	forwardtailloop
	RET

	// Small copies: 1..16 bytes.
copy16:
	ADD	R4, R5, R8 // R8 points just past the last source byte
	ADD	R3, R5, R9 // R9 points just past the last destination byte
	CMP	$8, R5
	BLT	copy7
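	// 8..16 bytes: copy the first and last 8 bytes; for n < 16 the two
	// stores overlap and together cover the whole buffer.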
	MOVD	(R4), R6
	MOVD	-8(R8), R7
	MOVD	R6, (R3)
	MOVD	R7, -8(R9)
	RET

copy7:
	TBZ	$2, R5, copy3
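	// 4..7 bytes: overlapping 4-byte copies from each end cover the buffer.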
	MOVWU	(R4), R6
	MOVWU	-4(R8), R7
	MOVW	R6, (R3)
	MOVW	R7, -4(R9)
	RET

copy3:
	TBZ	$1, R5, copy1
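	// 2..3 bytes: overlapping 2-byte copies from each end cover the buffer.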
	MOVHU	(R4), R6
	MOVHU	-2(R8), R7
	MOVH	R6, (R3)
	MOVH	R7, -2(R9)
	RET

copy1:
	MOVBU	(R4), R6
	MOVB	R6, (R3)
	RET

backward:
	// Copying backwards first copies the R6 <= 31 tail bytes, then
	// R7 bytes in 32-byte chunks.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	R4, R5, R4 // R4 points just past the last source byte
	ADD	R3, R5, R3 // R3 points just past the last destination byte

	CBZ	R6, nobackwardtail // Do we need to copy any tail bytes?

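	// R12 is the number of trailing bytes (n&7) that must be copied one at a time.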
	AND	$7, R6, R12
	CBZ	R12, backwardtaillarge

	SUB	R12, R3, R9 // R9 points at the lowest destination byte that should be copied by byte.
backwardtailloop:
	// Copy sub-pointer-size tail.
	MOVBU.W	-1(R4), R8
	MOVBU.W	R8, -1(R3)
	CMP	R9, R3
	BNE	backwardtailloop

backwardtaillarge:
	// Do the 8-byte and 16-byte writes if needed.
	// See comment at forwardtail.
	TBZ	$3, R6, 3(PC)
	MOVD.W	-8(R4), R8
	MOVD.W	R8, -8(R3)

	TBZ	$4, R6, 3(PC)
	LDP.W	-16(R4), (R8, R10)
	STP.W	(R8, R10), -16(R3)

nobackwardtail:
	CBNZ	R7, backwardlarge // Do we need to do any 32-byte chunk copying?
	RET

backwardlarge:
	SUB	R7, R3, R9 // R9 points at the lowest destination byte

backwardlargeloop:
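	// Copy 32 bytes at a time, moving backward: the first pair is read
	// just below R4, then the pre-indexed pair moves R4 and R3 down by 32.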
	LDP	-16(R4), (R8, R10)
	STP	(R8, R10), -16(R3)
	LDP.W	-32(R4), (R11, R12)
	STP.W	(R11, R12), -32(R3)
	CMP	R9, R3
	BNE	backwardlargeloop
	RET