Text file src/pkg/runtime/memmove_ppc64x.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ppc64 ppc64le
6
7 #include "textflag.h"
8
// func memmove(to, from unsafe.Pointer, n uintptr)
//
// memmove copies n bytes from `from` to `to`. The two regions may overlap.
//
// The copy direction is chosen with one unsigned comparison, (to - from) < n:
//   - true:  `to` lies inside [from, from+n), so a forward copy would overwrite
//            source bytes before they are read; copy backward (high to low).
//   - false: copy forward (low to high). This also covers to < from, where the
//            subtraction wraps to a large unsigned value.
//
// In both directions the length is split into 32-byte chunks (moved with VSX
// loads/stores), leftover doublewords, and a 0-7 byte tail.
//
// Register use:
//	R3   destination pointer (moves as data is stored)
//	R4   source pointer (moves as data is loaded)
//	R5   n, the byte count
//	R6   doubleword count (n >> 3); later the doublewords left after the chunks
//	R7   tail byte count (n & 7)
//	R8   scratch: data being moved, chunk count, then the constant index 16
//	R9   overlap distance (to - from) on the backward path
//	R0   used as a zero index for the VSX accesses; the code relies on R0 == 0
//	CTR  loop counter for every loop
//	CR0  scratch condition field, CR1 = (R6 compared with 0), CR2 = overlap test
//	VS32, VS33  the 32-byte chunk in flight
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, R5, R7	// R7 = n & 7: tail bytes; CR0[EQ] set if there are none
	SRD	$3, R5, R6	// R6 = n >> 3: doublewords to copy
	CMP	R6, $0, CR1	// CR1[EQ] set if no doublewords to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing a backward move when
	// not necessary.

	SUB	R4, R3, R8	// R8 = dest - src
	CMPU	R8, R5, CR2	// unsigned: dest - src < len?
	BC	12, 8, backward	// BLT CR2, backward

	// Copying forward if no harmful overlap.

	BC	12, 6, noforwardlarge	// BEQ CR1: no doublewords, only a tail
	SRDCC	$2, R6, R8		// R8 = number of 32-byte chunks
	BNE	forward32setup		// at least one chunk: use VSX
	MOVD	R6, CTR			// fewer than 4 doublewords: copy them one by one

	// Move doublewords. CTR holds the count and is non-zero on entry.

forward8:
	MOVD	0(R4), R8		// load doubleword
	ADD	$8, R4
	MOVD	R8, 0(R3)		// store doubleword
	ADD	$8, R3
	BC	16, 0, forward8		// bdnz
	BR	noforwardlarge		// handle remainder

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(R3)			// cache hint: destination will be stored to
	DCBT	(R4)			// cache hint: source will be loaded
	MOVD	R8, CTR			// number of 32-byte chunks
	MOVD	$16, R8			// index of the second 16 bytes in a chunk

forward32:
	LXVD2X	(R4+R0), VS32		// load 16 bytes
	LXVD2X	(R4+R8), VS33		// load next 16 bytes
	ADD	$32, R4
	STXVD2X	VS32, (R3+R0)		// store 16 bytes
	STXVD2X	VS33, (R3+R8)		// store next 16 bytes
	ADD	$32, R3			// bump up for next set
	BC	16, 0, forward32	// bdnz
	RLDCLCC	$61, R5, $3, R6		// R6 = (n >> 3) & 3: remaining doublewords
	BEQ	noforwardlarge		// none left: go handle the tail
	MOVD	R6, CTR			// set up the CTR
	BR	forward8

noforwardlarge:
	CMP	R7, $0			// any remaining bytes?
	BC	4, 1, LR		// ble lr: no tail, done

forwardtail:
	MOVD	R7, CTR			// move tail bytes

forwardtailloop:
	MOVBZ	0(R4), R8		// move single bytes
	ADD	$1, R4
	MOVBZ	R8, 0(R3)
	ADD	$1, R3
	BC	16, 0, forwardtailloop	// bdnz
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	R5, R4, R4		// end of source
	ADD	R3, R5, R3		// end of dest

	// CR0 still holds the result of the ANDCC in check: nothing between
	// there and here writes CR0.
	BEQ	nobackwardtail		// R7 == 0: no tail bytes

	MOVD	R7, CTR			// bytes to move

backwardtailloop:
	MOVBZ	-1(R4), R8		// load the last byte not yet copied
	SUB	$1, R4
	MOVBZ	R8, -1(R3)
	SUB	$1, R3
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// ble CR1 lr: no doublewords, done

backwardlarge:
	MOVD	R6, CTR			// default: copy every doubleword one by one
	// Use VSX only when the regions are at least 32 bytes apart and there
	// is at least one 32-byte chunk. On this path 0 <= dest - src < n, so
	// the distance must be computed as dest - src; src - dest is never
	// positive and would make the signed compare below always branch.
	SUB	R4, R3, R9		// R9 = dest - src (both ends moved equally)
	CMP	R9, $32
	BLT	backwardlargeloop	// distance < 32: doubleword loop
	SRDCC	$2, R6, R8		// R8 = number of 32-byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(R4), R8
	SUB	$8, R4
	MOVD	R8, -8(R3)
	SUB	$8, R3
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	R8, CTR			// number of 32-byte chunks
	MOVD	$16, R8			// index of the second 16 bytes in a chunk

backward32loop:
	SUB	$32, R4
	SUB	$32, R3
	LXVD2X	(R4+R0), VS32		// load 16 bytes
	LXVD2X	(R4+R8), VS33		// load next 16 bytes
	STXVD2X	VS32, (R3+R0)		// store 16 bytes
	STXVD2X	VS33, (R3+R8)		// store next 16 bytes
	BC	16, 0, backward32loop	// bdnz
	// Only the doublewords not covered by the chunks remain. CR1 and R6
	// still describe the full count, so recompute both here.
	ANDCC	$3, R6, R6		// R6 = (n >> 3) & 3; CR0[EQ] set if none remain
	BC	12, 2, LR		// beq CR0 lr: nothing left, done
	MOVD	R6, CTR
	BR	backwardlargeloop
View as plain text