// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Lowering arithmetic
(Add(64|Ptr) x y) -> (ADD x y)
(Add(32|16|8) x y) -> (ADDW x y)
(Add32F x y) -> (FADDS x y)
(Add64F x y) -> (FADD x y)

(Sub(64|Ptr) x y) -> (SUB x y)
(Sub(32|16|8) x y) -> (SUBW x y)
(Sub32F x y) -> (FSUBS x y)
(Sub64F x y) -> (FSUB x y)

(Mul64 x y) -> (MULLD x y)
(Mul(32|16|8) x y) -> (MULLW x y)
(Mul32F x y) -> (FMULS x y)
(Mul64F x y) -> (FMUL x y)

(Div32F x y) -> (FDIVS x y)
(Div64F x y) -> (FDIV x y)

(Div64 x y) -> (DIVD x y)
(Div64u x y) -> (DIVDU x y)
// DIVW and DIVWU have a 64-bit dividend and a 32-bit divisor,
// so a sign/zero extension of the dividend is required.
(Div32 x y) -> (DIVW (MOVWreg x) y)
(Div32u x y) -> (DIVWU (MOVWZreg x) y)
(Div16 x y) -> (DIVW (MOVHreg x) (MOVHreg y))
(Div16u x y) -> (DIVWU (MOVHZreg x) (MOVHZreg y))
(Div8 x y) -> (DIVW (MOVBreg x) (MOVBreg y))
(Div8u x y) -> (DIVWU (MOVBZreg x) (MOVBZreg y))

(Hmul(64|64u) x y) -> (MULH(D|DU) x y)
(Hmul32 x y) -> (SRDconst [32] (MULLD (MOVWreg x) (MOVWreg y)))
(Hmul32u x y) -> (SRDconst [32] (MULLD (MOVWZreg x) (MOVWZreg y)))

(Mod(64|64u) x y) -> (MOD(D|DU) x y)
// MODW and MODWU have a 64-bit dividend and a 32-bit divisor,
// so a sign/zero extension of the dividend is required.
(Mod32 x y) -> (MODW (MOVWreg x) y)
(Mod32u x y) -> (MODWU (MOVWZreg x) y)
(Mod16 x y) -> (MODW (MOVHreg x) (MOVHreg y))
(Mod16u x y) -> (MODWU (MOVHZreg x) (MOVHZreg y))
(Mod8 x y) -> (MODW (MOVBreg x) (MOVBreg y))
(Mod8u x y) -> (MODWU (MOVBZreg x) (MOVBZreg y))

// (x + y) / 2 with x>=y -> (x - y) / 2 + y
(Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
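// For example, x=10, y=4: (10-4)>>1 + 4 = 3 + 4 = 7 = (10+4)/2.
// Subtracting before shifting keeps the intermediate value in range;
// computing x+y directly could overflow 64 bits.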

(And64 x y) -> (AND x y)
(And(32|16|8) x y) -> (ANDW x y)

(Or64 x y) -> (OR x y)
(Or(32|16|8) x y) -> (ORW x y)

(Xor64 x y) -> (XOR x y)
(Xor(32|16|8) x y) -> (XORW x y)

(Neg64 x) -> (NEG x)
(Neg(32|16|8) x) -> (NEGW x)
(Neg32F x) -> (FNEGS x)
(Neg64F x) -> (FNEG x)

(Com64 x) -> (NOT x)
(Com(32|16|8) x) -> (NOTW x)
(NOT x) && true -> (XOR (MOVDconst [-1]) x)
(NOTW x) && true -> (XORWconst [-1] x)

// Lowering boolean ops
(AndB x y) -> (ANDW x y)
(OrB x y) -> (ORW x y)
(Not x) -> (XORWconst [1] x)

// Lowering pointer arithmetic
(OffPtr [off] ptr:(SP)) -> (MOVDaddr [off] ptr)
(OffPtr [off] ptr) && is32Bit(off) -> (ADDconst [off] ptr)
(OffPtr [off] ptr) -> (ADD (MOVDconst [off]) ptr)

// TODO: optimize these cases?
(Ctz64NonZero x) -> (Ctz64 x)
(Ctz32NonZero x) -> (Ctz32 x)

// Ctz(x) = 64 - findLeftmostOne((x-1)&^x)
(Ctz64 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x))))
(Ctz32 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
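// For example, x=8 (0b1000): (x-1)&^x = 0b0111, whose leftmost one bit is
// 61 bits from the MSB of the 64-bit register, so Ctz = 64-61 = 3.
// For x=1, (x-1)&^x = 0 and FLOGR yields 64, giving Ctz = 0.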

(BitLen64 x) -> (SUB (MOVDconst [64]) (FLOGR x))
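// FLOGR returns the bit position of the leftmost one, counting from the
// MSB (64 for a zero input), i.e. the number of leading zeros. For
// example, x=8 has 60 leading zeros, so BitLen64 = 64-60 = 4; x=0 gives 0.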

// POPCNT treats the input register as a vector of 8 bytes, producing
// a population count for each individual byte. For inputs larger than
// a single byte we therefore need to sum the individual bytes produced
// by the POPCNT instruction. For example, the following instruction
// sequence could be used to calculate the population count of a 4-byte
// value:
//
//     MOVD   $0x12345678, R1 // R1=0x12345678 <-- input
//     POPCNT R1, R2          // R2=0x02030404
//     SRW    $16, R2, R3     // R3=0x00000203
//     ADDW   R2, R3, R4      // R4=0x02030607
//     SRW    $8, R4, R5      // R5=0x00020306
//     ADDW   R4, R5, R6      // R6=0x0205090d
//     MOVBZ  R6, R7          // R7=0x0000000d <-- result is 13
//
(PopCount8 x) -> (POPCNT (MOVBZreg x))
(PopCount16 x) -> (MOVBZreg (SumBytes2 (POPCNT <typ.UInt16> x)))
(PopCount32 x) -> (MOVBZreg (SumBytes4 (POPCNT <typ.UInt32> x)))
(PopCount64 x) -> (MOVBZreg (SumBytes8 (POPCNT <typ.UInt64> x)))

// The SumBytes{2,4,8} pseudo operations sum the values of the rightmost
// 2, 4 or 8 bytes respectively. The result is a single byte, but the
// other bytes might contain junk, so a zero extension is required if
// the desired output type is larger than 1 byte.
(SumBytes2 x) -> (ADDW (SRWconst <typ.UInt8> x [8]) x)
(SumBytes4 x) -> (SumBytes2 (ADDW <typ.UInt16> (SRWconst <typ.UInt16> x [16]) x))
(SumBytes8 x) -> (SumBytes4 (ADDW <typ.UInt32> (SRDconst <typ.UInt32> x [32]) x))
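// For example, SumBytes2 applied to x=0x0203 computes
// (x>>8) + x = 0x0002 + 0x0203 = 0x0205; the rightmost byte is 2+3 = 5.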

(Bswap64 x) -> (MOVDBR x)
(Bswap32 x) -> (MOVWBR x)

// add with carry
(Select0 (Add64carry x y c))
    -> (Select0 <typ.UInt64> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))
(Select1 (Add64carry x y c))
    -> (Select0 <typ.UInt64> (ADDE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))))
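// (ADDCconst c [-1]) computes c + 0xffffffffffffffff, which produces a
// carry out exactly when c != 0. Since the carry input c is 0 or 1, this
// transfers the boolean carry into the condition code consumed by ADDE.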

// subtract with borrow
(Select0 (Sub64borrow x y c))
    -> (Select0 <typ.UInt64> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c))))
(Select1 (Sub64borrow x y c))
    -> (NEG (Select0 <typ.UInt64> (SUBE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c)))))))
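// (SUBC (MOVDconst [0]) c) computes 0 - c, which signals a borrow exactly
// when c != 0, transferring the boolean borrow input into the condition
// code for SUBE. The 0 - 0 - borrow SUBE yields 0 or -1, and the final
// NEG turns that into the 0/1 borrow-out result.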

// math package intrinsics
(Sqrt x) -> (FSQRT x)
(Floor x) -> (FIDBR [7] x)
(Ceil x) -> (FIDBR [6] x)
(Trunc x) -> (FIDBR [5] x)
(RoundToEven x) -> (FIDBR [4] x)
(Round x) -> (FIDBR [1] x)
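// The FIDBR immediate selects the rounding mode:
//   1: round to nearest, ties away from zero
//   4: round to nearest, ties to even
//   5: round toward zero
//   6: round toward +Inf
//   7: round toward -Inf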

// Atomic loads and stores.
// The SYNC instruction (fast-BCR-serialization) prevents store-load
// reordering. Other sequences of memory operations (load-load,
// store-store and load-store) are already guaranteed not to be reordered.
(AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) -> (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem)
(AtomicStore(32|64|PtrNoWB) ptr val mem) -> (SYNC (MOV(W|D|D)atomicstore ptr val mem))

// Store-release doesn't require store-load ordering.
(AtomicStoreRel32 ptr val mem) -> (MOVWatomicstore ptr val mem)

// Atomic adds.
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))
(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (LAAG ptr val mem))
(Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDW val (Select0 <t> tuple))
(Select1 (AddTupleFirst32 _ tuple)) -> (Select1 tuple)
(Select0 <t> (AddTupleFirst64 val tuple)) -> (ADD val (Select0 <t> tuple))
(Select1 (AddTupleFirst64 _ tuple)) -> (Select1 tuple)

// Atomic exchanges.
(AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
(AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)

// Atomic compare and swap.
(AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
(AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)

// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to(16|32|64) x) -> (MOVBreg x)
(SignExt16to(32|64) x) -> (MOVHreg x)
(SignExt32to64 x) -> (MOVWreg x)

(ZeroExt8to(16|32|64) x) -> (MOVBZreg x)
(ZeroExt16to(32|64) x) -> (MOVHZreg x)
(ZeroExt32to64 x) -> (MOVWZreg x)

(Slicemask <t> x) -> (SRADconst (NEG <t> x) [63])

// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc(16|32|64)to8 x) -> x
(Trunc(32|64)to16 x) -> x
(Trunc64to32 x) -> x

// Lowering float <-> int
(Cvt32to32F x) -> (CEFBRA x)
(Cvt32to64F x) -> (CDFBRA x)
(Cvt64to32F x) -> (CEGBRA x)
(Cvt64to64F x) -> (CDGBRA x)

(Cvt32Fto32 x) -> (CFEBRA x)
(Cvt32Fto64 x) -> (CGEBRA x)
(Cvt64Fto32 x) -> (CFDBRA x)
(Cvt64Fto64 x) -> (CGDBRA x)

(Cvt32Fto64F x) -> (LDEBR x)
(Cvt64Fto32F x) -> (LEDBR x)

(Round(32|64)F x) -> (LoweredRound(32|64)F x)

// Lowering shifts

// Lower bounded shifts first. No need to check shift value.
(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLD x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLW x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLW x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLW x y)
(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRD x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW (MOVHZreg x) y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW (MOVBZreg x) y)
(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAD x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAW x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAW (MOVHreg x) y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAW (MOVBreg x) y)

// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
// result = shift >= 64 ? 0 : arg << shift
(Lsh(64|32|16|8)x64 <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64]))
(Lsh(64|32|16|8)x32 <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64]))
(Lsh(64|32|16|8)x16 <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
(Lsh(64|32|16|8)x8 <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))

(Rsh(64|32)Ux64 <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64]))
(Rsh(64|32)Ux32 <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64]))
(Rsh(64|32)Ux16 <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
(Rsh(64|32)Ux8 <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))

(Rsh(16|8)Ux64 <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPUconst y [64]))
(Rsh(16|8)Ux32 <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst y [64]))
(Rsh(16|8)Ux16 <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
(Rsh(16|8)Ux8 <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))

// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
// We implement this by setting the shift value to 63 (all ones) if the shift value is more than 63.
// result = arg >> (shift >= 64 ? 63 : shift)
(Rsh(64|32)x64 x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst y [64])))
(Rsh(64|32)x32 x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64])))
(Rsh(64|32)x16 x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64])))
(Rsh(64|32)x8 x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64])))

(Rsh(16|8)x64 x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst y [64])))
(Rsh(16|8)x32 x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64])))
(Rsh(16|8)x16 x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64])))
(Rsh(16|8)x8 x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64])))
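// For example, a 64-bit arithmetic shift by y=70 selects the constant 63,
// so the result is x>>63: 0 for non-negative x and -1 for negative x, as
// the Go spec requires for over-wide signed shifts.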

// Lowering rotates
(RotateLeft8 <t> x (MOVDconst [c])) -> (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
(RotateLeft16 <t> x (MOVDconst [c])) -> (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
(RotateLeft32 x y) -> (RLL x y)
(RotateLeft64 x y) -> (RLLG x y)
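// For example, RotateLeft8 by c=3 becomes (x<<3) | (x>>5), since -3&7 = 5.
// 32- and 64-bit rotates need no decomposition: RLL and RLLG rotate
// directly, including by variable amounts.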

// Lowering comparisons
(Less64 x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
(Less32 x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
(Less(16|8) x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
(Less64U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
(Less32U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
(Less(16|8)U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
// Use a 'greater than' comparison with reversed operands to dodge the NaN case.
(Less64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
(Less32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))

(Leq64 x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
(Leq32 x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
(Leq(16|8) x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
(Leq64U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
(Leq32U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
(Leq(16|8)U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
// Use a 'greater than or equal' comparison with reversed operands to dodge the NaN case.
(Leq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
(Leq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))

(Greater64 x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
(Greater32 x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
(Greater(16|8) x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
(Greater64U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
(Greater32U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
(Greater(16|8)U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
(Greater64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
(Greater32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))

(Geq64 x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
(Geq32 x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
(Geq(16|8) x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
(Geq64U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
(Geq32U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
(Geq(16|8)U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
(Geq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
(Geq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))

(Eq(64|Ptr) x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
(Eq32 x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
(Eq(16|8|B) x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
(Eq64F x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
(Eq32F x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))

(Neq(64|Ptr) x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
(Neq32 x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
(Neq(16|8|B) x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
(Neq64F x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
(Neq32F x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))

// Lowering loads
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
(Load <t> ptr mem) && is8BitInt(t) && isSigned(t) -> (MOVBload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && !isSigned(t))) -> (MOVBZload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)

// Lowering stores
// These more specific FP versions of the Store pattern should come first.
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)

(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVDstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)

// Lowering moves

// Load and store for small copies.
(Move [0] _ _ mem) -> mem
(Move [1] dst src mem) -> (MOVBstore dst (MOVBZload src mem) mem)
(Move [2] dst src mem) -> (MOVHstore dst (MOVHZload src mem) mem)
(Move [4] dst src mem) -> (MOVWstore dst (MOVWZload src mem) mem)
(Move [8] dst src mem) -> (MOVDstore dst (MOVDload src mem) mem)
(Move [16] dst src mem) ->
    (MOVDstore [8] dst (MOVDload [8] src mem)
        (MOVDstore dst (MOVDload src mem) mem))
(Move [24] dst src mem) ->
    (MOVDstore [16] dst (MOVDload [16] src mem)
        (MOVDstore [8] dst (MOVDload [8] src mem)
            (MOVDstore dst (MOVDload src mem) mem)))
(Move [3] dst src mem) ->
    (MOVBstore [2] dst (MOVBZload [2] src mem)
        (MOVHstore dst (MOVHZload src mem) mem))
(Move [5] dst src mem) ->
    (MOVBstore [4] dst (MOVBZload [4] src mem)
        (MOVWstore dst (MOVWZload src mem) mem))
(Move [6] dst src mem) ->
    (MOVHstore [4] dst (MOVHZload [4] src mem)
        (MOVWstore dst (MOVWZload src mem) mem))
(Move [7] dst src mem) ->
    (MOVBstore [6] dst (MOVBZload [6] src mem)
        (MOVHstore [4] dst (MOVHZload [4] src mem)
            (MOVWstore dst (MOVWZload src mem) mem)))

// MVC for other moves. Use up to 4 instructions (sizes up to 1024 bytes).
(Move [s] dst src mem) && s > 0 && s <= 256 ->
    (MVC [makeValAndOff(s, 0)] dst src mem)
(Move [s] dst src mem) && s > 256 && s <= 512 ->
    (MVC [makeValAndOff(s-256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))
(Move [s] dst src mem) && s > 512 && s <= 768 ->
    (MVC [makeValAndOff(s-512, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem)))
(Move [s] dst src mem) && s > 768 && s <= 1024 ->
    (MVC [makeValAndOff(s-768, 768)] dst src (MVC [makeValAndOff(256, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))))
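// For example, (Move [600] dst src mem) matches the 512 < s <= 768 rule
// and becomes three MVCs: 256 bytes at offset 0, 256 bytes at offset 256,
// then the remaining 88 bytes at offset 512.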

// Move more than 1024 bytes using a loop.
(Move [s] dst src mem) && s > 1024 ->
    (LoweredMove [s%256] dst src (ADD <src.Type> src (MOVDconst [(s/256)*256])) mem)
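// The third argument gives the source address at which the loop stops, so
// the loop copies 256-byte blocks until src reaches src+(s/256)*256; the
// [s%256] auxiliary is the number of trailing bytes moved after the loop.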

// Lowering Zero instructions
(Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
(Zero [2] destptr mem) -> (MOVHstoreconst [0] destptr mem)
(Zero [4] destptr mem) -> (MOVWstoreconst [0] destptr mem)
(Zero [8] destptr mem) -> (MOVDstoreconst [0] destptr mem)
(Zero [3] destptr mem) ->
    (MOVBstoreconst [makeValAndOff(0,2)] destptr
        (MOVHstoreconst [0] destptr mem))
(Zero [5] destptr mem) ->
    (MOVBstoreconst [makeValAndOff(0,4)] destptr
        (MOVWstoreconst [0] destptr mem))
(Zero [6] destptr mem) ->
    (MOVHstoreconst [makeValAndOff(0,4)] destptr
        (MOVWstoreconst [0] destptr mem))
(Zero [7] destptr mem) ->
    (MOVWstoreconst [makeValAndOff(0,3)] destptr
        (MOVWstoreconst [0] destptr mem))

(Zero [s] destptr mem) && s > 0 && s <= 1024 ->
    (CLEAR [makeValAndOff(s, 0)] destptr mem)

// Zero more than 1024 bytes using a loop.
(Zero [s] destptr mem) && s > 1024 ->
    (LoweredZero [s%256] destptr (ADDconst <destptr.Type> destptr [(s/256)*256]) mem)

// Lowering constants
(Const(64|32|16|8) [val]) -> (MOVDconst [val])
(Const(32|64)F [val]) -> (FMOV(S|D)const [val])
(ConstNil) -> (MOVDconst [0])
(ConstBool [b]) -> (MOVDconst [b])

// Lowering calls
(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)

// Miscellaneous
(IsNonNil p) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPconst p [0]))
(IsInBounds idx len) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
(IsSliceInBounds idx len) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
(GetG mem) -> (LoweredGetG mem)
(GetClosurePtr) -> (LoweredGetClosurePtr)
(GetCallerSP) -> (LoweredGetCallerSP)
(GetCallerPC) -> (LoweredGetCallerPC)
(Addr {sym} base) -> (MOVDaddr {sym} base)
(LocalAddr {sym} base _) -> (MOVDaddr {sym} base)
(ITab (Load ptr mem)) -> (MOVDload ptr mem)

// block rewrites
(If (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (LT cmp yes no)
(If (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (LE cmp yes no)
(If (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GT cmp yes no)
(If (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GE cmp yes no)
(If (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (EQ cmp yes no)
(If (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (NE cmp yes no)

// Special case for floating point - LF/LEF not generated.
(If (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GTF cmp yes no)
(If (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GEF cmp yes no)

(If cond yes no) -> (NE (CMPWconst [0] (MOVBZreg <typ.Bool> cond)) yes no)

// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)

(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)

// ***************************
// Above: lowering rules
// Below: optimizations
// ***************************
// TODO: Should the optimizations be a separate pass?

// Fold unnecessary type conversions.
(MOVDreg <t> x) && t.Compare(x.Type) == types.CMPeq -> x
(MOVDnop <t> x) && t.Compare(x.Type) == types.CMPeq -> x

// Propagate constants through type conversions.
(MOVDreg (MOVDconst [c])) -> (MOVDconst [c])
(MOVDnop (MOVDconst [c])) -> (MOVDconst [c])

// If a register move has only 1 use, just use the same register without emitting an instruction.
// MOVDnop doesn't emit an instruction; it exists only to ensure the type.
(MOVDreg x) && x.Uses == 1 -> (MOVDnop x)

// Fold type changes into loads.
(MOVDreg <t> x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <t> [off] {sym} ptr mem)
(MOVDreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <t> [off] {sym} ptr mem)
(MOVDreg <t> x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <t> [off] {sym} ptr mem)
(MOVDreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload <t> [off] {sym} ptr mem)
(MOVDreg <t> x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <t> [off] {sym} ptr mem)
(MOVDreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <t> [off] {sym} ptr mem)
(MOVDreg <t> x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload <t> [off] {sym} ptr mem)

(MOVDnop <t> x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <t> [off] {sym} ptr mem)
(MOVDnop <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <t> [off] {sym} ptr mem)
(MOVDnop <t> x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <t> [off] {sym} ptr mem)
(MOVDnop <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload <t> [off] {sym} ptr mem)
(MOVDnop <t> x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <t> [off] {sym} ptr mem)
(MOVDnop <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <t> [off] {sym} ptr mem)
(MOVDnop <t> x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload <t> [off] {sym} ptr mem)

(MOVDreg <t> x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
(MOVDreg <t> x:(MOVBloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx <t> [off] {sym} ptr idx mem)
(MOVDreg <t> x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
(MOVDreg <t> x:(MOVHloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx <t> [off] {sym} ptr idx mem)
(MOVDreg <t> x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
(MOVDreg <t> x:(MOVWloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx <t> [off] {sym} ptr idx mem)
(MOVDreg <t> x:(MOVDloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDloadidx <t> [off] {sym} ptr idx mem)

(MOVDnop <t> x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
(MOVDnop <t> x:(MOVBloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx <t> [off] {sym} ptr idx mem)
(MOVDnop <t> x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
(MOVDnop <t> x:(MOVHloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx <t> [off] {sym} ptr idx mem)
(MOVDnop <t> x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
(MOVDnop <t> x:(MOVWloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx <t> [off] {sym} ptr idx mem)
(MOVDnop <t> x:(MOVDloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDloadidx <t> [off] {sym} ptr idx mem)

// Fold sign extensions into conditional moves of constants.
// Designed to remove the MOVBZreg inserted by the If lowering.
(MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
(MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
(MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
(MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
(MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
(MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
(MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
(MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)

// Fold boolean tests into blocks.
(NE (CMPWconst [0] (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LT cmp yes no)
(NE (CMPWconst [0] (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LE cmp yes no)
(NE (CMPWconst [0] (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GT cmp yes no)
(NE (CMPWconst [0] (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GE cmp yes no)
(NE (CMPWconst [0] (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (EQ cmp yes no)
(NE (CMPWconst [0] (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (NE cmp yes no)
(NE (CMPWconst [0] (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GTF cmp yes no)
(NE (CMPWconst [0] (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GEF cmp yes no)

// Fold constants into instructions.
(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
(ADDW x (MOVDconst [c])) -> (ADDWconst [int64(int32(c))] x)

(SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
(SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
(SUBW x (MOVDconst [c])) -> (SUBWconst x [int64(int32(c))])
(SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [int64(int32(c))]))

(MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
(MULLW x (MOVDconst [c])) -> (MULLWconst [int64(int32(c))] x)

// NILF instructions leave the high 32 bits unchanged, which is equivalent
// to ANDing with a mask whose leftmost 32 bits are set.
// TODO(mundaym): modify the assembler to accept 64-bit values
// and use isU32Bit(^c).
(AND x (MOVDconst [c])) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(int32(c))] x))
(ANDW x (MOVDconst [c])) -> (ANDWconst [int64(int32(c))] x)

(ANDWconst [c] (ANDWconst [d] x)) -> (ANDWconst [c & d] x)
(ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c & d] x)

(OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
(ORW x (MOVDconst [c])) -> (ORWconst [int64(int32(c))] x)

(XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
(XORW x (MOVDconst [c])) -> (XORWconst [int64(int32(c))] x)

// Constant shifts.
(S(LD|RD|RAD|LW|RW|RAW) x (MOVDconst [c]))
    -> (S(LD|RD|RAD|LW|RW|RAW)const x [c&63])

// Shifts only use the rightmost 6 bits of the shift value.
(S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y))
    -> (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [c&63] y))
(S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63
    -> (S(LD|RD|RAD|LW|RW|RAW) x y)
(SLD x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SLD x y)
(SRD x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRD x y)
(SRAD x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRAD x y)
(SLW x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SLW x y)
(SRW x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRW x y)
(SRAW x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRAW x y)

// Constant rotate generation
(RLL x (MOVDconst [c])) -> (RLLconst x [c&31])
(RLLG x (MOVDconst [c])) -> (RLLGconst x [c&63])

(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)

(ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)

(CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
(CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
(CMPW x (MOVDconst [c])) -> (CMPWconst x [int64(int32(c))])
(CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int32(c))]))
(CMPU x (MOVDconst [c])) && isU32Bit(c) -> (CMPUconst x [int64(int32(c))])
(CMPU (MOVDconst [c]) x) && isU32Bit(c) -> (InvertFlags (CMPUconst x [int64(int32(c))]))
(CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
(CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))

// Using MOV{W,H,B}Zreg instead of AND is cheaper.
(AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
(AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
(AND x (MOVDconst [0xFFFFFFFF])) -> (MOVWZreg x)
(ANDWconst [0xFF] x) -> (MOVBZreg x)
(ANDWconst [0xFFFF] x) -> (MOVHZreg x)

// strength reduction
(MULLDconst [-1] x) -> (NEG x)
(MULLDconst [0] _) -> (MOVDconst [0])
(MULLDconst [1] x) -> x
(MULLDconst [c] x) && isPowerOfTwo(c) -> (SLDconst [log2(c)] x)
(MULLDconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
(MULLDconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADD (SLDconst <v.Type> [log2(c-1)] x) x)

(MULLWconst [-1] x) -> (NEGW x)
(MULLWconst [0] _) -> (MOVDconst [0])
(MULLWconst [1] x) -> x
(MULLWconst [c] x) && isPowerOfTwo(c) -> (SLWconst [log2(c)] x)
(MULLWconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
(MULLWconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
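// For example, (MULLWconst [15] x): 15+1 is a power of two, so the rule
// rewrites it as (SUBW (SLWconst [4] x) x), i.e. x*16 - x. The c >= 15
// and c >= 17 bounds leave small multipliers to the multiply instruction.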

// Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
(ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB && idx.Op != OpSB -> (MOVDaddridx [c] {s} ptr idx)

// fold ADDconst into MOVDaddridx
(ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
(MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(c+d) && x.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
(MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(c+d) && y.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)

// reverse ordering of compare instruction
(MOVDLT x y (InvertFlags cmp)) -> (MOVDGT x y cmp)
(MOVDGT x y (InvertFlags cmp)) -> (MOVDLT x y cmp)
(MOVDLE x y (InvertFlags cmp)) -> (MOVDGE x y cmp)
(MOVDGE x y (InvertFlags cmp)) -> (MOVDLE x y cmp)
(MOVDEQ x y (InvertFlags cmp)) -> (MOVDEQ x y cmp)
(MOVDNE x y (InvertFlags cmp)) -> (MOVDNE x y cmp)

// don't extend after proper load
(MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
(MOVBZreg x:(MOVBZload _ _)) -> (MOVDreg x)
(MOVHreg x:(MOVBload _ _)) -> (MOVDreg x)
(MOVHreg x:(MOVBZload _ _)) -> (MOVDreg x)
(MOVHreg x:(MOVHload _ _)) -> (MOVDreg x)
(MOVHZreg x:(MOVBZload _ _)) -> (MOVDreg x)
(MOVHZreg x:(MOVHZload _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVBload _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVBZload _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVHload _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVHZload _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVWload _ _)) -> (MOVDreg x)
(MOVWZreg x:(MOVBZload _ _)) -> (MOVDreg x)
(MOVWZreg x:(MOVHZload _ _)) -> (MOVDreg x)
(MOVWZreg x:(MOVWZload _ _)) -> (MOVDreg x)

// don't extend if argument is already extended
(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> (MOVDreg x)
(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> (MOVDreg x)
(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> (MOVDreg x)
(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> (MOVDreg x)
(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> (MOVDreg x)
(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> (MOVDreg x)

// fold double extensions
(MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
(MOVBZreg x:(MOVBZreg _)) -> (MOVDreg x)
(MOVHreg x:(MOVBreg _)) -> (MOVDreg x)
(MOVHreg x:(MOVBZreg _)) -> (MOVDreg x)
(MOVHreg x:(MOVHreg _)) -> (MOVDreg x)
(MOVHZreg x:(MOVBZreg _)) -> (MOVDreg x)
(MOVHZreg x:(MOVHZreg _)) -> (MOVDreg x)
(MOVWreg x:(MOVBreg _)) -> (MOVDreg x)
(MOVWreg x:(MOVBZreg _)) -> (MOVDreg x)
(MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
(MOVWreg x:(MOVHZreg _)) -> (MOVDreg x)
(MOVWreg x:(MOVWreg _)) -> (MOVDreg x)
(MOVWZreg x:(MOVBZreg _)) -> (MOVDreg x)
(MOVWZreg x:(MOVHZreg _)) -> (MOVDreg x)
(MOVWZreg x:(MOVWZreg _)) -> (MOVDreg x)

(MOVBreg (MOVBZreg x)) -> (MOVBreg x)
(MOVBZreg (MOVBreg x)) -> (MOVBZreg x)
(MOVHreg (MOVHZreg x)) -> (MOVHreg x)
(MOVHZreg (MOVHreg x)) -> (MOVHZreg x)
(MOVWreg (MOVWZreg x)) -> (MOVWreg x)
(MOVWZreg (MOVWreg x)) -> (MOVWZreg x)

// fold extensions into constants
(MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
(MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
(MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
(MOVWreg (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
(MOVWZreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])

// sign extended loads
// Note: The combined instruction must end up in the same block
// as the original load. If not, we end up making a value with
// memory type live in two different blocks, which can lead to
// multiple memory values alive simultaneously.
// Make sure we don't combine these ops if the load has another use.
// This prevents a single load from being split into multiple loads
// which then might return different values. See test/atomicload.go.
(MOVBreg x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBreg x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBZreg x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <v.Type> [off] {sym} ptr mem)
(MOVBZreg x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <v.Type> [off] {sym} ptr mem)
(MOVHreg x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload <v.Type> [off] {sym} ptr mem)
(MOVHreg x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload <v.Type> [off] {sym} ptr mem)
(MOVHZreg x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
(MOVHZreg x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
(MOVWreg x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWreg x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWZreg x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
(MOVWZreg x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)

(MOVBreg x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVBreg x:(MOVBloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVBZreg x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVBZreg x:(MOVBloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVHreg x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVHreg x:(MOVHloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVHZreg x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVHZreg x:(MOVHloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVWreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVWreg x:(MOVWloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
(MOVWZreg x:(MOVWloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)

// replace load from same location as preceding store with copy
(MOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVDreg x)
(MOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWreg x)
(MOVHload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHreg x)
(MOVBload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBreg x)
(MOVWZload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWZreg x)
(MOVHZload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHZreg x)
(MOVBZload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBZreg x)
(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LGDR x)
(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LDGR x)
(FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
(FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x

// prefer FPR <-> GPR moves over combined load ops
(MULLDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (MULLD x (LGDR <t> y))
(ADDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (ADD x (LGDR <t> y))
(SUBload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (SUB x (LGDR <t> y))
(ORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (OR x (LGDR <t> y))
(ANDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (AND x (LGDR <t> y))
(XORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (XOR x (LGDR <t> y))

// detect attempts to set/clear the sign bit
// may need to be reworked when NIHH/OIHH are added
(SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x))
(LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x))
(OR (MOVDconst [-1<<63]) (LGDR <t> x)) -> (LGDR <t> (LNDFR <x.Type> x))
(LDGR <t> (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR <t> x))

// detect attempts to set the sign bit with load
(LDGR <t> x:(ORload <t1> [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (LNDFR <t> (LDGR <t> (MOVDload <t1> [off] {sym} ptr mem)))

// detect copysign
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
(CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y)
(CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y)
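// (CPSDR y x) combines the magnitude of y with the sign bit of x, which
// is the bit-level expansion of math.Copysign recognized above. When the
// sign source is a constant, the result reduces to a plain load positive
// (LPDFR) or load negative (LNDFR).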

// absorb negations into set/clear sign bit
(FNEG (LPDFR x)) -> (LNDFR x)
(FNEG (LNDFR x)) -> (LPDFR x)
(FNEGS (LPDFR x)) -> (LNDFR x)
(FNEGS (LNDFR x)) -> (LPDFR x)

// no need to convert float32 to float64 to set/clear sign bit
(LEDBR (LPDFR (LDEBR x))) -> (LPDFR x)
(LEDBR (LNDFR (LDEBR x))) -> (LNDFR x)

// remove unnecessary FPR <-> GPR moves
(LDGR (LGDR x)) -> x
(LGDR (LDGR x)) -> (MOVDreg x)

// Don't extend before storing
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWZreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHZreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBZreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)

// Fold constants into memory operations.
// Note that this is not always a good idea because if not all the uses of
// the ADDconst get eliminated, we still have to compute the ADDconst and we now
// have potentially two live values (ptr and (ADDconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
(MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVDload [off1+off2] {sym} ptr mem)
(MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem)
(MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHload [off1+off2] {sym} ptr mem)
(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
(MOVWZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} ptr mem)
(MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} ptr mem)
(MOVBZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} ptr mem)
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVSload [off1+off2] {sym} ptr mem)
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVDload [off1+off2] {sym} ptr mem)

(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} ptr val mem)
(MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
(MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVHstore [off1+off2] {sym} ptr val mem)
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)

(ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload [off1+off2] {sym} x ptr mem)
(ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload [off1+off2] {sym} x ptr mem)
(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
(SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload [off1+off2] {sym} x ptr mem)
(SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload [off1+off2] {sym} x ptr mem)

(ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload [off1+off2] {sym} x ptr mem)
(ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload [off1+off2] {sym} x ptr mem)
(ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload [off1+off2] {sym} x ptr mem)
(ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload [off1+off2] {sym} x ptr mem)
(XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload [off1+off2] {sym} x ptr mem)
(XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload [off1+off2] {sym} x ptr mem)

// Fold constants into stores.
(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
    (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
    (MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(off) && ptr.Op != OpSB ->
    (MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(off) && ptr.Op != OpSB ->
    (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)

// Fold address offsets into constant stores.
(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
    (MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
    (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
    (MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(ValAndOff(sc).Off()+off) ->
    (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)

// Merge address calculations into loads and stores.
// Offsets from SB must not be merged into unaligned memory accesses because
// loads/stores using PC-relative addressing directly must be aligned to the
// size of the target.
(MOVDload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
    (MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
    (MOVWZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
    (MOVHZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVBZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)

(MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
    (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
    (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)

(MOVDstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
    (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVWstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
    (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVHstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
    (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)

(ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)

(ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
(XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)

// Cannot store constant to SB directly (no 'move relative long immediate' instructions).
(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
    (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
    (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
    (MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
    (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)

// generating indexed loads and stores
(MOVBZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVBZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVBload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVBloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVHload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVHloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVWloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(FMOVSload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVSloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(FMOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)

(MOVBstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVBstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(FMOVSstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVSstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(FMOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (FMOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)

(MOVBZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVBZloadidx [off] {sym} ptr idx mem)
(MOVBload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVBloadidx [off] {sym} ptr idx mem)
(MOVHZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVHZloadidx [off] {sym} ptr idx mem)
(MOVHload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVHloadidx [off] {sym} ptr idx mem)
(MOVWZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVWZloadidx [off] {sym} ptr idx mem)
(MOVWload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVWloadidx [off] {sym} ptr idx mem)
(MOVDload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVDloadidx [off] {sym} ptr idx mem)
925 (FMOVSload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (FMOVSloadidx [off] {sym} ptr idx mem)
926 (FMOVDload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (FMOVDloadidx [off] {sym} ptr idx mem)
927
928 (MOVBstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVBstoreidx [off] {sym} ptr idx val mem)
929 (MOVHstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVHstoreidx [off] {sym} ptr idx val mem)
930 (MOVWstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVWstoreidx [off] {sym} ptr idx val mem)
931 (MOVDstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVDstoreidx [off] {sym} ptr idx val mem)
932 (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (FMOVSstoreidx [off] {sym} ptr idx val mem)
933 (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (FMOVDstoreidx [off] {sym} ptr idx val mem)
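
// Loads and stores through a computed pointer (ADD ptr idx) can use
// the machine's base+index addressing directly (illustratively, the
// RX/RXY instruction formats take a base register, an index register
// and a displacement), so the separate ADD is not needed.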

// combine ADD into indexed loads and stores
(MOVBZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
(MOVBloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBloadidx [c+d] {sym} ptr idx mem)
(MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
(MOVHloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHloadidx [c+d] {sym} ptr idx mem)
(MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
(MOVWloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWloadidx [c+d] {sym} ptr idx mem)
(MOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVDloadidx [c+d] {sym} ptr idx mem)
(FMOVSloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
(FMOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)

(MOVBstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVBstoreidx [c+d] {sym} ptr idx val mem)
(MOVHstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVHstoreidx [c+d] {sym} ptr idx val mem)
(MOVWstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVWstoreidx [c+d] {sym} ptr idx val mem)
(MOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVDstoreidx [c+d] {sym} ptr idx val mem)
(FMOVSstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
(FMOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)

(MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
(MOVBloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBloadidx [c+d] {sym} ptr idx mem)
(MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
(MOVHloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHloadidx [c+d] {sym} ptr idx mem)
(MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
(MOVWloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWloadidx [c+d] {sym} ptr idx mem)
(MOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVDloadidx [c+d] {sym} ptr idx mem)
(FMOVSloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
(FMOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)

(MOVBstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVBstoreidx [c+d] {sym} ptr idx val mem)
(MOVHstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVHstoreidx [c+d] {sym} ptr idx val mem)
(MOVWstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVWstoreidx [c+d] {sym} ptr idx val mem)
(MOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVDstoreidx [c+d] {sym} ptr idx val mem)
(FMOVSstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
(FMOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)

// MOVDaddr into MOVDaddridx
(MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
    (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
(MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
    (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)

// Absorb InvertFlags into branches.
((LT|GT|LE|GE|EQ|NE) (InvertFlags cmp) yes no) -> ((GT|LT|GE|LE|EQ|NE) cmp yes no)
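
// InvertFlags indicates that the flags were produced by a comparison
// with its operands swapped, so e.g. a less-than branch on inverted
// flags is really a greater-than branch; EQ and NE are unaffected.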

// Constant comparisons.
(CMPconst (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
(CMPconst (MOVDconst [x]) [y]) && x<y -> (FlagLT)
(CMPconst (MOVDconst [x]) [y]) && x>y -> (FlagGT)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)==uint64(y) -> (FlagEQ)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) -> (FlagGT)

(CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
(CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) -> (FlagLT)
(CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) -> (FlagGT)
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)==uint32(y) -> (FlagEQ)
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT)
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT)

(CMP(W|WU)const (MOVBZreg _) [c]) && 0xff < c -> (FlagLT)
(CMP(W|WU)const (MOVHZreg _) [c]) && 0xffff < c -> (FlagLT)

(CMPconst (SRDconst _ [c]) [n]) && c > 0 && n < 0 -> (FlagGT)
(CMPWconst (SRWconst _ [c]) [n]) && c > 0 && n < 0 -> (FlagGT)

(CMPUconst (SRDconst _ [c]) [n]) && c > 0 && c < 64 && (1<<uint(64-c)) <= uint64(n) -> (FlagLT)
(CMPWUconst (SRWconst _ [c]) [n]) && c > 0 && c < 32 && (1<<uint(32-c)) <= uint32(n) -> (FlagLT)

(CMPWconst (ANDWconst _ [m]) [n]) && int32(m) >= 0 && int32(m) < int32(n) -> (FlagLT)
(CMPWUconst (ANDWconst _ [m]) [n]) && uint32(m) < uint32(n) -> (FlagLT)

// Convert 64-bit comparisons to 32-bit comparisons and signed comparisons
// to unsigned comparisons.
// Helps simplify constant comparison detection.
(CM(P|PU)const (MOV(W|WZ)reg x) [c]) -> (CMP(W|WU)const x [c])
(CM(P|P|PU|PU)const x:(MOV(H|HZ|H|HZ)reg _) [c]) -> (CMP(W|W|WU|WU)const x [c])
(CM(P|P|PU|PU)const x:(MOV(B|BZ|B|BZ)reg _) [c]) -> (CMP(W|W|WU|WU)const x [c])
(CMPconst (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0 && c >= 0 -> (CMPWUconst x [c])
(CMPUconst (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0 -> (CMPWUconst x [c])
(CMPconst x:(SRDconst _ [c]) [n]) && c > 0 && n >= 0 -> (CMPUconst x [n])
(CMPWconst x:(SRWconst _ [c]) [n]) && c > 0 && n >= 0 -> (CMPWUconst x [n])
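
// The shift rules above are safe because a logical right shift by
// c > 0 always produces a non-negative value, so for a non-negative
// constant n the signed and unsigned comparisons agree.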

// Absorb sign and zero extensions into 32-bit comparisons.
(CMP(W|W|WU|WU) x (MOV(W|WZ|W|WZ)reg y)) -> (CMP(W|W|WU|WU) x y)
(CMP(W|W|WU|WU) (MOV(W|WZ|W|WZ)reg x) y) -> (CMP(W|W|WU|WU) x y)
(CMP(W|W|WU|WU)const (MOV(W|WZ|W|WZ)reg x) [c]) -> (CMP(W|W|WU|WU)const x [c])

// Absorb flag constants into branches.
(EQ (FlagEQ) yes no) -> (First nil yes no)
(EQ (FlagLT) yes no) -> (First nil no yes)
(EQ (FlagGT) yes no) -> (First nil no yes)

(NE (FlagEQ) yes no) -> (First nil no yes)
(NE (FlagLT) yes no) -> (First nil yes no)
(NE (FlagGT) yes no) -> (First nil yes no)

(LT (FlagEQ) yes no) -> (First nil no yes)
(LT (FlagLT) yes no) -> (First nil yes no)
(LT (FlagGT) yes no) -> (First nil no yes)

(LE (FlagEQ) yes no) -> (First nil yes no)
(LE (FlagLT) yes no) -> (First nil yes no)
(LE (FlagGT) yes no) -> (First nil no yes)

(GT (FlagEQ) yes no) -> (First nil no yes)
(GT (FlagLT) yes no) -> (First nil no yes)
(GT (FlagGT) yes no) -> (First nil yes no)

(GE (FlagEQ) yes no) -> (First nil yes no)
(GE (FlagLT) yes no) -> (First nil no yes)
(GE (FlagGT) yes no) -> (First nil yes no)

// Absorb flag constants into SETxx ops.
(MOVDEQ _ x (FlagEQ)) -> x
(MOVDEQ y _ (FlagLT)) -> y
(MOVDEQ y _ (FlagGT)) -> y

(MOVDNE y _ (FlagEQ)) -> y
(MOVDNE _ x (FlagLT)) -> x
(MOVDNE _ x (FlagGT)) -> x

(MOVDLT y _ (FlagEQ)) -> y
(MOVDLT _ x (FlagLT)) -> x
(MOVDLT y _ (FlagGT)) -> y

(MOVDLE _ x (FlagEQ)) -> x
(MOVDLE _ x (FlagLT)) -> x
(MOVDLE y _ (FlagGT)) -> y

(MOVDGT y _ (FlagEQ)) -> y
(MOVDGT y _ (FlagLT)) -> y
(MOVDGT _ x (FlagGT)) -> x

(MOVDGE _ x (FlagEQ)) -> x
(MOVDGE y _ (FlagLT)) -> y
(MOVDGE _ x (FlagGT)) -> x

// Remove redundant *const ops
(ADDconst [0] x) -> x
(ADDWconst [c] x) && int32(c)==0 -> x
(SUBconst [0] x) -> x
(SUBWconst [c] x) && int32(c) == 0 -> x
(ANDconst [0] _) -> (MOVDconst [0])
(ANDWconst [c] _) && int32(c)==0 -> (MOVDconst [0])
(ANDconst [-1] x) -> x
(ANDWconst [c] x) && int32(c)==-1 -> x
(ORconst [0] x) -> x
(ORWconst [c] x) && int32(c)==0 -> x
(ORconst [-1] _) -> (MOVDconst [-1])
(ORWconst [c] _) && int32(c)==-1 -> (MOVDconst [-1])
(XORconst [0] x) -> x
(XORWconst [c] x) && int32(c)==0 -> x

// Convert constant subtracts to constant adds.
(SUBconst [c] x) && c != -(1<<31) -> (ADDconst [-c] x)
(SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
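
// Note that the guard on the SUBconst rule excludes c == -1<<31:
// its negation (1<<31) does not fit in the 32-bit range required of
// ADDconst immediates.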

// generic constant folding
// TODO: more of this
(ADDconst [c] (MOVDconst [d])) -> (MOVDconst [c+d])
(ADDWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c+d))])
(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
(ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [int64(int32(c+d))] x)
(SUBconst (MOVDconst [d]) [c]) -> (MOVDconst [d-c])
(SUBconst (SUBconst x [d]) [c]) && is32Bit(-c-d) -> (ADDconst [-c-d] x)
(SRADconst [c] (MOVDconst [d])) -> (MOVDconst [d>>uint64(c)])
(SRAWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(d))>>uint64(c)])
(NEG (MOVDconst [c])) -> (MOVDconst [-c])
(NEGW (MOVDconst [c])) -> (MOVDconst [int64(int32(-c))])
(MULLDconst [c] (MOVDconst [d])) -> (MOVDconst [c*d])
(MULLWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c*d))])
(AND (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c&d])
(ANDconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
(ANDWconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
(OR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c|d])
(ORconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
(ORWconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
(XOR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c^d])
(XORconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
(XORWconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
(LoweredRound32F x:(FMOVSconst)) -> x
(LoweredRound64F x:(FMOVDconst)) -> x

// generic simplifications
// TODO: more of this
(ADD x (NEG y)) -> (SUB x y)
(ADDW x (NEGW y)) -> (SUBW x y)
(SUB x x) -> (MOVDconst [0])
(SUBW x x) -> (MOVDconst [0])
(AND x x) -> x
(ANDW x x) -> x
(OR x x) -> x
(ORW x x) -> x
(XOR x x) -> (MOVDconst [0])
(XORW x x) -> (MOVDconst [0])
(NEG (ADDconst [c] (NEG x))) && c != -(1<<31) -> (ADDconst [-c] x)
(MOVBZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
(MOVHZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
(MOVBreg (ANDWconst [m] x)) && int8(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
(MOVHreg (ANDWconst [m] x)) && int16(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
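
// The sign-extension rules above are only applied when the mask
// leaves the sign bit clear (int8(m) >= 0, int16(m) >= 0); the masked
// value is then non-negative, so sign- and zero-extension coincide.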

// carry flag generation
// (only the carry-of-zero case is constant folded: the guard
// uint64(c+d) >= uint64(c) means that adding c and d produces no carry-out)
(Select1 (ADDCconst (MOVDconst [c]) [d]))
    && uint64(c+d) >= uint64(c) && c+d == 0
    -> (FlagEQ)
(Select1 (ADDCconst (MOVDconst [c]) [d]))
    && uint64(c+d) >= uint64(c) && c+d != 0
    -> (FlagLT)

// borrow flag generation
// (only the borrow-of-zero case is constant folded: the guard
// uint64(d) <= uint64(c) means that subtracting d from c produces no borrow)
(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
    && uint64(d) <= uint64(c) && c-d == 0
    -> (FlagGT)
(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
    && uint64(d) <= uint64(c) && c-d != 0
    -> (FlagOV)

// add with carry
(ADDE x y (FlagEQ)) -> (ADDC x y)
(ADDE x y (FlagLT)) -> (ADDC x y)
(ADDC x (MOVDconst [c])) && is16Bit(c) -> (ADDCconst x [c])
(Select0 (ADDCconst (MOVDconst [c]) [d])) -> (MOVDconst [c+d])

// subtract with borrow
(SUBE x y (FlagGT)) -> (SUBC x y)
(SUBE x y (FlagOV)) -> (SUBC x y)
(Select0 (SUBC (MOVDconst [c]) (MOVDconst [d]))) -> (MOVDconst [c-d])

// collapse carry chain
(ADDE x y (Select1 (ADDCconst [-1] (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) c)))))
    -> (ADDE x y c)

// collapse borrow chain
(SUBE x y (Select1 (SUBC (MOVDconst [0]) (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) c))))))
    -> (SUBE x y c)
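
// A sketch of why the collapse is sound: (ADDE (MOVDconst [0])
// (MOVDconst [0]) c) materializes the carry c as the value 0 or 1,
// and adding -1 to that value carries out exactly when it is nonzero,
// so the Select1 regenerates the original carry flag. The borrow
// chain plays the same trick with NEG and SUBC to regenerate the
// borrow flag.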

// fused multiply-add
(FADD (FMUL y z) x) -> (FMADD x y z)
(FADDS (FMULS y z) x) -> (FMADDS x y z)
(FSUB (FMUL y z) x) -> (FMSUB x y z)
(FSUBS (FMULS y z) x) -> (FMSUBS x y z)
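
// For example, (FADD (FMUL y z) x) computes y*z + x; FMADD performs
// the multiply and add as a single fused operation (on z/Architecture
// this corresponds to the MADBR/MAEBR multiply-and-add instructions),
// rounding only once.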

// Fold memory operations into operations.
// Exclude global data (SB) because these instructions cannot handle relative addresses.
// TODO(mundaym): use LARL in the assembler to handle SB?
// TODO(mundaym): indexed versions of these?
(ADD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ADDload <t> [off] {sym} x ptr mem)
(ADD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ADDload <t> [off] {sym} x ptr mem)
(ADDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ADDWload <t> [off] {sym} x ptr mem)
(MULLD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (MULLDload <t> [off] {sym} x ptr mem)
(MULLD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (MULLDload <t> [off] {sym} x ptr mem)
(MULLW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (MULLWload <t> [off] {sym} x ptr mem)
(SUB <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (SUBload <t> [off] {sym} x ptr mem)
(SUBW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (SUBWload <t> [off] {sym} x ptr mem)
(SUBW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (SUBWload <t> [off] {sym} x ptr mem)
(AND <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ANDload <t> [off] {sym} x ptr mem)
(AND <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ANDload <t> [off] {sym} x ptr mem)
(ANDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ANDWload <t> [off] {sym} x ptr mem)
(OR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ORload <t> [off] {sym} x ptr mem)
(OR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ORload <t> [off] {sym} x ptr mem)
(ORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (ORWload <t> [off] {sym} x ptr mem)
(XOR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (XORload <t> [off] {sym} x ptr mem)
(XOR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (XORload <t> [off] {sym} x ptr mem)
(XORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
    -> (XORWload <t> [off] {sym} x ptr mem)
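
// e.g. (ADD x (MOVDload [off] {sym} ptr mem)) becomes a single
// register-memory instruction (illustratively AG, add from memory),
// avoiding a separate load into a register.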

// Combine constant stores into larger (unaligned) stores.
// Avoid SB because constant stores to relative offsets are
// emulated by the assembler and also can't handle unaligned offsets.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
    && p.Op != OpSB
    && x.Uses == 1
    && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVHstoreconst [makeValAndOff(ValAndOff(c).Val()&0xff | ValAndOff(a).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
    && p.Op != OpSB
    && x.Uses == 1
    && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVWstore [ValAndOff(a).Off()] {s} p (MOVDconst [int64(int32(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16))]) mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
    && p.Op != OpSB
    && x.Uses == 1
    && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
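
// For example (illustrative): storing the constant 0x12 at p+0 and
// 0x34 at p+1 combines into a single 16-bit store of 0x1234 at p+0;
// being big-endian, the value stored at the lower address supplies
// the high byte of the combined constant.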

// Combine stores into larger (unaligned) stores.
// It doesn't work on global data (based on SB) because stores with relative addressing
// require that the memory operand be aligned.
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstore [i-2] {s} p w mem)
(MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstore [i-2] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstore [i-2] {s} p w mem)
(MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstore [i-2] {s} p w0 mem)
(MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVDstore [i-4] {s} p w mem)
(MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVDstore [i-4] {s} p w0 mem)
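
// For example, a byte store of w at [i] paired with a byte store of
// w>>8 at [i-1] writes exactly the big-endian layout of w's low 16
// bits starting at [i-1], so the pair becomes a single MOVHstore.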

(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstoreidx [i-1] {s} p idx w mem)
(MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstoreidx [i-1] {s} p idx w mem)
(MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstoreidx [i-2] {s} p idx w mem)
(MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstoreidx [i-2] {s} p idx w mem)
(MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
(MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVDstoreidx [i-4] {s} p idx w mem)
(MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVDstoreidx [i-4] {s} p idx w0 mem)

// Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
// Store-with-bytes-reversed instructions do not support relative memory addresses,
// so these stores can't operate on global data (SB).
(MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstore [i-1] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstore [i-2] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstore [i-2] {s} p w0 mem)
(MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVDBRstore [i-4] {s} p w mem)
(MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVDBRstore [i-4] {s} p w0 mem)

(MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
(MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
(MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
(MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
(MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
(MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
(MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
(MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
(MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVDBRstoreidx [i-4] {s} p idx w mem)
(MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)

// Combine byte loads into larger (unaligned) loads.

// Big-endian loads
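// Since s390x is big-endian, the byte at the lower address is the
// more significant one. For example, x1 loaded from i0+1 OR'd with
// x0 loaded from i0 and shifted left by 8 is exactly the 16-bit
// big-endian value stored at i0, so the pair becomes one MOVHZload.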

(ORW x1:(MOVBZload [i1] {s} p mem)
    sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
    && i1 == i0+1
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)

(OR x1:(MOVBZload [i1] {s} p mem)
    sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
    && i1 == i0+1
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)

(ORW x1:(MOVHZload [i1] {s} p mem)
    sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
    && i1 == i0+2
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)

(OR x1:(MOVHZload [i1] {s} p mem)
    sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
    && i1 == i0+2
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)

(OR x1:(MOVWZload [i1] {s} p mem)
    sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
    && i1 == i0+4
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)

(ORW
    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
    or:(ORW
        s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
        y))
    && i1 == i0+1
    && j1 == j0-8
    && j1 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)

(OR
    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
        y))
    && i1 == i0+1
    && j1 == j0-8
    && j1 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)

(OR
    s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
        y))
    && i1 == i0+2
    && j1 == j0-16
    && j1 % 32 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)

// Big-endian indexed loads

(ORW x1:(MOVBZloadidx [i1] {s} p idx mem)
    sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
    && i1 == i0+1
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)

(OR x1:(MOVBZloadidx [i1] {s} p idx mem)
    sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
    && i1 == i0+1
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)

(ORW x1:(MOVHZloadidx [i1] {s} p idx mem)
    sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
    && i1 == i0+2
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)

(OR x1:(MOVHZloadidx [i1] {s} p idx mem)
    sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
    && i1 == i0+2
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)

(OR x1:(MOVWZloadidx [i1] {s} p idx mem)
    sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
    && i1 == i0+4
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)

(ORW
    s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
    or:(ORW
        s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
        y))
    && i1 == i0+1
    && j1 == j0-8
    && j1 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)

(OR
    s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
        y))
    && i1 == i0+1
    && j1 == j0-8
    && j1 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)

(OR
    s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
        y))
    && i1 == i0+2
    && j1 == j0-16
    && j1 % 32 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)

// Little-endian loads
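// These patterns assemble a value from bytes in little-endian order,
// so the merged load must reverse the bytes (illustratively via the
// LRVH/LRV/LRVG load-reversed instructions) and then zero-extend.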

(ORW x0:(MOVBZload [i0] {s} p mem)
    sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
    && p.Op != OpSB
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))

(OR x0:(MOVBZload [i0] {s} p mem)
    sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
    && p.Op != OpSB
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))

(ORW r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
    && i1 == i0+2
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)

(OR r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
    && i1 == i0+2
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))

(OR r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
    && i1 == i0+4
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)

(ORW
    s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
    or:(ORW
        s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
        y))
    && p.Op != OpSB
    && i1 == i0+1
    && j1 == j0+8
    && j0 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)

(OR
    s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
    or:(OR
        s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
        y))
    && p.Op != OpSB
    && i1 == i0+1
    && j1 == j0+8
    && j0 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)

(OR
    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
    or:(OR
        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
        y))
    && i1 == i0+2
    && j1 == j0+16
    && j0 % 32 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)

// Little-endian indexed loads

(ORW x0:(MOVBZloadidx [i0] {s} p idx mem)
    sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
    && p.Op != OpSB
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))

(OR x0:(MOVBZloadidx [i0] {s} p idx mem)
    sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
    && p.Op != OpSB
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))

(ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
    && i1 == i0+2
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)

(OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
    && i1 == i0+2
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))

(OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
    && i1 == i0+4
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(sh)
    -> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)

(ORW
    s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
    or:(ORW
        s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
        y))
    && p.Op != OpSB
    && i1 == i0+1
    && j1 == j0+8
    && j0 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)

(OR
    s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
    or:(OR
        s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
        y))
    && p.Op != OpSB
    && i1 == i0+1
    && j1 == j0+8
    && j0 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)

(OR
    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
    or:(OR
        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
        y))
    && i1 == i0+2
    && j1 == j0+16
    && j0 % 32 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(r0)
    && clobber(r1)
    && clobber(s0)
    && clobber(s1)
    && clobber(or)
    -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)

// Combine stores into store multiples.
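// STM/STMG store a run of consecutive registers to consecutive memory
// locations (STM for 32-bit values, STMG for 64-bit values); up to
// four registers are combined here.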
// 32-bit
(MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
    && p.Op != OpSB
    && x.Uses == 1
    && is20Bit(i-4)
    && clobber(x)
    -> (STM2 [i-4] {s} p w0 w1 mem)
(MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
    && x.Uses == 1
    && is20Bit(i-8)
    && clobber(x)
    -> (STM3 [i-8] {s} p w0 w1 w2 mem)
(MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
    && x.Uses == 1
    && is20Bit(i-12)
    && clobber(x)
    -> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
(STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
    && x.Uses == 1
    && is20Bit(i-8)
    && clobber(x)
    -> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
// 64-bit
(MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
    && p.Op != OpSB
    && x.Uses == 1
    && is20Bit(i-8)
    && clobber(x)
    -> (STMG2 [i-8] {s} p w0 w1 mem)
(MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
    && x.Uses == 1
    && is20Bit(i-16)
    && clobber(x)
    -> (STMG3 [i-16] {s} p w0 w1 w2 mem)
(MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
    && x.Uses == 1
    && is20Bit(i-24)
    && clobber(x)
    -> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
(STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
    && x.Uses == 1
    && is20Bit(i-16)
    && clobber(x)
    -> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)

// Convert 32-bit store multiples into 64-bit stores.
(STM2 [i] {s} p (SRDconst [32] x) x mem) -> (MOVDstore [i] {s} p x mem)
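
// For example (illustrative): if the first word stored is x>>32 and
// the second is x's low word, the pair lays down exactly the eight
// big-endian bytes of x, so one 64-bit store suffices.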