Text file src/pkg/cmd/compile/internal/ssa/gen/AMD64.rules
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Lowering arithmetic: generic add/sub/mul ops are mapped onto AMD64 ops.
6 (Add(64|32|16|8) x y) -> (ADD(Q|L|L|L) x y) // 32-, 16- and 8-bit adds all use ADDL
7 (AddPtr x y) && config.PtrSize == 8 -> (ADDQ x y) // pointer ops pick the Q or L form from config.PtrSize
8 (AddPtr x y) && config.PtrSize == 4 -> (ADDL x y)
9 (Add(32|64)F x y) -> (ADDS(S|D) x y) // 32-bit float -> ADDSS, 64-bit float -> ADDSD
10
11 (Sub(64|32|16|8) x y) -> (SUB(Q|L|L|L) x y)
12 (SubPtr x y) && config.PtrSize == 8 -> (SUBQ x y)
13 (SubPtr x y) && config.PtrSize == 4 -> (SUBL x y)
14 (Sub(32|64)F x y) -> (SUBS(S|D) x y)
15
16 (Mul(64|32|16|8) x y) -> (MUL(Q|L|L|L) x y)
17 (Mul(32|64)F x y) -> (MULS(S|D) x y)
18
19 (Select0 (Mul64uover x y)) -> (Select0 <typ.UInt64> (MULQU x y)) // first tuple element, typed UInt64
20 (Select0 (Mul32uover x y)) -> (Select0 <typ.UInt32> (MULLU x y)) // first tuple element, typed UInt32
21 (Select1 (Mul(64|32)uover x y)) -> (SETO (Select1 <types.TypeFlags> (MUL(Q|L)U x y))) // second element: SETO applied to the flags half of the multiply
22
23 (Hmul(64|32) x y) -> (HMUL(Q|L) x y)
24 (Hmul(64|32)u x y) -> (HMUL(Q|L)U x y) // unsigned variants map to the U-suffixed op
25
26 (Div(64|32|16) [a] x y) -> (Select0 (DIV(Q|L|W) [a] x y)) // DIVx yields a tuple; Div takes element 0 (the Mod rules take element 1)
27 (Div8 x y) -> (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y))) // 8-bit signed: sign-extend both operands and use the 16-bit DIVW
28 (Div(64|32|16)u x y) -> (Select0 (DIV(Q|L|W)U x y))
29 (Div8u x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) // 8-bit unsigned: zero-extend, then DIVWU
30 (Div(32|64)F x y) -> (DIVS(S|D) x y)
31
32 (Select0 (Add64carry x y c)) -> // first result: element 0 of ADCQ; the carry input c is converted to flags by NEGLflags
33 (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
34 (Select1 (Add64carry x y c)) -> // second result: ADCQ's flags turned into a mask by SBBQcarrymask, then negated
35 (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
36 (Select0 (Sub64borrow x y c)) -> // first result: same shape as Add64carry, using SBBQ
37 (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
38 (Select1 (Sub64borrow x y c)) -> // second result: derived from SBBQ's flags the same way
39 (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
40
41 // Optimize ADCQ and friends (add-with-carry / subtract-with-borrow).
42 (ADCQ x (MOVQconst [c]) carry) && is32Bit(c) -> (ADCQconst x [c] carry) // fold a constant operand that fits in 32 bits
43 (ADCQ x y (FlagEQ)) -> (ADDQcarry x y) // carry input is the constant FlagEQ: use the op without a carry input
44 (ADCQconst x [c] (FlagEQ)) -> (ADDQconstcarry x [c])
45 (ADDQcarry x (MOVQconst [c])) && is32Bit(c) -> (ADDQconstcarry x [c])
46 (SBBQ x (MOVQconst [c]) borrow) && is32Bit(c) -> (SBBQconst x [c] borrow) // same folds for the subtract-with-borrow family
47 (SBBQ x y (FlagEQ)) -> (SUBQborrow x y)
48 (SBBQconst x [c] (FlagEQ)) -> (SUBQconstborrow x [c])
49 (SUBQborrow x (MOVQconst [c])) && is32Bit(c) -> (SUBQconstborrow x [c])
50 (Select1 (NEGLflags (MOVQconst [0]))) -> (FlagEQ) // flags from negating the constant 0 are the constant FlagEQ
51 (Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) -> x // flags rebuilt from a value that was itself derived from flags x: reuse x
52
53
54 (Mul64uhilo x y) -> (MULQU2 x y) // direct one-to-one lowering
55 (Div128u xhi xlo y) -> (DIVQU2 xhi xlo y)
56
57 (Avg64u x y) -> (AVGQU x y)
58
59 (Mod(64|32|16) [a] x y) -> (Select1 (DIV(Q|L|W) [a] x y)) // same DIVx ops as the Div rules, taking tuple element 1
60 (Mod8 x y) -> (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y))) // 8-bit signed: widen to 16 bits first
61 (Mod(64|32|16)u x y) -> (Select1 (DIV(Q|L|W)U x y))
62 (Mod8u x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) // 8-bit unsigned: zero-extend to 16 bits first
63
64 (And(64|32|16|8) x y) -> (AND(Q|L|L|L) x y) // widths below 64 bits all use the L form
65 (Or(64|32|16|8) x y) -> (OR(Q|L|L|L) x y)
66 (Xor(64|32|16|8) x y) -> (XOR(Q|L|L|L) x y)
67 (Com(64|32|16|8) x) -> (NOT(Q|L|L|L) x)
68
69 (Neg(64|32|16|8) x) -> (NEG(Q|L|L|L) x)
70 (Neg32F x) -> (PXOR x (MOVSSconst <typ.Float32> [auxFrom32F(float32(math.Copysign(0, -1)))])) // XOR with -0.0 (math.Copysign(0, -1)), whose only set bit is the sign bit
71 (Neg64F x) -> (PXOR x (MOVSDconst <typ.Float64> [auxFrom64F(math.Copysign(0, -1))])) // same sign-bit flip for 64-bit floats
72
73 // Lowering boolean ops (booleans use the 32-bit L ops).
74 (AndB x y) -> (ANDL x y)
75 (OrB x y) -> (ORL x y)
76 (Not x) -> (XORLconst [1] x) // XOR with 1 flips the low bit
77
78 // Lowering pointer arithmetic
79 (OffPtr [off] ptr) && config.PtrSize == 8 && is32Bit(off) -> (ADDQconst [off] ptr) // offset fits in 32 bits: use the constant-add form
80 (OffPtr [off] ptr) && config.PtrSize == 8 -> (ADDQ (MOVQconst [off]) ptr) // otherwise materialize the offset as a MOVQconst and add it
81 (OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr)
82
83 // Lowering other arithmetic
84 (Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x))) // CMOVQEQ chooses between BSFQ's result and the constant 64 based on BSFQ's flags
85 (Ctz32 x) -> (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x))) // bit 32 is set first, so the value scanned by BSFQ is never zero
86 (Ctz16 x) -> (BSFL (BTSLconst <typ.UInt32> [16] x)) // same approach: set bit 16, then scan with the 32-bit op
87 (Ctz8 x) -> (BSFL (BTSLconst <typ.UInt32> [ 8] x)) // set bit 8, then scan
88
89 (Ctz64NonZero x) -> (Select0 (BSFQ x)) // NonZero variants use the bit scan directly, with no zero-input handling
90 (Ctz32NonZero x) -> (BSFL x)
91 (Ctz16NonZero x) -> (BSFL x)
92 (Ctz8NonZero x) -> (BSFL x)
93
94 // BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
95 // However, for zero-extended values, we can cheat a bit, and calculate
96 // BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently
97 // places the index of the highest set bit where we want it.
98 (BitLen64 <t> x) -> (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x)))) // CMOVQEQ chooses between BSRQ's result and -1 using BSRQ's flags; 1 is then added
99 (BitLen32 x) -> (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x)))) // LEAQ1 [1] z z forms z+z+1, i.e. x<<1 + 1 on the zero-extended input
100 (BitLen16 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x))) // same trick using the 32-bit ops
101 (BitLen8 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))
102
103 (Bswap(64|32) x) -> (BSWAP(Q|L) x)
104
105 (PopCount64 x) -> (POPCNTQ x)
106 (PopCount32 x) -> (POPCNTL x)
107 (PopCount16 x) -> (POPCNTL (MOVWQZX <typ.UInt32> x)) // zero-extend to 32 bits before counting with POPCNTL
108 (PopCount8 x) -> (POPCNTL (MOVBQZX <typ.UInt32> x))
109
110 (Sqrt x) -> (SQRTSD x)
111
112 (RoundToEven x) -> (ROUNDSD [0] x) // the ROUNDSD aux value selects the rounding variant: 0 here
113 (Floor x) -> (ROUNDSD [1] x) // 1 = Floor
114 (Ceil x) -> (ROUNDSD [2] x) // 2 = Ceil
115 (Trunc x) -> (ROUNDSD [3] x) // 3 = Trunc
116
117 // Lowering extension
118 // Note: we always extend to 64 bits even though some ops don't need that many result bits.
119 (SignExt8to16 x) -> (MOVBQSX x) // the op is chosen by source width only (B, W or L); the target is always Q
120 (SignExt8to32 x) -> (MOVBQSX x)
121 (SignExt8to64 x) -> (MOVBQSX x)
122 (SignExt16to32 x) -> (MOVWQSX x)
123 (SignExt16to64 x) -> (MOVWQSX x)
124 (SignExt32to64 x) -> (MOVLQSX x)
125
126 (ZeroExt8to16 x) -> (MOVBQZX x) // zero-extending counterparts of the rules above
127 (ZeroExt8to32 x) -> (MOVBQZX x)
128 (ZeroExt8to64 x) -> (MOVBQZX x)
129 (ZeroExt16to32 x) -> (MOVWQZX x)
130 (ZeroExt16to64 x) -> (MOVWQZX x)
131 (ZeroExt32to64 x) -> (MOVLQZX x)
132
133 (Slicemask <t> x) -> (SARQconst (NEGQ <t> x) [63]) // negate x, then shift right by 63 with SARQ so the sign bit of -x fills the result
134
135 // Lowering truncation
136 // Because we ignore high parts of registers, truncates are just copies.
137 (Trunc16to8 x) -> x // every truncation rewrites to its operand unchanged
138 (Trunc32to8 x) -> x
139 (Trunc32to16 x) -> x
140 (Trunc64to8 x) -> x
141 (Trunc64to16 x) -> x
142 (Trunc64to32 x) -> x
143
144 // Lowering float <-> int
145 (Cvt32to32F x) -> (CVTSL2SS x) // int -> float: the source width picks L or Q, the target picks SS or SD
146 (Cvt32to64F x) -> (CVTSL2SD x)
147 (Cvt64to32F x) -> (CVTSQ2SS x)
148 (Cvt64to64F x) -> (CVTSQ2SD x)
149
150 (Cvt32Fto32 x) -> (CVTTSS2SL x) // float -> int uses the CVTT* ops
151 (Cvt32Fto64 x) -> (CVTTSS2SQ x)
152 (Cvt64Fto32 x) -> (CVTTSD2SL x)
153 (Cvt64Fto64 x) -> (CVTTSD2SQ x)
154
155 (Cvt32Fto64F x) -> (CVTSS2SD x) // float <-> float width changes
156 (Cvt64Fto32F x) -> (CVTSD2SS x)
157
158 (Round(32|64)F x) -> x // rewritten to the operand unchanged
159
160 // Lowering shifts
161 // Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
162 // result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
163 (Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64]))) // the mask comes from comparing y (at its own width) with 64
164 (Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
165 (Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32]))) // 16- and 8-bit left shifts use SHLL and compare against 32, not 16 or 8
166 (Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
167
168 (Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLQ x y) // bounded shifts: plain shift, no mask
169 (Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
170 (Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
171 (Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
172
173 (Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64]))) // same shift-and-mask scheme as the left shifts
174 (Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
175 (Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16]))) // unlike Lsh16x, uses the width-specific SHRW and compares against 16
176 (Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8]))) // width-specific SHRB, compared against 8
177
178 (Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRQ x y) // bounded shifts: plain shift, no mask
179 (Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRL x y)
180 (Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRW x y)
181 (Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRB x y)
182
183 // Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
184 // We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
185 (Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64]))))) // shift amount is y OR'ed with the inverted carry mask
186 (Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
187 (Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
188 (Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))
189
190 (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARQ x y) // bounded shifts: plain arithmetic shift
191 (Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARL x y)
192 (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARW x y)
193 (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARB x y)
194
195 // Lowering comparisons: a SETxx op applied to the flags of a CMPx (integers) or UCOMISx (floats).
196 (Less(64|32|16|8) x y) -> (SETL (CMP(Q|L|W|B) x y)) // signed: SETL
197 (Less(64|32|16|8)U x y) -> (SETB (CMP(Q|L|W|B) x y)) // unsigned: SETB
198 // Use SETGF with reversed operands to dodge NaN case
199 (Less(32|64)F x y) -> (SETGF (UCOMIS(S|D) y x))
200
201 (Leq(64|32|16|8) x y) -> (SETLE (CMP(Q|L|W|B) x y))
202 (Leq(64|32|16|8)U x y) -> (SETBE (CMP(Q|L|W|B) x y))
203 // Use SETGEF with reversed operands to dodge NaN case
204 (Leq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) y x))
205
206 (Greater(64|32|16|8) x y) -> (SETG (CMP(Q|L|W|B) x y))
207 (Greater(64|32|16|8)U x y) -> (SETA (CMP(Q|L|W|B) x y))
208 // Note Go assembler gets UCOMISx operand order wrong, but it is right here
209 // Bug is accommodated at generation of assembly language.
210 (Greater(32|64)F x y) -> (SETGF (UCOMIS(S|D) x y))
211
212 (Geq(64|32|16|8) x y) -> (SETGE (CMP(Q|L|W|B) x y))
213 (Geq(64|32|16|8)U x y) -> (SETAE (CMP(Q|L|W|B) x y))
214 // Note Go assembler gets UCOMISx operand order wrong, but it is right here
215 // Bug is accommodated at generation of assembly language.
216 (Geq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) x y))
217
218 (Eq(64|32|16|8|B) x y) -> (SETEQ (CMP(Q|L|W|B|B) x y)) // booleans (EqB) are compared with CMPB
219 (EqPtr x y) && config.PtrSize == 8 -> (SETEQ (CMPQ x y))
220 (EqPtr x y) && config.PtrSize == 4 -> (SETEQ (CMPL x y))
221 (Eq(32|64)F x y) -> (SETEQF (UCOMIS(S|D) x y))
222
223 (Neq(64|32|16|8|B) x y) -> (SETNE (CMP(Q|L|W|B|B) x y))
224 (NeqPtr x y) && config.PtrSize == 8 -> (SETNE (CMPQ x y))
225 (NeqPtr x y) && config.PtrSize == 4 -> (SETNE (CMPL x y))
226 (Neq(32|64)F x y) -> (SETNEF (UCOMIS(S|D) x y))
227
228 (Int64Hi x) -> (SHRQconst [32] x) // needed for amd64p32; the high half is x shifted right by 32
229 (Int64Lo x) -> x // the low half is x itself, unchanged
230
231 // Lowering loads: the load op is selected from the loaded type t.
232 (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t) && config.PtrSize == 8) -> (MOVQload ptr mem) // pointers use MOVQload or MOVLload depending on config.PtrSize
233 (Load <t> ptr mem) && (is32BitInt(t) || isPtr(t) && config.PtrSize == 4) -> (MOVLload ptr mem)
234 (Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
235 (Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem) // booleans are loaded as bytes
236 (Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
237 (Load <t> ptr mem) && is64BitFloat(t) -> (MOVSDload ptr mem)
238
239 // Lowering stores
240 // These more-specific FP versions of Store pattern should come first.
241 (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (MOVSDstore ptr val mem) // FP stores also check the stored value's type
242 (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (MOVSSstore ptr val mem)
243
244 (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVQstore ptr val mem) // remaining stores are selected by size alone
245 (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVLstore ptr val mem)
246 (Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVWstore ptr val mem)
247 (Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
248
249 // Lowering moves: small fixed sizes become explicit load/store pairs.
250 (Move [0] _ _ mem) -> mem // nothing to copy: pass the memory state through
251 (Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
252 (Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
253 (Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
254 (Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
255 (Move [16] dst src mem) && config.useSSE -> (MOVOstore dst (MOVOload src mem) mem) // one 16-byte MOVO copy when SSE is enabled
256 (Move [16] dst src mem) && !config.useSSE -> // otherwise two 8-byte copies at offsets 0 and 8
257 (MOVQstore [8] dst (MOVQload [8] src mem)
258 (MOVQstore dst (MOVQload src mem) mem))
259
260 (Move [32] dst src mem) -> // split into two 16-byte Moves (offsets 0 and 16)
261 (Move [16]
262 (OffPtr <dst.Type> dst [16])
263 (OffPtr <src.Type> src [16])
264 (Move [16] dst src mem))
265
266 (Move [48] dst src mem) && config.useSSE -> // 16 bytes at offset 0, then 32 bytes at offset 16
267 (Move [32]
268 (OffPtr <dst.Type> dst [16])
269 (OffPtr <src.Type> src [16])
270 (Move [16] dst src mem))
271
272 (Move [64] dst src mem) && config.useSSE -> // two 32-byte Moves (offsets 0 and 32)
273 (Move [32]
274 (OffPtr <dst.Type> dst [32])
275 (OffPtr <src.Type> src [32])
276 (Move [32] dst src mem))
277
278 (Move [3] dst src mem) -> // 2 bytes at offset 0 plus 1 byte at offset 2
279 (MOVBstore [2] dst (MOVBload [2] src mem)
280 (MOVWstore dst (MOVWload src mem) mem))
281 (Move [5] dst src mem) -> // 4 bytes at offset 0 plus 1 byte at offset 4
282 (MOVBstore [4] dst (MOVBload [4] src mem)
283 (MOVLstore dst (MOVLload src mem) mem))
284 (Move [6] dst src mem) -> // 4 bytes at offset 0 plus 2 bytes at offset 4
285 (MOVWstore [4] dst (MOVWload [4] src mem)
286 (MOVLstore dst (MOVLload src mem) mem))
287 (Move [7] dst src mem) -> // two 4-byte copies at offsets 0 and 3, overlapping by one byte
288 (MOVLstore [3] dst (MOVLload [3] src mem)
289 (MOVLstore dst (MOVLload src mem) mem))
290 (Move [9] dst src mem) -> // 8 bytes at offset 0 plus 1 byte at offset 8
291 (MOVBstore [8] dst (MOVBload [8] src mem)
292 (MOVQstore dst (MOVQload src mem) mem))
293 (Move [10] dst src mem) -> // 8 bytes at offset 0 plus 2 bytes at offset 8
294 (MOVWstore [8] dst (MOVWload [8] src mem)
295 (MOVQstore dst (MOVQload src mem) mem))
296 (Move [12] dst src mem) -> // 8 bytes at offset 0 plus 4 bytes at offset 8
297 (MOVLstore [8] dst (MOVLload [8] src mem)
298 (MOVQstore dst (MOVQload src mem) mem))
299 (Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15 -> // sizes 11 and 13..15: two overlapping 8-byte copies at offsets 0 and s-8
300 (MOVQstore [s-8] dst (MOVQload [s-8] src mem)
301 (MOVQstore dst (MOVQload src mem) mem))
302
303 // Adjust moves to be a multiple of 16 bytes.
304 (Move [s] dst src mem)
305 && s > 16 && s%16 != 0 && s%16 <= 8 -> // remainder of at most 8 bytes: copy 8 bytes at offset 0, then Move the rest starting at offset s%16
306 (Move [s-s%16]
307 (OffPtr <dst.Type> dst [s%16])
308 (OffPtr <src.Type> src [s%16])
309 (MOVQstore dst (MOVQload src mem) mem))
310 (Move [s] dst src mem)
311 && s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE -> // remainder above 8 bytes with SSE: copy 16 bytes at offset 0 first
312 (Move [s-s%16]
313 (OffPtr <dst.Type> dst [s%16])
314 (OffPtr <src.Type> src [s%16])
315 (MOVOstore dst (MOVOload src mem) mem))
316 (Move [s] dst src mem)
317 && s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE -> // same without SSE: the leading 16 bytes are copied as two 8-byte pieces
318 (Move [s-s%16]
319 (OffPtr <dst.Type> dst [s%16])
320 (OffPtr <src.Type> src [s%16])
321 (MOVQstore [8] dst (MOVQload [8] src mem)
322 (MOVQstore dst (MOVQload src mem) mem)))
323
324 // Medium copying uses a duff device.
325 (Move [s] dst src mem)
326 && s > 64 && s <= 16*64 && s%16 == 0
327 && !config.noDuffDevice ->
328 (DUFFCOPY [14*(64-s/16)] dst src mem) // aux = 14 bytes times the number of 16-byte blocks NOT needed (64 - s/16)
329 // 14 and 64 are magic constants. 14 is the number of bytes to encode:
330 // MOVUPS (SI), X0
331 // ADDQ $16, SI
332 // MOVUPS X0, (DI)
333 // ADDQ $16, DI
334 // and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
335
336 // Large copying uses REP MOVSQ.
337 (Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 ->
338 (REPMOVSQ dst src (MOVQconst [s/8]) mem) // the count operand is s/8, i.e. the size in 8-byte words
339
340 // Lowering Zero instructions
341 (Zero [0] _ mem) -> mem // nothing to zero: pass the memory state through
342 (Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
343 (Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
344 (Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
345 (Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)
346
347 (Zero [3] destptr mem) -> // 2-byte store at offset 0 plus 1-byte store at offset 2
348 (MOVBstoreconst [makeValAndOff(0,2)] destptr
349 (MOVWstoreconst [0] destptr mem))
350 (Zero [5] destptr mem) -> // 4-byte store at offset 0 plus 1-byte store at offset 4
351 (MOVBstoreconst [makeValAndOff(0,4)] destptr
352 (MOVLstoreconst [0] destptr mem))
353 (Zero [6] destptr mem) -> // 4-byte store at offset 0 plus 2-byte store at offset 4
354 (MOVWstoreconst [makeValAndOff(0,4)] destptr
355 (MOVLstoreconst [0] destptr mem))
356 (Zero [7] destptr mem) -> // two 4-byte stores at offsets 0 and 3, overlapping by one byte
357 (MOVLstoreconst [makeValAndOff(0,3)] destptr
358 (MOVLstoreconst [0] destptr mem))
359
360 // Strip off any fractional word zeroing.
361 (Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE -> // zero 8 bytes at offset 0, then Zero the remaining s-s%8 bytes starting at offset s%8
362 (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
363 (MOVQstoreconst [0] destptr mem))
364
365 // Zero small numbers of words directly.
366 (Zero [16] destptr mem) && !config.useSSE -> // two 8-byte stores
367 (MOVQstoreconst [makeValAndOff(0,8)] destptr
368 (MOVQstoreconst [0] destptr mem))
369 (Zero [24] destptr mem) && !config.useSSE -> // three 8-byte stores
370 (MOVQstoreconst [makeValAndOff(0,16)] destptr
371 (MOVQstoreconst [makeValAndOff(0,8)] destptr
372 (MOVQstoreconst [0] destptr mem)))
373 (Zero [32] destptr mem) && !config.useSSE -> // four 8-byte stores
374 (MOVQstoreconst [makeValAndOff(0,24)] destptr
375 (MOVQstoreconst [makeValAndOff(0,16)] destptr
376 (MOVQstoreconst [makeValAndOff(0,8)] destptr
377 (MOVQstoreconst [0] destptr mem))))
378
379 (Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE -> // sizes 9..15: two overlapping 8-byte stores at offsets 0 and s-8
380 (MOVQstoreconst [makeValAndOff(0,s-8)] destptr
381 (MOVQstoreconst [0] destptr mem))
382
383 // Adjust zeros to be a multiple of 16 bytes.
384 (Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE -> // remainder above 8 bytes: zero 16 bytes at offset 0, then Zero the rest starting at offset s%16
385 (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
386 (MOVOstore destptr (MOVOconst [0]) mem))
387
388 (Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE -> // remainder of at most 8 bytes: an 8-byte store at offset 0 is enough
389 (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
390 (MOVQstoreconst [0] destptr mem))
391
392 (Zero [16] destptr mem) && config.useSSE -> // 16..64 bytes: one MOVOstore of a zero constant per 16-byte chunk
393 (MOVOstore destptr (MOVOconst [0]) mem)
394 (Zero [32] destptr mem) && config.useSSE ->
395 (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
396 (MOVOstore destptr (MOVOconst [0]) mem))
397 (Zero [48] destptr mem) && config.useSSE ->
398 (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
399 (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
400 (MOVOstore destptr (MOVOconst [0]) mem)))
401 (Zero [64] destptr mem) && config.useSSE ->
402 (MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
403 (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
404 (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
405 (MOVOstore destptr (MOVOconst [0]) mem))))
406
407 // Medium zeroing uses a duff device.
408 (Zero [s] destptr mem)
409 && s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice ->
410 (DUFFZERO [s] destptr (MOVOconst [0]) mem) // the size s is passed as the aux value, with a zero MOVOconst operand
411
412 // Large zeroing uses REP STOSQ.
413 (Zero [s] destptr mem)
414 && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
415 && s%8 == 0 -> // also taken above 64 bytes when the duff device is disabled, and above 32 bytes without SSE
416 (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem) // operands: count in 8-byte words (s/8) and the value 0
417
418 // Lowering constants
419 (Const8 [val]) -> (MOVLconst [val]) // 8-, 16- and 32-bit constants all become MOVLconst
420 (Const16 [val]) -> (MOVLconst [val])
421 (Const32 [val]) -> (MOVLconst [val])
422 (Const64 [val]) -> (MOVQconst [val])
423 (Const32F [val]) -> (MOVSSconst [val])
424 (Const64F [val]) -> (MOVSDconst [val])
425 (ConstNil) && config.PtrSize == 8 -> (MOVQconst [0]) // nil is the constant 0 at pointer width
426 (ConstNil) && config.PtrSize == 4 -> (MOVLconst [0])
427 (ConstBool [b]) -> (MOVLconst [b]) // the boolean's aux value is reused as a 32-bit constant
428
429 // Lowering calls: each generic call op maps one-to-one onto a CALL* op with the same arguments.
430 (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
431 (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
432 (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
433
434 // Lowering conditional moves
435 // If the condition is a SETxx, we can just run a CMOV from the comparison that was
436 // setting the flags.
437 // Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
438 (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
439 -> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond) // note the operands are swapped: CondSelect x y becomes CMOV y x
440 (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
441 -> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
442 (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
443 -> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
444
445 // If the condition does not set the flags, we need to generate a comparison.
446 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
447 -> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check)) // first zero-extend a narrow condition to 64 bits; the 8-byte rules below then apply
448 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
449 -> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
450 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
451 -> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
452
453 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
454 -> (CMOVQNE y x (CMPQconst [0] check)) // compare the 64-bit condition with 0 and use the NE form of CMOV
455 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
456 -> (CMOVLNE y x (CMPQconst [0] check))
457 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
458 -> (CMOVWNE y x (CMPQconst [0] check))
459
460 // Absorb InvertFlags: EQ and NE are unchanged; LT<->GT, LE<->GE, HI<->CS and CC<->LS are swapped.
461 (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
462 -> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
463 (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
464 -> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
465 (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
466 -> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
467
468 // Absorb constants generated during lower: with constant flags a CMOV reduces to one of its two operands.
469 (CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x // conditions that hold for FlagEQ: second operand
470 (CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y // conditions that fail for FlagEQ: first operand
471 (CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x // the same split repeats for each remaining flag constant
472 (CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
473 (CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
474 (CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
475 (CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
476 (CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
477 (CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
478 (CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y
479
480 // Miscellaneous
481 (IsNonNil p) && config.PtrSize == 8 -> (SETNE (TESTQ p p)) // test the pointer against itself and check for non-zero
482 (IsNonNil p) && config.PtrSize == 4 -> (SETNE (TESTL p p))
483 (IsInBounds idx len) && config.PtrSize == 8 -> (SETB (CMPQ idx len)) // unsigned idx < len
484 (IsInBounds idx len) && config.PtrSize == 4 -> (SETB (CMPL idx len))
485 (IsSliceInBounds idx len) && config.PtrSize == 8 -> (SETBE (CMPQ idx len)) // unsigned idx <= len
486 (IsSliceInBounds idx len) && config.PtrSize == 4 -> (SETBE (CMPL idx len))
487 (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
488 (GetG mem) -> (LoweredGetG mem)
489 (GetClosurePtr) -> (LoweredGetClosurePtr)
490 (GetCallerPC) -> (LoweredGetCallerPC)
491 (GetCallerSP) -> (LoweredGetCallerSP)
492 (Addr {sym} base) && config.PtrSize == 8 -> (LEAQ {sym} base) // address-of uses LEAQ or LEAL depending on config.PtrSize
493 (Addr {sym} base) && config.PtrSize == 4 -> (LEAL {sym} base)
494 (LocalAddr {sym} base _) && config.PtrSize == 8 -> (LEAQ {sym} base) // LocalAddr's third argument is dropped
495 (LocalAddr {sym} base _) && config.PtrSize == 4 -> (LEAL {sym} base)
496
497 (MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 -> (SETLstore [off] {sym} ptr x mem) // a byte store of a SETxx result with no other uses becomes the matching SETxxstore
498 (MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 -> (SETLEstore [off] {sym} ptr x mem)
499 (MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 -> (SETGstore [off] {sym} ptr x mem)
500 (MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 -> (SETGEstore [off] {sym} ptr x mem)
501 (MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 -> (SETEQstore [off] {sym} ptr x mem)
502 (MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 -> (SETNEstore [off] {sym} ptr x mem)
503 (MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 -> (SETBstore [off] {sym} ptr x mem)
504 (MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 -> (SETBEstore [off] {sym} ptr x mem)
505 (MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 -> (SETAstore [off] {sym} ptr x mem)
506 (MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 -> (SETAEstore [off] {sym} ptr x mem)
507
508 // block rewrites: an If block controlled by a SETxx becomes a conditional block on the underlying flags.
509 (If (SETL cmp) yes no) -> (LT cmp yes no)
510 (If (SETLE cmp) yes no) -> (LE cmp yes no)
511 (If (SETG cmp) yes no) -> (GT cmp yes no)
512 (If (SETGE cmp) yes no) -> (GE cmp yes no)
513 (If (SETEQ cmp) yes no) -> (EQ cmp yes no)
514 (If (SETNE cmp) yes no) -> (NE cmp yes no)
515 (If (SETB cmp) yes no) -> (ULT cmp yes no)
516 (If (SETBE cmp) yes no) -> (ULE cmp yes no)
517 (If (SETA cmp) yes no) -> (UGT cmp yes no)
518 (If (SETAE cmp) yes no) -> (UGE cmp yes no)
519 (If (SETO cmp) yes no) -> (OS cmp yes no)
520
521 // Special case for floating point - LF/LEF not generated
522 (If (SETGF cmp) yes no) -> (UGT cmp yes no) // float greater-than branches use the unsigned block kinds
523 (If (SETGEF cmp) yes no) -> (UGE cmp yes no)
524 (If (SETEQF cmp) yes no) -> (EQF cmp yes no)
525 (If (SETNEF cmp) yes no) -> (NEF cmp yes no)
526
527 (If cond yes no) -> (NE (TESTB cond cond) yes no) // fallback for any other condition: test the byte against itself and branch on non-zero
528
529 // Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
530 (AtomicLoad8 ptr mem) -> (MOVBatomicload ptr mem)
531 (AtomicLoad32 ptr mem) -> (MOVLatomicload ptr mem)
532 (AtomicLoad64 ptr mem) -> (MOVQatomicload ptr mem)
533 (AtomicLoadPtr ptr mem) && config.PtrSize == 8 -> (MOVQatomicload ptr mem)
534 (AtomicLoadPtr ptr mem) && config.PtrSize == 4 -> (MOVLatomicload ptr mem)
535
536 // Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
537 // TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
538 (AtomicStore32 ptr val mem) -> (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem)) // XCHG yields a (value, memory) tuple; the store keeps only the memory half
539 (AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
540 (AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
541 (AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
542
543 // Atomic exchanges.
544 (AtomicExchange32 ptr val mem) -> (XCHGL val ptr mem) // note the operand order changes from (ptr val) to (val ptr)
545 (AtomicExchange64 ptr val mem) -> (XCHGQ val ptr mem)
546
547 // Atomic adds.
548 (AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (XADDLlock val ptr mem)) // AddTupleFirst wraps the XADD tuple; the Select rules below resolve it
549 (AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (XADDQlock val ptr mem))
550 (Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDL val (Select0 <t> tuple)) // value result: val added to the tuple's first element
551 (Select1 (AddTupleFirst32 _ tuple)) -> (Select1 tuple) // second element passes through unchanged
552 (Select0 <t> (AddTupleFirst64 val tuple)) -> (ADDQ val (Select0 <t> tuple))
553 (Select1 (AddTupleFirst64 _ tuple)) -> (Select1 tuple)
554
555 // Atomic compare and swap.
556 (AtomicCompareAndSwap32 ptr old new_ mem) -> (CMPXCHGLlock ptr old new_ mem)
557 (AtomicCompareAndSwap64 ptr old new_ mem) -> (CMPXCHGQlock ptr old new_ mem)
558
559 // Atomic memory updates.
560 (AtomicAnd8 ptr val mem) -> (ANDBlock ptr val mem)
561 (AtomicOr8 ptr val mem) -> (ORBlock ptr val mem)
562
563 // Write barrier.
564 (WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
565
566 (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem) // boundsABI(kind) picks the A, B or C lowered variant
567 (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
568 (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
569
570 // For amd64p32
571 (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 -> (LoweredPanicExtendA [kind] hi lo y mem) // same A/B/C selection for the hi/lo operand form
572 (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 -> (LoweredPanicExtendB [kind] hi lo y mem)
573 (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 -> (LoweredPanicExtendC [kind] hi lo y mem)
574
575 // ***************************
576 // Above: lowering rules
577 // Below: optimizations
578 // ***************************
579 // TODO: Should the optimizations be a separate pass?
580
581 // Fold boolean tests into blocks: NE over a TESTB of a SETxx against itself becomes the matching block on cmp.
582 (NE (TESTB (SETL cmp) (SETL cmp)) yes no) -> (LT cmp yes no)
583 (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE cmp yes no)
584 (NE (TESTB (SETG cmp) (SETG cmp)) yes no) -> (GT cmp yes no)
585 (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE cmp yes no)
586 (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ cmp yes no)
587 (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE cmp yes no)
588 (NE (TESTB (SETB cmp) (SETB cmp)) yes no) -> (ULT cmp yes no)
589 (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no)
590 (NE (TESTB (SETA cmp) (SETA cmp)) yes no) -> (UGT cmp yes no)
591 (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
592 (NE (TESTB (SETO cmp) (SETO cmp)) yes no) -> (OS cmp yes no)
593
594 // Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
595 // Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
596 // into tests for carry flags.
597 // ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
598 // mutandis, for UGE and SETAE, and CC and SETCC.
599 ((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTL x y)) // block form, variable bit index
600 ((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTQ x y))
601 ((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
602 -> ((ULT|UGE) (BTLconst [log2uint32(c)] x)) // constant single-bit mask: the bit index is log2 of the mask
603 ((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) && !config.nacl
604 -> ((ULT|UGE) (BTQconst [log2(c)] x))
605 ((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) && !config.nacl
606 -> ((ULT|UGE) (BTQconst [log2(c)] x))
607 (SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> (SET(B|AE) (BTL x y)) // value form: the same five patterns for SETNE/SETEQ
608 (SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> (SET(B|AE) (BTQ x y))
609 (SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
610 -> (SET(B|AE) (BTLconst [log2uint32(c)] x))
611 (SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) && !config.nacl
612 -> (SET(B|AE) (BTQconst [log2(c)] x))
613 (SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) && !config.nacl
614 -> (SET(B|AE) (BTQconst [log2(c)] x))
615 // SET..store variant
616 (SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) && !config.nacl
617 -> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
618 (SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) && !config.nacl
619 -> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
620 (SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(c) && !config.nacl
621 -> (SET(B|AE)store [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
622 (SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(c) && !config.nacl
623 -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)
624 (SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c) && !config.nacl
625 -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)
626
627 // Handle bit-testing in the form (a>>b)&1 != 0 by building on the above rules
628 // and further combining shifts.
629 (BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x) // bit c of x>>d is bit c+d of x
630 (BT(Q|L)const [c] (SHLQconst [d] x)) && c>d -> (BT(Q|L)const [c-d] x) // bit c of x<<d is bit c-d of x
631 (BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x) // bit 0 of x>>y is bit y of x
632 (BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x) // 32-bit counterparts of the three rules above
633 (BTLconst [c] (SHLLconst [d] x)) && c>d -> (BTLconst [c-d] x)
634 (BTLconst [0] s:(SHRL x y)) -> (BTL y x)
635
636 // Rewrite a & 1 != 1 into a & 1 == 0.
637 // Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
// Only the comparison constant (1 -> 0) and the condition (NE <-> EQ) change;
// the matched AND-with-1 value s is reused as the compare operand.
// Covers 32- and 64-bit compares, for both SETcc and SETcc-store.
638 (SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) -> (SET(EQ|NE) (CMPLconst [0] s))
639 (SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
640 (SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) -> (SET(EQ|NE) (CMPQconst [0] s))
641 (SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)
642
643 // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
// (1<<y) | x becomes BTS x y and (1<<y) ^ x becomes BTC x y, for both the
// 64-bit (Q) and 32-bit (L) forms. Disabled when config.nacl is set.
644 (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTS(Q|L) x y)
645 (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTC(Q|L) x y)
646
647 // Convert ORconst into BTS, if the code gets smaller, with boundary being
648 // (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
// The constant must be a single set bit with uint64(c) >= 128; the emitted bit
// index is log2(c) (Q) or log2uint32(c) (L). OR maps to BTS and XOR to BTC.
// Both the ...const ops and the two-operand ops fed by MOVQconst/MOVLconst
// are matched. Disabled when config.nacl is set.
649 ((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
650 -> (BT(S|C)Qconst [log2(c)] x)
651 ((ORL|XORL)const [c] x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
652 -> (BT(S|C)Lconst [log2uint32(c)] x)
653 ((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
654 -> (BT(S|C)Qconst [log2(c)] x)
655 ((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
656 -> (BT(S|C)Lconst [log2uint32(c)] x)
657
658 // Recognize bit clearing: a &^= 1<<b
// Variable form: x & ^(1<<y) becomes BTR x y.
// Constant forms: an AND mask c whose complement ^c is a single bit with
// uint64(^c) >= 128 becomes BTR of bit log2(^c) / log2uint32(^c).
// Disabled when config.nacl is set.
659 (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) && !config.nacl -> (BTR(Q|L) x y)
660 (ANDQconst [c] x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
661 -> (BTRQconst [log2(^c)] x)
662 (ANDLconst [c] x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
663 -> (BTRLconst [log2uint32(^c)] x)
664 (ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
665 -> (BTRQconst [log2(^c)] x)
666 (ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
667 -> (BTRLconst [log2uint32(^c)] x)
668
669 // Special-case bit patterns on first/last bit.
670 // generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
671 // for instance:
672 // x & 0xFFFF0000 -> (x >> 16) << 16
673 // x & 0x80000000 -> (x >> 31) << 31
674 //
675 // In case the mask is just one bit (like second example above), it conflicts
676 // with the above rules to detect bit-testing / bit-clearing of first/last bit.
677 // We thus special-case them, by detecting the shift patterns.
678
679 // Special case resetting first/last bit
// (x >> 1) << 1 clears bit 0. (x << 1) >> 1, with a logical right shift,
// clears the top bit: bit 31 for the L form and bit 63 for the Q form.
// Disabled when config.nacl is set.
680 (SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) && !config.nacl
681 -> (BTR(L|Q)const [0] x)
682 (SHRLconst [1] (SHLLconst [1] x)) && !config.nacl
683 -> (BTRLconst [31] x)
684 (SHRQconst [1] (SHLQconst [1] x)) && !config.nacl
685 -> (BTRQconst [63] x)
686
687 // Special case testing first/last bit (with double-shift generated by generic.rules)
// z1==z2 requires both TEST operands to be the same value, i.e. the TEST checks
// the double-shift result against zero. The first four rules match
// (x >> k) << k with k = 63 or 31 and test the top bit; the last four match
// (x << k) >> k and test bit 0. Disabled when config.nacl is set.
688 ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2 && !config.nacl
689 -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
// NOTE(review): this TESTL rule matches an inner SHRQconst and emits BTQconst,
// whereas the SETcc-store counterpart below matches SHRLconst and emits
// BTLconst. Confirm whether the mismatch is intentional.
690 ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2 && !config.nacl
691 -> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
692 (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
693 -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
694 (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
695 -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
696
697 ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2 && !config.nacl
698 -> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
699 ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2 && !config.nacl
700 -> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
701 (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
702 -> (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
703 (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
704 -> (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)
705
706 // Special-case manually testing last bit with "a>>63 != 0" (without "&1")
// A logical right shift by width-1 leaves only the top bit, so testing the
// shifted value against itself (z1==z2) is a test of bit 63 (Q) or bit 31 (L).
// Disabled when config.nacl is set.
707 ((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2 && !config.nacl
708 -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
709 ((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2 && !config.nacl
710 -> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
711 (SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2 && !config.nacl
712 -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
713 (SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2 && !config.nacl
714 -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
715
716 // Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
// Both ops must name the same bit index c. The outer set (BTS) or reset (BTR)
// fixes the final value of that bit, so the inner BTR/BTC/BTS is dropped.
717 (BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
718 (BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
719 (BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
720 (BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
721
722 // Fold boolean negation into SETcc.
// XOR with 1 of a SETcc result is replaced by the SETcc of the opposite
// condition on the same flags x: NE<->EQ, L<->GE, LE<->G, B<->AE, BE<->A.
723 (XORLconst [1] (SETNE x)) -> (SETEQ x)
724 (XORLconst [1] (SETEQ x)) -> (SETNE x)
725 (XORLconst [1] (SETL x)) -> (SETGE x)
726 (XORLconst [1] (SETGE x)) -> (SETL x)
727 (XORLconst [1] (SETLE x)) -> (SETG x)
728 (XORLconst [1] (SETG x)) -> (SETLE x)
729 (XORLconst [1] (SETB x)) -> (SETAE x)
730 (XORLconst [1] (SETAE x)) -> (SETB x)
731 (XORLconst [1] (SETBE x)) -> (SETA x)
732 (XORLconst [1] (SETA x)) -> (SETBE x)
733
734 // Special case for floating point - LF/LEF not generated
// A branch on TESTB of a floating-point SETcc value against itself is replaced
// by a branch directly on the original flags cmp:
// SETGF -> UGT, SETGEF -> UGE, SETEQF -> EQF, SETNEF -> NEF.
735 (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) -> (UGT cmp yes no)
736 (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no)
737 (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no)
738 (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)
739
740 // Disabled because it interferes with the pattern match above and makes worse code.
741 // (SETNEF x) -> (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
742 // (SETEQF x) -> (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))
743
744 // fold constants into instructions
// A MOVQconst operand is folded only when is32Bit(c) holds; a MOVLconst operand
// is folded unconditionally. For SUB with the constant on the left,
// c - x is emitted as the negation of (x - c).
745 (ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
746 (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
747
748 (SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
749 (SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
750 (SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
751 (SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
752
753 (MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
754 (MULL x (MOVLconst [c])) -> (MULLconst [c] x)
755
756 (ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
757 (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
758
// Merge two stacked constant bit operations into a single AND/XOR/OR constant.
// A constant bit op BTR/BTC/BTS on bit n is treated as AND with ^(1<<n),
// XOR with 1<<n, or OR with 1<<n respectively, and combined with the other mask.
// NOTE(review): unlike the MOVQconst folds in this file, the Q forms here carry
// no is32Bit guard, and 1<<uint32(c) with a bit index of 31 or more does not fit
// a signed 32-bit constant. Confirm that such results cannot reach
// ANDQconst/XORQconst/ORQconst, or that they are handled downstream.
759 (AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x)
760 (BTR(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [d &^ (1<<uint32(c))] x)
761 (AND(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [c &^ (1<<uint32(d))] x)
762 (BTR(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [^(1<<uint32(c) | 1<<uint32(d))] x)
763 (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x)
764 (BTC(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [d ^ 1<<uint32(c)] x)
765 (XOR(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ 1<<uint32(d)] x)
766 (BTC(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [1<<uint32(c) ^ 1<<uint32(d)] x)
767 (OR(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [c | d] x)
768 (OR(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [c | 1<<uint32(d)] x)
769 (BTS(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [d | 1<<uint32(c)] x)
770 (BTS(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [1<<uint32(d) | 1<<uint32(c)] x)
771
// Two stacked constant multiplies collapse into one. The 32-bit product is
// truncated and sign-extended via int64(int32(...)); the 64-bit form only fires
// when the product still satisfies is32Bit.
772 (MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
773 (MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
774
// Fold a constant operand into OR / XOR; the 64-bit forms require is32Bit(c).
775 (ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
776 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
777
778 (XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
779 (XORL x (MOVLconst [c])) -> (XORLconst [c] x)
780
// Shifts by a constant count become ...const shifts. The count is masked to
// 6 bits (&63) for Q shifts and to 5 bits (&31) for L, W and B shifts.
// For logical right shifts of W/B values, a masked count of at least the
// operand width (16 / 8) yields constant 0. For arithmetic right shifts of
// W/B values the count is clamped to width-1 (15 / 7) instead.
781 (SHLQ x (MOV(Q|L)const [c])) -> (SHLQconst [c&63] x)
782 (SHLL x (MOV(Q|L)const [c])) -> (SHLLconst [c&31] x)
783
784 (SHRQ x (MOV(Q|L)const [c])) -> (SHRQconst [c&63] x)
785 (SHRL x (MOV(Q|L)const [c])) -> (SHRLconst [c&31] x)
786 (SHRW x (MOV(Q|L)const [c])) && c&31 < 16 -> (SHRWconst [c&31] x)
787 (SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 -> (MOVLconst [0])
788 (SHRB x (MOV(Q|L)const [c])) && c&31 < 8 -> (SHRBconst [c&31] x)
789 (SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 -> (MOVLconst [0])
790
791 (SARQ x (MOV(Q|L)const [c])) -> (SARQconst [c&63] x)
792 (SARL x (MOV(Q|L)const [c])) -> (SARLconst [c&31] x)
793 (SARW x (MOV(Q|L)const [c])) -> (SARWconst [min(c&31,15)] x)
794 (SARB x (MOV(Q|L)const [c])) -> (SARBconst [min(c&31,7)] x)
795
796 // Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
// Adding a constant whose low 6 (Q shifts) or 5 (L shifts) bits are all zero,
// or ANDing with a constant whose low 6/5 bits are all one, is removed from the
// shift-count operand. The same holds when the count is additionally negated.
// The four groups cover Q and L shifts with counts computed by Q ops, then by L ops.
797 ((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
798 ((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
799 ((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
800 ((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
801
802 ((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
803 ((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
804 ((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
805 ((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
806
807 ((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
808 ((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
809 ((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
810 ((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
811
812 ((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
813 ((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
814 ((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
815 ((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
816
817 // Constant rotate instructions
// (x << c) combined by ADD, OR or XOR with (x >> d) where d == width-c is a
// rotate left by c. For the 16- and 8-bit forms the left shift is a 32-bit SHLL,
// so the rule also requires c < width and a result type of matching size.
818 ((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
819 ((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
820
821 ((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
822 ((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
823
// Two stacked constant rotates add their counts modulo the operand width.
824 (ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
825 (ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
826 (ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
827 (ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
828
// Lower the generic RotateLeft ops to the ROL op of the same width.
829 (RotateLeft8 a b) -> (ROLB a b)
830 (RotateLeft16 a b) -> (ROLW a b)
831 (RotateLeft32 a b) -> (ROLL a b)
832 (RotateLeft64 a b) -> (ROLQ a b)
833
834 // Non-constant rotates.
835 // We want to issue a rotate when the Go source contains code like
836 // y &= 63
837 // x << y | x >> (64-y)
838 // The shift rules above convert << to SHLx and >> to SHRx.
839 // SHRx converts its shift argument from 64-y to -y.
840 // A tricky situation occurs when y==0. Then the original code would be:
841 // x << 0 | x >> 64
842 // But x >> 64 is 0, not x. So there's an additional mask that is ANDed in
843 // to force the second term to 0. We don't need that mask, but we must match
844 // it in order to strip it out.
// In each rule the first operand is the plain shift by y and the second is the
// opposite shift by -y, ANDed with the SBB carry mask described above.
// SHL first gives ROL, SHR first gives ROR. The (Q|L) alternations accept the
// count arithmetic in either 64- or 32-bit form.
845 (ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (ROLQ x y)
846 (ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (RORQ x y)
847
848 (ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (ROLL x y)
849 (ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (RORL x y)
850
851 // Help with rotate detection
// The compared value is -((y & 15) - 16), resp. -((y & 7) - 8); it lies in
// 1..16, resp. 1..8, so a comparison against 32 is always "less than",
// signed and unsigned.
852 (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) -> (FlagLT_ULT)
853 (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32]) -> (FlagLT_ULT)
854
// 16- and 8-bit variable rotates. The count is masked with 15 / 7 and the
// result type must be 2 / 1 bytes wide.
// The ROL patterns include the carry-mask AND term; the ROR patterns match a
// bare SHLL by the negated count with no mask.
// NOTE(review): presumably the mask on the ROR side has already been folded
// away by the two helper rules above before these patterns are tried - confirm.
855 (ORL (SHLL x (AND(Q|L)const y [15]))
856 (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
857 (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
858 && v.Type.Size() == 2
859 -> (ROLW x y)
860 (ORL (SHRW x (AND(Q|L)const y [15]))
861 (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
862 && v.Type.Size() == 2
863 -> (RORW x y)
864
865 (ORL (SHLL x (AND(Q|L)const y [ 7]))
866 (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
867 (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
868 && v.Type.Size() == 1
869 -> (ROLB x y)
870 (ORL (SHRB x (AND(Q|L)const y [ 7]))
871 (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
872 && v.Type.Size() == 1
873 -> (RORB x y)
874
875 // rotate left negative = rotate right
// The negation of the count may be a 64-bit (NEGQ) or 32-bit (NEGL) op.
876 (ROLQ x (NEG(Q|L) y)) -> (RORQ x y)
877 (ROLL x (NEG(Q|L) y)) -> (RORL x y)
878 (ROLW x (NEG(Q|L) y)) -> (RORW x y)
879 (ROLB x (NEG(Q|L) y)) -> (RORB x y)
880
881 // rotate right negative = rotate left
882 (RORQ x (NEG(Q|L) y)) -> (ROLQ x y)
883 (RORL x (NEG(Q|L) y)) -> (ROLL x y)
884 (RORW x (NEG(Q|L) y)) -> (ROLW x y)
885 (RORB x (NEG(Q|L) y)) -> (ROLB x y)
886
887 // rotate by constants
// The count is reduced modulo the operand width. A rotate right by c is
// expressed as a rotate left by (-c) modulo the width, so only ROL...const
// ops are produced.
888 (ROLQ x (MOV(Q|L)const [c])) -> (ROLQconst [c&63] x)
889 (ROLL x (MOV(Q|L)const [c])) -> (ROLLconst [c&31] x)
890 (ROLW x (MOV(Q|L)const [c])) -> (ROLWconst [c&15] x)
891 (ROLB x (MOV(Q|L)const [c])) -> (ROLBconst [c&7 ] x)
892
893 (RORQ x (MOV(Q|L)const [c])) -> (ROLQconst [(-c)&63] x)
894 (RORL x (MOV(Q|L)const [c])) -> (ROLLconst [(-c)&31] x)
895 (RORW x (MOV(Q|L)const [c])) -> (ROLWconst [(-c)&15] x)
896 (RORB x (MOV(Q|L)const [c])) -> (ROLBconst [(-c)&7 ] x)
897
898 // Constant shift simplifications
// A shift or rotate by a constant count of 0 is replaced by its operand.
899 ((SHLQ|SHRQ|SARQ)const x [0]) -> x
900 ((SHLL|SHRL|SARL)const x [0]) -> x
901 ((SHRW|SARW)const x [0]) -> x
902 ((SHRB|SARB)const x [0]) -> x
903 ((ROLQ|ROLL|ROLW|ROLB)const x [0]) -> x
904
905 // Note on the shift-by-constant rules: the word and byte shifts keep the low
906 // 5 bits (not the low 4 or 3 bits) because the x86 instructions are defined to
907 // use all 5 bits of the shift even for the small shifts. I don't think we'll
908 // ever generate a weird shift (e.g. (SHRW x (MOVLconst [24]))), but just in case.
909
// Fold a constant operand into the compare. When the constant is the left
// operand the compare is emitted with swapped operands and wrapped in
// InvertFlags. 64-bit constants must satisfy is32Bit; 16- and 8-bit constants
// are truncated and sign-extended to the operand width.
910 (CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c])
911 (CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c]))
912 (CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
913 (CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
914 (CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))])
915 (CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))]))
916 (CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
917 (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
918
919 // Using MOVZX instead of AND is cheaper.
// An AND with 0xFF, 0xFFFF or 0xFFFFFFFF keeps exactly the low 8, 16 or 32 bits,
// which is what the byte / word / long zero-extension ops produce.
// The 0xFFFFFFFF mask is only matched for the 64-bit ANDQconst.
920 (AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
921 (AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
922 (ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
923
924 // strength reduction
925 // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
926 // 1 - addq, shlq, leaq, negq, subq
927 // 3 - imulq
928 // This limits the rewrites to two instructions.
929 // Note that negq always operates in-place,
930 // which can require a register-register move
931 // to preserve the original value,
932 // so it must be used with care.
// Reading the table: (LEAk a b) computes a + k*b, so (LEA2 x x) is 3x,
// (LEA4 x x) is 5x and (LEA8 x x) is 9x. The larger constants nest one LEA
// inside another, e.g. 7x = x + 2*(3x) and 27x = 3x + 8*(3x).
933 (MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
934 (MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
935 (MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
936 (MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
937 (MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
938 (MUL(Q|L)const [ 1] x) -> x
939 (MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
940 (MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
941 (MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
942 (MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
943 (MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
944 (MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
945 (MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
946 (MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
947 (MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
948 (MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
949 (MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
950 (MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
951 (MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
952 (MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
953 (MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
954
// General forms: c = 2^k - 1 becomes (x << k) - x; c = 2^k + {1,2,4,8} becomes
// (x << k) + {1,2,4,8}*x through LEA1/2/4/8; c = {3,5,9} * 2^k becomes the
// matching LEA result shifted left by k. The lower bounds on c leave the
// smaller constants to the table above.
955 (MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
956 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
957 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
958 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
959 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
960 (MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
961 (MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
962 (MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))
963
964 // combine add/shift into LEAQ/LEAL
// x + (y << 3|2|1) becomes LEA8/LEA4/LEA2 x y. x + (y + y) is x + 2*y, and
// x + (x + y) is y + 2*x, hence the swapped operands in the last rule.
965 (ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
966 (ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
967 (ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
968 (ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y)
969 (ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x)
970
971 // combine ADDQ/ADDQconst into LEAQ1/LEAL1
// The constant becomes the LEA displacement; (x << 1) + c is emitted as x + x + c.
972 (ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
973 (ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
974 (ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)
975
976 // fold ADDQ/ADDL into LEAQ/LEAL
// Constant offsets are merged only when the sum still satisfies is32Bit.
// The register-add forms are skipped when either operand is the SB
// pseudo-register (Op == OpSB).
977 (ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
978 (LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
979 (LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
980 (ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
981
982 // fold ADDQconst/ADDLconst into LEAQx/LEALx
// A constant added to the base operand contributes d to the displacement; a
// constant added to the scaled index operand contributes scale*d (2*d, 4*d, 8*d).
983 (ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
984 (ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
985 (ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
986 (ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
987 (LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
988 (LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
989 (LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
990 (LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
991 (LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
992 (LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
993 (LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)
994
995 // fold shifts into LEAQx/LEALx
// A constant left shift of the index operand multiplies the LEA scale:
// scale 1 with shift 1/2/3 gives 2/4/8, scale 2 with shift 1/2 gives 4/8,
// scale 4 with shift 1 gives 8. Combinations exceeding scale 8 are not matched.
996 (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
997 (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
998 (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
999 (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
1000 (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
1001 (LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)
1002
1003 // reverse ordering of compare instruction
// InvertFlags marks flags from a compare whose operands were swapped. It is
// removed by switching to the mirrored condition: L<->G, B<->A, LE<->GE,
// BE<->AE. EQ and NE are symmetric and stay unchanged.
1004 (SETL (InvertFlags x)) -> (SETG x)
1005 (SETG (InvertFlags x)) -> (SETL x)
1006 (SETB (InvertFlags x)) -> (SETA x)
1007 (SETA (InvertFlags x)) -> (SETB x)
1008 (SETLE (InvertFlags x)) -> (SETGE x)
1009 (SETGE (InvertFlags x)) -> (SETLE x)
1010 (SETBE (InvertFlags x)) -> (SETAE x)
1011 (SETAE (InvertFlags x)) -> (SETBE x)
1012 (SETEQ (InvertFlags x)) -> (SETEQ x)
1013 (SETNE (InvertFlags x)) -> (SETNE x)
1014
// Same mapping for the SETcc ops that store their result to memory.
1015 (SETLstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGstore [off] {sym} ptr x mem)
1016 (SETGstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLstore [off] {sym} ptr x mem)
1017 (SETBstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAstore [off] {sym} ptr x mem)
1018 (SETAstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBstore [off] {sym} ptr x mem)
1019 (SETLEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGEstore [off] {sym} ptr x mem)
1020 (SETGEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLEstore [off] {sym} ptr x mem)
1021 (SETBEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAEstore [off] {sym} ptr x mem)
1022 (SETAEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBEstore [off] {sym} ptr x mem)
1023 (SETEQstore [off] {sym} ptr (InvertFlags x) mem) -> (SETEQstore [off] {sym} ptr x mem)
1024 (SETNEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETNEstore [off] {sym} ptr x mem)
1025
1026 // sign extended loads
1027 // Note: The combined instruction must end up in the same block
1028 // as the original load. If not, we end up making a value with
1029 // memory type live in two different blocks, which can lead to
1030 // multiple memory values alive simultaneously.
1031 // Make sure we don't combine these ops if the load has another use.
1032 // This prevents a single load from being split into multiple loads
1033 // which then might return different values. See test/atomicload.go.
// Rule shape: an extension of a load x with x.Uses == 1 is replaced by a single
// load of the extension's width, placed in x's block via @x.Block. Sign
// extensions use the ...QSXload ops; zero extensions use the plain load of the
// narrower width. A wider original load is narrowed to the extension's width,
// keeping off, sym and ptr.
1034 (MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
1035 (MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
1036 (MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
1037 (MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
1038 (MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
1039 (MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
1040 (MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
1041 (MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
1042 (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
1043 (MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
1044 (MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
1045 (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
1046 (MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
1047 (MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
1048 (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
1049 (MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
1050 (MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
1051 (MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
1052
// Drop a zero extension when the zeroUpperNNBits helper reports that the upper
// bits of x are already zero.
// NOTE(review): the second argument 3 is presumably a search-depth limit - confirm
// against the helper's definition.
1053 (MOVLQZX x) && zeroUpper32Bits(x,3) -> x
1054 (MOVWQZX x) && zeroUpper48Bits(x,3) -> x
1055 (MOVBQZX x) && zeroUpper56Bits(x,3) -> x
1056
// Zero extension of an indexed load of the same width: the extension is dropped
// and the load is re-created in x's block, under the same single-use condition.
1057 (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
1058 (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
1059 (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
1060 (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
1061 (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
1062
1063 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// The load's memory argument must be the store itself, with equal symbol and
// offset and pointers accepted by isSamePtr. The stored value x is then used
// directly, extended to match what the load would have produced.
1064 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
1065 (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
1066 (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQZX x)
1067 (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
1068 (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQSX x)
1069 (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQSX x)
1070 (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQSX x)
1071
1072 // Fold extensions and ANDs together.
// Zero extension of an ANDLconst narrows the mask to the extended width.
// Sign extension is folded only when the mask clears the sign bit of the
// narrow value (bit 7, 15 or 31), so the masked result is non-negative.
1073 (MOVBQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
1074 (MOVWQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
1075 (MOVLQZX (ANDLconst [c] x)) -> (ANDLconst [c] x)
1076 (MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x)
1077 (MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x)
1078 (MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDLconst [c & 0x7fffffff] x)
1079
1080 // Don't extend before storing
// A store of width N writes only the low N bits of its value, so a sign or zero
// extension from that same width feeding the store is dropped.
1081 (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
1082 (MOVWstore [off] {sym} ptr (MOVWQSX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
1083 (MOVBstore [off] {sym} ptr (MOVBQSX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
1084 (MOVLstore [off] {sym} ptr (MOVLQZX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
1085 (MOVWstore [off] {sym} ptr (MOVWQZX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
1086 (MOVBstore [off] {sym} ptr (MOVBQZX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
1087
1088 // fold constants into memory operations
1089 // Note that this is not always a good idea because if not all the uses of
1090 // the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
1091 // have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
1092 // Nevertheless, let's do it!
// Rule shape: an address operand of the form (ADDQconst [off2] base) is replaced
// by base, and off2 is added to the op's own offset. Ops with a plain offset
// require is32Bit(off1+off2). Ops whose aux value packs a value together with an
// offset (the ...constload / ...constmodify forms) use ValAndOff(...).canAdd and
// .add instead.
1093 (MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
1094 (MOV(Q|L|W|B|SS|SD|O)load [off1+off2] {sym} ptr mem)
1095 (MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(off1+off2) ->
1096 (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {sym} ptr val mem)
1097 (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
1098 (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem)
1099 ((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
1100 ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
1101 ((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
1102 ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
1103 (CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
1104 (CMP(Q|L|W|B)load [off1+off2] {sym} base val mem)
1105 (CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
1106 (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {sym} base mem)
1107
1108 ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
1109 ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
1110 ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
1111 ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
1112 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
1113 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
1114 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
1115 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
1116 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
1117 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
1118 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
1119 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)
1120
1121 // Fold constants into stores: the store becomes a *storeconst op whose AuxInt packs value and offset (makeValAndOff). For the L/W/B stores the constant is first truncated to the store width.
1122 (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
1123 (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
1124 (MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
1125 (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
1126 (MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
1127 (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
1128 (MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
1129 (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
1130
1131 // Fold address offsets into constant stores. Done only when the offset part of the packed ValAndOff can absorb the extra displacement (canAdd).
1132 (MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
1133 (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
1134
1135 // We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
1136 // what variables are being read/written by the ops. Offsets are summed and the two symbols merged (guarded by canMergeSym).
1137 (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
1138 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1139 (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
1140 (MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
1141 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1142 (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
1143 (MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
1144 (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
1145 (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
1146 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1147 (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
1148 ((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
1149 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1150 ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
1151 ((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
1152 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1153 ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
1154 (CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
1155 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1156 (CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem)
1157 (CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
1158 && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
1159 (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
1160 // Same LEAQ folding for the SS/SD arithmetic-with-load ops and the *modify / *constmodify ops.
1161 ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
1162 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1163 ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
1164 ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
1165 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1166 ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
1167 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
1168 && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
1169 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
1170 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
1171 && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
1172 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
1173 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
1174 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1175 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
1176 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
1177 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1178 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
1179
1180 // generating indexed loads and stores: a LEAQn (n = 1, 2, 4, 8) address feeding a load or store is absorbed into the matching *idxn op, summing the offsets and merging the symbols.
1181 (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1182 (MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
1183 (MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1184 (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
1185 (MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1186 (MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
1187 (MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1188 (MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
1189
1190 (MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1191 (MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
1192 (MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1193 (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
1194 (MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1195 (MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
1196 (MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1197 (MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
1198 // A plain (ADDQ ptr idx) address becomes an idx1 op with the offset and symbol unchanged, except when ptr is SB.
1199 (MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB ->
1200 (MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem)
1201 (MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB ->
1202 (MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
1203 // Fold LEAQn addresses into constant stores. The offset merge is guarded by canAdd, like the other storeconst offset-folding rules in this file: ValAndOff.add is only valid when the summed offset still fits.
1204 (MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
1205 (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
1206 (MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
1207 (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
1208 (MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
1209 (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
1210 (MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
1211 (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
1212
1213 (MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem)
1214
1215 // combine SHLQ into indexed loads and stores: an idx1 op whose index is shifted left by 1, 2 or 3 becomes the idx2, idx4 or idx8 form on the unshifted index.
1216 (MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
1217 (MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem)
1218 (MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem)
1219
1220 (MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
1221 (MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem)
1222 (MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem)
1223 (MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
1224 (MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
1225 (MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
1226
1227 // combine ADDQ into pointer of indexed loads and stores: an ADDQconst on the pointer operand is added to the op's offset when the sum is still 32-bit.
1228 (MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
1229 (MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
1230 (MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem)
1231 (MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem)
1232
1233 (MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
1234 (MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
1235 (MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem)
1236 (MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem)
1237
1238
1239 // combine ADDQ into index of indexed loads and stores: an ADDQconst on the index operand is multiplied by the op's scale (1, 2, 4, 8) before being added to the offset.
1240 (MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
1241 (MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
1242 (MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem)
1243 (MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem)
1244
1245 (MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
1246 (MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
1247 (MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem)
1248 (MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem)
1249 // Same two foldings for the indexed constant stores: first ADDQconst on the pointer, then ADDQconst on the index (scaled); both are guarded by canAdd.
1250 (MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
1251 (MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
1252 (MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
1253 (MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
1254
1255 (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
1256 (MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
1257 (MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
1258 (MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
1259
1260 // fold LEAQs together: offsets are summed (the sum must stay 32-bit) and the symbols merged (guarded by canMergeSym)
1261 (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1262 (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
1263
1264 // LEAQ into LEAQ1 (not applied when the inner LEAQ's base x is SB)
1265 (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
1266 (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
1267
1268 // LEAQ1 into LEAQ
1269 (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1270 (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
1271
1272 // LEAQ into LEAQ[248] (only in the first, unscaled operand; not applied when x is SB)
1273 (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
1274 (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
1275 (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
1276 (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
1277 (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
1278 (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
1279
1280 // LEAQ[248] into LEAQ
1281 (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1282 (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
1283 (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1284 (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
1285 (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
1286 (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
1287
1288 // Absorb InvertFlags into branches: each ordered condition is replaced by its mirror (LT<->GT, LE<->GE, and the unsigned counterparts); EQ and NE stay as they are.
1289 (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
1290 (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
1291 (LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
1292 (GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
1293 (ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
1294 (UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
1295 (ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
1296 (UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
1297 (EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
1298 (NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
1299
1300 // Constant comparisons: with both operands constant the result is a flag constant naming the signed (LT/GT) and unsigned (ULT/UGT) outcome. The L/W/B forms truncate both constants to the compare width first.
1301 (CMPQconst (MOVQconst [x]) [y]) && x==y -> (FlagEQ)
1302 (CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
1303 (CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
1304 (CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
1305 (CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
1306 (CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
1307 (CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
1308 (CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
1309 (CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
1310 (CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
1311 (CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
1312 (CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
1313 (CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT)
1314 (CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT)
1315 (CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT)
1316 (CMPBconst (MOVLconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ)
1317 (CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT)
1318 (CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT)
1319 (CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT)
1320 (CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)
1321
1322 // Other known comparisons: the non-constant operand is bounded below the constant (by a zero-extension, a logical right shift, or a non-negative AND mask), so the result is always FlagLT_ULT.
1323 (CMPQconst (MOVBQZX _) [c]) && 0xFF < c -> (FlagLT_ULT)
1324 (CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c -> (FlagLT_ULT)
1325 (CMPQconst (MOVLQZX _) [c]) && 0xFFFFFFFF < c -> (FlagLT_ULT)
1326 (CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) -> (FlagLT_ULT)
1327 (CMPQconst (SHRQconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) -> (FlagLT_ULT)
1328 (CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
1329 (CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
1330 (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
1331 (CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT)
1332 (CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < int8(n) -> (FlagLT_ULT)
1333
1334 // TODO: DIVxU also.
1335
1336 // Absorb flag constants into SBB ops: the carry mask is -1 for the two ULT flag constants and 0 for every other flag constant.
1337 (SBBQcarrymask (FlagEQ)) -> (MOVQconst [0])
1338 (SBBQcarrymask (FlagLT_ULT)) -> (MOVQconst [-1])
1339 (SBBQcarrymask (FlagLT_UGT)) -> (MOVQconst [0])
1340 (SBBQcarrymask (FlagGT_ULT)) -> (MOVQconst [-1])
1341 (SBBQcarrymask (FlagGT_UGT)) -> (MOVQconst [0])
1342 (SBBLcarrymask (FlagEQ)) -> (MOVLconst [0])
1343 (SBBLcarrymask (FlagLT_ULT)) -> (MOVLconst [-1])
1344 (SBBLcarrymask (FlagLT_UGT)) -> (MOVLconst [0])
1345 (SBBLcarrymask (FlagGT_ULT)) -> (MOVLconst [-1])
1346 (SBBLcarrymask (FlagGT_UGT)) -> (MOVLconst [0])
1347
1348 // Absorb flag constants into branches: a condition that holds for the flag constant becomes (First nil yes no); one that does not hold becomes (First nil no yes).
1349 ((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) -> (First nil yes no)
1350 ((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) -> (First nil no yes)
1351 ((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) -> (First nil yes no)
1352 ((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) -> (First nil no yes)
1353 ((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) -> (First nil yes no)
1354 ((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) -> (First nil no yes)
1355 ((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) -> (First nil yes no)
1356 ((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) -> (First nil no yes)
1357 ((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) -> (First nil yes no)
1358 ((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) -> (First nil no yes)
1359
1360 // Absorb flag constants into SETxx ops: the result is the constant 1 when the condition holds for that flag constant, 0 when it does not.
1361 ((SETEQ|SETLE|SETGE|SETBE|SETAE) (FlagEQ)) -> (MOVLconst [1])
1362 ((SETNE|SETL|SETG|SETB|SETA) (FlagEQ)) -> (MOVLconst [0])
1363 ((SETNE|SETL|SETLE|SETB|SETBE) (FlagLT_ULT)) -> (MOVLconst [1])
1364 ((SETEQ|SETG|SETGE|SETA|SETAE) (FlagLT_ULT)) -> (MOVLconst [0])
1365 ((SETNE|SETL|SETLE|SETA|SETAE) (FlagLT_UGT)) -> (MOVLconst [1])
1366 ((SETEQ|SETG|SETGE|SETB|SETBE) (FlagLT_UGT)) -> (MOVLconst [0])
1367 ((SETNE|SETG|SETGE|SETB|SETBE) (FlagGT_ULT)) -> (MOVLconst [1])
1368 ((SETEQ|SETL|SETLE|SETA|SETAE) (FlagGT_ULT)) -> (MOVLconst [0])
1369 ((SETNE|SETG|SETGE|SETA|SETAE) (FlagGT_UGT)) -> (MOVLconst [1])
1370 ((SETEQ|SETL|SETLE|SETB|SETBE) (FlagGT_UGT)) -> (MOVLconst [0])
1371 // Same absorption for the SETxxstore forms: with a flag constant as input, the op becomes a MOVBstore of the constant 0 or 1 (typed typ.UInt8) at the same offset and symbol.
1372 (SETEQstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1373 (SETEQstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1374 (SETEQstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1375 (SETEQstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1376 (SETEQstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1377
1378 (SETNEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1379 (SETNEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1380 (SETNEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1381 (SETNEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1382 (SETNEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1383
1384 (SETLstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1385 (SETLstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1386 (SETLstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1387 (SETLstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1388 (SETLstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1389
1390 (SETLEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1391 (SETLEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1392 (SETLEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1393 (SETLEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1394 (SETLEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1395
1396 (SETGstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1397 (SETGstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1398 (SETGstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1399 (SETGstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1400 (SETGstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1401
1402 (SETGEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1403 (SETGEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1404 (SETGEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1405 (SETGEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1406 (SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1407
1408 (SETBstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1409 (SETBstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1410 (SETBstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1411 (SETBstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1412 (SETBstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1413
1414 (SETBEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1415 (SETBEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1416 (SETBEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1417 (SETBEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1418 (SETBEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1419
1420 (SETAstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1421 (SETAstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1422 (SETAstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1423 (SETAstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1424 (SETAstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1425
1426 (SETAEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1427 (SETAEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1428 (SETAEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1429 (SETAEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
1430 (SETAEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
1431
1432 // Remove redundant *const ops: an identity constant yields x itself, an absorbing constant yields that constant. The L forms compare only the low 32 bits of c.
1433 (ADDQconst [0] x) -> x
1434 (ADDLconst [c] x) && int32(c)==0 -> x
1435 (SUBQconst [0] x) -> x
1436 (SUBLconst [c] x) && int32(c) == 0 -> x
1437 (ANDQconst [0] _) -> (MOVQconst [0])
1438 (ANDLconst [c] _) && int32(c)==0 -> (MOVLconst [0])
1439 (ANDQconst [-1] x) -> x
1440 (ANDLconst [c] x) && int32(c)==-1 -> x
1441 (ORQconst [0] x) -> x
1442 (ORLconst [c] x) && int32(c)==0 -> x
1443 (ORQconst [-1] _) -> (MOVQconst [-1])
1444 (ORLconst [c] _) && int32(c)==-1 -> (MOVLconst [-1])
1445 (XORQconst [0] x) -> x
1446 (XORLconst [c] x) && int32(c)==0 -> x
1447 // TODO: since we got rid of the W/B versions, we might miss
1448 // things like (ANDLconst [0x100] x) which were formerly
1449 // (ANDBconst [0] x). Probably doesn't happen very often.
1450 // If we cared, we might do:
1451 // (ANDLconst <t> [c] x) && t.Size()==1 && int8(c)==0 -> (MOVLconst [0])
1452
1453 // Remove redundant ops: a negation of a negation is the original value.
1454 // Not in generic rules, because they may appear after lowering, e.g. Slicemask
1455 (NEG(Q|L) (NEG(Q|L) x)) -> x
1456
1457 // Convert constant subtracts to constant adds. SUBQconst skips c == -(1<<31), whose negation is no longer a 32-bit value; SUBLconst wraps the negated constant back to 32 bits.
1458 (SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
1459 (SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)
1460
1461 // generic constant folding. Folds producing a MOVLconst wrap the result in int64(int32(...)) when the int64 arithmetic could leave the upper 32 bits not sign-extended; this includes the BT(S|R|C)Lconst folds, where bit 31 may be set, cleared or flipped.
1462 // TODO: more of this
1463 (ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d])
1464 (ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c+d))])
1465 (ADDQconst [c] (ADDQconst [d] x)) && is32Bit(c+d) -> (ADDQconst [c+d] x)
1466 (ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [int64(int32(c+d))] x)
1467 (SUBQconst (MOVQconst [d]) [c]) -> (MOVQconst [d-c])
1468 (SUBQconst (SUBQconst x [d]) [c]) && is32Bit(-c-d) -> (ADDQconst [-c-d] x)
1469 (SARQconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
1470 (SARLconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int32(d))>>uint64(c)])
1471 (SARWconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int16(d))>>uint64(c)])
1472 (SARBconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int8(d))>>uint64(c)])
1473 (NEGQ (MOVQconst [c])) -> (MOVQconst [-c])
1474 (NEGL (MOVLconst [c])) -> (MOVLconst [int64(int32(-c))])
1475 (MULQconst [c] (MOVQconst [d])) -> (MOVQconst [c*d])
1476 (MULLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c*d))])
1477 (ANDQconst [c] (MOVQconst [d])) -> (MOVQconst [c&d])
1478 (ANDLconst [c] (MOVLconst [d])) -> (MOVLconst [c&d])
1479 (ORQconst [c] (MOVQconst [d])) -> (MOVQconst [c|d])
1480 (ORLconst [c] (MOVLconst [d])) -> (MOVLconst [c|d])
1481 (XORQconst [c] (MOVQconst [d])) -> (MOVQconst [c^d])
1482 (XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
1483 (NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
1484 (NOTL (MOVLconst [c])) -> (MOVLconst [^c])
1485 (BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))])
1486 (BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(d|(1<<uint32(c))))])
1487 (BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))])
1488 (BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(d&^(1<<uint32(c))))])
1489 (BTCQconst [c] (MOVQconst [d])) -> (MOVQconst [d^(1<<uint32(c))])
1490 (BTCLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(d^(1<<uint32(c))))])
1491
1492 // generic simplifications: adding a negation is a subtraction, and an op applied to the same value twice is either that value (AND, OR) or zero (SUB, XOR).
1493 // TODO: more of this
1494 (ADDQ x (NEGQ y)) -> (SUBQ x y)
1495 (ADDL x (NEGL y)) -> (SUBL x y)
1496 (SUBQ x x) -> (MOVQconst [0])
1497 (SUBL x x) -> (MOVLconst [0])
1498 (ANDQ x x) -> x
1499 (ANDL x x) -> x
1500 (ORQ x x) -> x
1501 (ORL x x) -> x
1502 (XORQ x x) -> (MOVQconst [0])
1503 (XORL x x) -> (MOVLconst [0])
1504 (NEGQ (ADDQconst [c] (NEGQ x))) && c != -(1<<31) -> (ADDQconst [-c] x)
1505
1506 // checking AND against 0: comparing an AND result with zero becomes a TEST of the AND's operands. For the W and B forms the constant mask is truncated to the compare width.
1507 (CMPQconst (ANDQ x y) [0]) -> (TESTQ x y)
1508 (CMPLconst (ANDL x y) [0]) -> (TESTL x y)
1509 (CMPWconst (ANDL x y) [0]) -> (TESTW x y)
1510 (CMPBconst (ANDL x y) [0]) -> (TESTB x y)
1511 (CMPQconst (ANDQconst [c] x) [0]) -> (TESTQconst [c] x)
1512 (CMPLconst (ANDLconst [c] x) [0]) -> (TESTLconst [c] x)
1513 (CMPWconst (ANDLconst [c] x) [0]) -> (TESTWconst [int64(int16(c))] x)
1514 (CMPBconst (ANDLconst [c] x) [0]) -> (TESTBconst [int64(int8(c))] x)
1515
1516 // Convert TESTx to TESTxconst if possible. TESTQ needs a 32-bit constant; the W and B forms truncate the constant to the operand width, as the CMP(W|B)const-of-ANDLconst rules do, so an all-ones mask is always spelled -1.
1517 (TESTQ (MOVQconst [c]) x) && is32Bit(c) -> (TESTQconst [c] x)
1518 (TESTL (MOVLconst [c]) x) -> (TESTLconst [c] x)
1519 (TESTW (MOVLconst [c]) x) -> (TESTWconst [int64(int16(c))] x)
1520 (TESTB (MOVLconst [c]) x) -> (TESTBconst [int64(int8(c))] x)
1521
1522 // TEST %reg,%reg is shorter than CMP against 0. Likewise a TESTxconst with mask -1 becomes TEST x x, unless x is itself a constant op.
1523 (CMPQconst x [0]) -> (TESTQ x x)
1524 (CMPLconst x [0]) -> (TESTL x x)
1525 (CMPWconst x [0]) -> (TESTW x x)
1526 (CMPBconst x [0]) -> (TESTB x x)
1527 (TESTQconst [-1] x) && x.Op != OpAMD64MOVQconst -> (TESTQ x x)
1528 (TESTLconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTL x x)
1529 (TESTWconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTW x x)
1530 (TESTBconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTB x x)
1531
1532 // Combining byte loads into larger (unaligned) loads.
1533 // There are many ways these combinations could occur. This is
1534 // designed to match the way encoding/binary.LittleEndian does it.
1535
1536 // Little-endian loads
1537 // Two narrow loads from the same p and mem at adjacent offsets, OR'd with the higher-offset one shifted left by the narrow width, become one load of twice the width at i0. Each matched value must have a single use; the new load is placed at mergePoint(b,x0,x1).
1538 (ORL x0:(MOVBload [i0] {s} p mem)
1539 sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
1540 && i1 == i0+1
1541 && x0.Uses == 1
1542 && x1.Uses == 1
1543 && sh.Uses == 1
1544 && mergePoint(b,x0,x1) != nil
1545 && clobber(x0)
1546 && clobber(x1)
1547 && clobber(sh)
1548 -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
1549 // Same byte pair under ORQ/SHLQconst.
1550 (ORQ x0:(MOVBload [i0] {s} p mem)
1551 sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
1552 && i1 == i0+1
1553 && x0.Uses == 1
1554 && x1.Uses == 1
1555 && sh.Uses == 1
1556 && mergePoint(b,x0,x1) != nil
1557 && clobber(x0)
1558 && clobber(x1)
1559 && clobber(sh)
1560 -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
1561 // Two 16-bit loads two bytes apart become one 32-bit load.
1562 (ORL x0:(MOVWload [i0] {s} p mem)
1563 sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
1564 && i1 == i0+2
1565 && x0.Uses == 1
1566 && x1.Uses == 1
1567 && sh.Uses == 1
1568 && mergePoint(b,x0,x1) != nil
1569 && clobber(x0)
1570 && clobber(x1)
1571 && clobber(sh)
1572 -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
1573 // Same 16-bit pair under ORQ/SHLQconst.
1574 (ORQ x0:(MOVWload [i0] {s} p mem)
1575 sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
1576 && i1 == i0+2
1577 && x0.Uses == 1
1578 && x1.Uses == 1
1579 && sh.Uses == 1
1580 && mergePoint(b,x0,x1) != nil
1581 && clobber(x0)
1582 && clobber(x1)
1583 && clobber(sh)
1584 -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
1585 // Two 32-bit loads four bytes apart become one 64-bit load.
1586 (ORQ x0:(MOVLload [i0] {s} p mem)
1587 sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
1588 && i1 == i0+4
1589 && x0.Uses == 1
1590 && x1.Uses == 1
1591 && sh.Uses == 1
1592 && mergePoint(b,x0,x1) != nil
1593 && clobber(x0)
1594 && clobber(x1)
1595 && clobber(sh)
1596 -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
1597 // Inside a longer OR chain: two adjacent loads shifted by j0 and by j0 plus the narrow width are replaced by one double-width load shifted by j0, OR'd with the rest of the chain (y). j0 must be a multiple of the merged width in bits.
1598 (ORL
1599 s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
1600 or:(ORL
1601 s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
1602 y))
1603 && i1 == i0+1
1604 && j1 == j0+8
1605 && j0 % 16 == 0
1606 && x0.Uses == 1
1607 && x1.Uses == 1
1608 && s0.Uses == 1
1609 && s1.Uses == 1
1610 && or.Uses == 1
1611 && mergePoint(b,x0,x1,y) != nil
1612 && clobber(x0)
1613 && clobber(x1)
1614 && clobber(s0)
1615 && clobber(s1)
1616 && clobber(or)
1617 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
1618 // Same byte pair in an ORQ/SHLQconst chain.
1619 (ORQ
1620 s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
1621 or:(ORQ
1622 s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
1623 y))
1624 && i1 == i0+1
1625 && j1 == j0+8
1626 && j0 % 16 == 0
1627 && x0.Uses == 1
1628 && x1.Uses == 1
1629 && s0.Uses == 1
1630 && s1.Uses == 1
1631 && or.Uses == 1
1632 && mergePoint(b,x0,x1,y) != nil
1633 && clobber(x0)
1634 && clobber(x1)
1635 && clobber(s0)
1636 && clobber(s1)
1637 && clobber(or)
1638 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
1639 // Two 16-bit loads in an ORQ chain merge into a 32-bit load; j0 must be a multiple of 32.
1640 (ORQ
1641 s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
1642 or:(ORQ
1643 s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
1644 y))
1645 && i1 == i0+2
1646 && j1 == j0+16
1647 && j0 % 32 == 0
1648 && x0.Uses == 1
1649 && x1.Uses == 1
1650 && s0.Uses == 1
1651 && s1.Uses == 1
1652 && or.Uses == 1
1653 && mergePoint(b,x0,x1,y) != nil
1654 && clobber(x0)
1655 && clobber(x1)
1656 && clobber(s0)
1657 && clobber(s1)
1658 && clobber(or)
1659 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
1660
1661 // Little-endian indexed loads
1662
// Pair merges for the idx1 (p + idx) load forms. In every rule x0 is the load
// at offset i0, x1 is the adjacent load at the next higher offset and is the
// shifted operand, both share {s}, p, idx and mem, and x0/x1/sh are
// single-use. The pair is replaced by one load of twice the size at i0.

// ORL: MOVBloadidx1 at i0 and i0+1 (shift 8) -> MOVWloadidx1.
1663 (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
1664 sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
1665 && i1 == i0+1
1666 && x0.Uses == 1
1667 && x1.Uses == 1
1668 && sh.Uses == 1
1669 && mergePoint(b,x0,x1) != nil
1670 && clobber(x0)
1671 && clobber(x1)
1672 && clobber(sh)
1673 -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
1674
// ORQ: MOVBloadidx1 at i0 and i0+1 (shift 8) -> MOVWloadidx1.
1675 (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem)
1676 sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
1677 && i1 == i0+1
1678 && x0.Uses == 1
1679 && x1.Uses == 1
1680 && sh.Uses == 1
1681 && mergePoint(b,x0,x1) != nil
1682 && clobber(x0)
1683 && clobber(x1)
1684 && clobber(sh)
1685 -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
1686
// ORL: MOVWloadidx1 at i0 and i0+2 (shift 16) -> MOVLloadidx1.
1687 (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem)
1688 sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
1689 && i1 == i0+2
1690 && x0.Uses == 1
1691 && x1.Uses == 1
1692 && sh.Uses == 1
1693 && mergePoint(b,x0,x1) != nil
1694 && clobber(x0)
1695 && clobber(x1)
1696 && clobber(sh)
1697 -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
1698
// ORQ: MOVWloadidx1 at i0 and i0+2 (shift 16) -> MOVLloadidx1.
1699 (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem)
1700 sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
1701 && i1 == i0+2
1702 && x0.Uses == 1
1703 && x1.Uses == 1
1704 && sh.Uses == 1
1705 && mergePoint(b,x0,x1) != nil
1706 && clobber(x0)
1707 && clobber(x1)
1708 && clobber(sh)
1709 -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
1710
// ORQ: MOVLloadidx1 at i0 and i0+4 (shift 32) -> MOVQloadidx1.
1711 (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem)
1712 sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
1713 && i1 == i0+4
1714 && x0.Uses == 1
1715 && x1.Uses == 1
1716 && sh.Uses == 1
1717 && mergePoint(b,x0,x1) != nil
1718 && clobber(x0)
1719 && clobber(x1)
1720 && clobber(sh)
1721 -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
1722
// Partial merges inside an OR chain for the idx1 load forms. Each rule matches
//   OR(s1, or:OR(s0, y))
// with s0/s1 constant left shifts of adjacent indexed loads sharing {s}, p,
// idx and mem, and rewrites it to OR(shift(wider indexed load), y). The load
// at the lower offset i0 has the smaller shift j0. All matched intermediates
// must be single-use and mergePoint(b,x0,x1,y) must be non-nil.

// ORL: MOVBloadidx1 at i0/i0+1, shifts j0/j0+8, j0 % 16 == 0 -> MOVWloadidx1.
1723 (ORL
1724 s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
1725 or:(ORL
1726 s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
1727 y))
1728 && i1 == i0+1
1729 && j1 == j0+8
1730 && j0 % 16 == 0
1731 && x0.Uses == 1
1732 && x1.Uses == 1
1733 && s0.Uses == 1
1734 && s1.Uses == 1
1735 && or.Uses == 1
1736 && mergePoint(b,x0,x1,y) != nil
1737 && clobber(x0)
1738 && clobber(x1)
1739 && clobber(s0)
1740 && clobber(s1)
1741 && clobber(or)
1742 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
1743
// ORQ: MOVBloadidx1 at i0/i0+1, shifts j0/j0+8, j0 % 16 == 0 -> MOVWloadidx1.
1744 (ORQ
1745 s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
1746 or:(ORQ
1747 s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
1748 y))
1749 && i1 == i0+1
1750 && j1 == j0+8
1751 && j0 % 16 == 0
1752 && x0.Uses == 1
1753 && x1.Uses == 1
1754 && s0.Uses == 1
1755 && s1.Uses == 1
1756 && or.Uses == 1
1757 && mergePoint(b,x0,x1,y) != nil
1758 && clobber(x0)
1759 && clobber(x1)
1760 && clobber(s0)
1761 && clobber(s1)
1762 && clobber(or)
1763 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
1764
// ORQ: MOVWloadidx1 at i0/i0+2, shifts j0/j0+16, j0 % 32 == 0 -> MOVLloadidx1.
1765 (ORQ
1766 s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
1767 or:(ORQ
1768 s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
1769 y))
1770 && i1 == i0+2
1771 && j1 == j0+16
1772 && j0 % 32 == 0
1773 && x0.Uses == 1
1774 && x1.Uses == 1
1775 && s0.Uses == 1
1776 && s1.Uses == 1
1777 && or.Uses == 1
1778 && mergePoint(b,x0,x1,y) != nil
1779 && clobber(x0)
1780 && clobber(x1)
1781 && clobber(s0)
1782 && clobber(s1)
1783 && clobber(or)
1784 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
1785
1786 // Big-endian loads
1787
// Big-endian pair merges. Here the load at the LOWER offset (x0, at i0) is the
// shifted operand and the load at the higher offset (x1) is unshifted, so the
// merged wider load at i0 has to be byte-reversed: ROLWconst [8] for a word,
// BSWAPL/BSWAPQ for larger sizes. All matched intermediates are single-use and
// mergePoint(b,x0,x1) must be non-nil.

// ORL: MOVBload at i0 (shifted by 8) and i0+1 -> ROLWconst [8] of MOVWload.
1788 (ORL
1789 x1:(MOVBload [i1] {s} p mem)
1790 sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
1791 && i1 == i0+1
1792 && x0.Uses == 1
1793 && x1.Uses == 1
1794 && sh.Uses == 1
1795 && mergePoint(b,x0,x1) != nil
1796 && clobber(x0)
1797 && clobber(x1)
1798 && clobber(sh)
1799 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
1800
// ORQ variant of the byte-pair rule.
1801 (ORQ
1802 x1:(MOVBload [i1] {s} p mem)
1803 sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
1804 && i1 == i0+1
1805 && x0.Uses == 1
1806 && x1.Uses == 1
1807 && sh.Uses == 1
1808 && mergePoint(b,x0,x1) != nil
1809 && clobber(x0)
1810 && clobber(x1)
1811 && clobber(sh)
1812 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
1813
// ORL: two ROLWconst [8] word loads (the shape produced by the byte-pair rules)
// at i0 (shifted by 16) and i0+2 -> BSWAPL of one MOVLload at i0.
1814 (ORL
1815 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
1816 sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
1817 && i1 == i0+2
1818 && x0.Uses == 1
1819 && x1.Uses == 1
1820 && r0.Uses == 1
1821 && r1.Uses == 1
1822 && sh.Uses == 1
1823 && mergePoint(b,x0,x1) != nil
1824 && clobber(x0)
1825 && clobber(x1)
1826 && clobber(r0)
1827 && clobber(r1)
1828 && clobber(sh)
1829 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
1830
// ORQ variant of the rotated-word-pair rule.
1831 (ORQ
1832 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
1833 sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
1834 && i1 == i0+2
1835 && x0.Uses == 1
1836 && x1.Uses == 1
1837 && r0.Uses == 1
1838 && r1.Uses == 1
1839 && sh.Uses == 1
1840 && mergePoint(b,x0,x1) != nil
1841 && clobber(x0)
1842 && clobber(x1)
1843 && clobber(r0)
1844 && clobber(r1)
1845 && clobber(sh)
1846 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
1847
// ORQ: two BSWAPL'd MOVLloads at i0 (shifted by 32) and i0+4 -> BSWAPQ of one
// MOVQload at i0.
1848 (ORQ
1849 r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
1850 sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
1851 && i1 == i0+4
1852 && x0.Uses == 1
1853 && x1.Uses == 1
1854 && r0.Uses == 1
1855 && r1.Uses == 1
1856 && sh.Uses == 1
1857 && mergePoint(b,x0,x1) != nil
1858 && clobber(x0)
1859 && clobber(x1)
1860 && clobber(r0)
1861 && clobber(r1)
1862 && clobber(sh)
1863 -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
1864
// Big-endian partial merges inside an OR chain. Each rule matches
//   OR(s0, or:OR(s1, y))
// where the load at the lower offset i0 carries the LARGER shift j0 and the
// adjacent load carries j1 (8 or 16 less). The pair is replaced by one wider
// load at i0, byte-reversed, shifted by j1; y is kept as the other operand.
// All matched intermediates are single-use and mergePoint(b,x0,x1,y) != nil.

// ORL: MOVBloads at i0/i0+1, j1 == j0-8, j1 % 16 == 0 -> ROLWconst [8] of
// MOVWload, typed typ.UInt16, shifted by j1.
1865 (ORL
1866 s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
1867 or:(ORL
1868 s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
1869 y))
1870 && i1 == i0+1
1871 && j1 == j0-8
1872 && j1 % 16 == 0
1873 && x0.Uses == 1
1874 && x1.Uses == 1
1875 && s0.Uses == 1
1876 && s1.Uses == 1
1877 && or.Uses == 1
1878 && mergePoint(b,x0,x1,y) != nil
1879 && clobber(x0)
1880 && clobber(x1)
1881 && clobber(s0)
1882 && clobber(s1)
1883 && clobber(or)
1884 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
1885
// ORQ variant of the byte-pair rule.
1886 (ORQ
1887 s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
1888 or:(ORQ
1889 s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
1890 y))
1891 && i1 == i0+1
1892 && j1 == j0-8
1893 && j1 % 16 == 0
1894 && x0.Uses == 1
1895 && x1.Uses == 1
1896 && s0.Uses == 1
1897 && s1.Uses == 1
1898 && or.Uses == 1
1899 && mergePoint(b,x0,x1,y) != nil
1900 && clobber(x0)
1901 && clobber(x1)
1902 && clobber(s0)
1903 && clobber(s1)
1904 && clobber(or)
1905 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
1906
// ORQ: two ROLWconst [8] MOVWloads at i0/i0+2, j1 == j0-16, j1 % 32 == 0 ->
// BSWAPL of MOVLload, typed typ.UInt32, shifted by j1.
1907 (ORQ
1908 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
1909 or:(ORQ
1910 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
1911 y))
1912 && i1 == i0+2
1913 && j1 == j0-16
1914 && j1 % 32 == 0
1915 && x0.Uses == 1
1916 && x1.Uses == 1
1917 && r0.Uses == 1
1918 && r1.Uses == 1
1919 && s0.Uses == 1
1920 && s1.Uses == 1
1921 && or.Uses == 1
1922 && mergePoint(b,x0,x1,y) != nil
1923 && clobber(x0)
1924 && clobber(x1)
1925 && clobber(r0)
1926 && clobber(r1)
1927 && clobber(s0)
1928 && clobber(s1)
1929 && clobber(or)
1930 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
1931
1932 // Big-endian indexed loads
1933
// Big-endian pair merges for the idx1 load forms. The load at the lower offset
// (x0, at i0) is the shifted operand; the merged wider indexed load at i0 is
// byte-reversed with ROLWconst [8], BSWAPL or BSWAPQ. Both loads share {s}, p,
// idx and mem; all matched intermediates are single-use.

// ORL: MOVBloadidx1 at i0 (shifted by 8) and i0+1 -> ROLWconst [8] of
// MOVWloadidx1.
1934 (ORL
1935 x1:(MOVBloadidx1 [i1] {s} p idx mem)
1936 sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
1937 && i1 == i0+1
1938 && x0.Uses == 1
1939 && x1.Uses == 1
1940 && sh.Uses == 1
1941 && mergePoint(b,x0,x1) != nil
1942 && clobber(x0)
1943 && clobber(x1)
1944 && clobber(sh)
1945 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
1946
// ORQ variant of the indexed byte-pair rule.
1947 (ORQ
1948 x1:(MOVBloadidx1 [i1] {s} p idx mem)
1949 sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
1950 && i1 == i0+1
1951 && x0.Uses == 1
1952 && x1.Uses == 1
1953 && sh.Uses == 1
1954 && mergePoint(b,x0,x1) != nil
1955 && clobber(x0)
1956 && clobber(x1)
1957 && clobber(sh)
1958 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
1959
// ORL: two ROLWconst [8] MOVWloadidx1 at i0 (shifted by 16) and i0+2 ->
// BSWAPL of MOVLloadidx1.
1960 (ORL
1961 r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
1962 sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
1963 && i1 == i0+2
1964 && x0.Uses == 1
1965 && x1.Uses == 1
1966 && r0.Uses == 1
1967 && r1.Uses == 1
1968 && sh.Uses == 1
1969 && mergePoint(b,x0,x1) != nil
1970 && clobber(x0)
1971 && clobber(x1)
1972 && clobber(r0)
1973 && clobber(r1)
1974 && clobber(sh)
1975 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
1976
// ORQ variant of the indexed rotated-word-pair rule.
1977 (ORQ
1978 r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
1979 sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
1980 && i1 == i0+2
1981 && x0.Uses == 1
1982 && x1.Uses == 1
1983 && r0.Uses == 1
1984 && r1.Uses == 1
1985 && sh.Uses == 1
1986 && mergePoint(b,x0,x1) != nil
1987 && clobber(x0)
1988 && clobber(x1)
1989 && clobber(r0)
1990 && clobber(r1)
1991 && clobber(sh)
1992 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
1993
// ORQ: two BSWAPL'd MOVLloadidx1 at i0 (shifted by 32) and i0+4 -> BSWAPQ of
// MOVQloadidx1.
1994 (ORQ
1995 r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem))
1996 sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
1997 && i1 == i0+4
1998 && x0.Uses == 1
1999 && x1.Uses == 1
2000 && r0.Uses == 1
2001 && r1.Uses == 1
2002 && sh.Uses == 1
2003 && mergePoint(b,x0,x1) != nil
2004 && clobber(x0)
2005 && clobber(x1)
2006 && clobber(r0)
2007 && clobber(r1)
2008 && clobber(sh)
2009 -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
2010
// Big-endian partial merges inside an OR chain for the idx1 load forms. Each
// rule matches OR(s0, or:OR(s1, y)) where the indexed load at the lower offset
// i0 has the larger shift j0. The pair becomes one byte-reversed wider indexed
// load at i0 shifted by j1, OR'd with y. All matched intermediates are
// single-use and mergePoint(b,x0,x1,y) must be non-nil.

// ORL: MOVBloadidx1 at i0/i0+1, j1 == j0-8, j1 % 16 == 0.
2011 (ORL
2012 s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
2013 or:(ORL
2014 s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
2015 y))
2016 && i1 == i0+1
2017 && j1 == j0-8
2018 && j1 % 16 == 0
2019 && x0.Uses == 1
2020 && x1.Uses == 1
2021 && s0.Uses == 1
2022 && s1.Uses == 1
2023 && or.Uses == 1
2024 && mergePoint(b,x0,x1,y) != nil
2025 && clobber(x0)
2026 && clobber(x1)
2027 && clobber(s0)
2028 && clobber(s1)
2029 && clobber(or)
2030 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
2031
// ORQ: MOVBloadidx1 at i0/i0+1, j1 == j0-8, j1 % 16 == 0.
2032 (ORQ
2033 s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
2034 or:(ORQ
2035 s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
2036 y))
2037 && i1 == i0+1
2038 && j1 == j0-8
2039 && j1 % 16 == 0
2040 && x0.Uses == 1
2041 && x1.Uses == 1
2042 && s0.Uses == 1
2043 && s1.Uses == 1
2044 && or.Uses == 1
2045 && mergePoint(b,x0,x1,y) != nil
2046 && clobber(x0)
2047 && clobber(x1)
2048 && clobber(s0)
2049 && clobber(s1)
2050 && clobber(or)
2051 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
2052
// ORQ: ROLWconst [8] MOVWloadidx1 at i0/i0+2, j1 == j0-16, j1 % 32 == 0.
2053 (ORQ
2054 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))
2055 or:(ORQ
2056 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
2057 y))
2058 && i1 == i0+2
2059 && j1 == j0-16
2060 && j1 % 32 == 0
2061 && x0.Uses == 1
2062 && x1.Uses == 1
2063 && r0.Uses == 1
2064 && r1.Uses == 1
2065 && s0.Uses == 1
2066 && s1.Uses == 1
2067 && or.Uses == 1
2068 && mergePoint(b,x0,x1,y) != nil
2069 && clobber(x0)
2070 && clobber(x1)
2071 && clobber(r0)
2072 && clobber(r1)
2073 && clobber(s0)
2074 && clobber(s1)
2075 && clobber(or)
2076 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
2077
2078 // Combine 2 byte stores + shift into rolw 8 + word store
// The outer store writes w at offset i; the inner (single-use) store x0 writes
// (SHRWconst [8] w) at i-1. Together they are replaced by one MOVWstore at i-1
// of w rotated by 8 (ROLWconst), chained onto the inner store's mem.
2079 (MOVBstore [i] {s} p w
2080 x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
2081 && x0.Uses == 1
2082 && clobber(x0)
2083 -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
2084
// Same rewrite for the idx1 (p + idx) store form.
2085 (MOVBstoreidx1 [i] {s} p idx w
2086 x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
2087 && x0.Uses == 1
2088 && clobber(x0)
2089 -> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
2090
2091 // Combine stores + shifts into bswap and larger (unaligned) stores
// A chain of four byte stores at offsets i, i-1, i-2, i-3 storing w,
// w>>8, w>>16 and w>>24 (SHRLconst) is replaced by a single MOVLstore at i-3
// of BSWAPL(w). Each inner store x0..x2 must be single-use.
2092 (MOVBstore [i] {s} p w
2093 x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
2094 x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
2095 x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
2096 && x0.Uses == 1
2097 && x1.Uses == 1
2098 && x2.Uses == 1
2099 && clobber(x0)
2100 && clobber(x1)
2101 && clobber(x2)
2102 -> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
2103
// Same rewrite for the idx1 (p + idx) store form.
2104 (MOVBstoreidx1 [i] {s} p idx w
2105 x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
2106 x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
2107 x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
2108 && x0.Uses == 1
2109 && x1.Uses == 1
2110 && x2.Uses == 1
2111 && clobber(x0)
2112 && clobber(x1)
2113 && clobber(x2)
2114 -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
2115
// A chain of eight byte stores at offsets i down to i-7 storing w, w>>8, ...,
// w>>56 (SHRQconst) is replaced by a single MOVQstore at i-7 of BSWAPQ(w).
// Each inner store x0..x6 must be single-use.
2116 (MOVBstore [i] {s} p w
2117 x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
2118 x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
2119 x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
2120 x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
2121 x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
2122 x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
2123 x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
2124 && x0.Uses == 1
2125 && x1.Uses == 1
2126 && x2.Uses == 1
2127 && x3.Uses == 1
2128 && x4.Uses == 1
2129 && x5.Uses == 1
2130 && x6.Uses == 1
2131 && clobber(x0)
2132 && clobber(x1)
2133 && clobber(x2)
2134 && clobber(x3)
2135 && clobber(x4)
2136 && clobber(x5)
2137 && clobber(x6)
2138 -> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
2139
// Same rewrite for the idx1 (p + idx) store form.
2140 (MOVBstoreidx1 [i] {s} p idx w
2141 x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
2142 x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
2143 x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
2144 x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
2145 x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
2146 x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
2147 x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
2148 && x0.Uses == 1
2149 && x1.Uses == 1
2150 && x2.Uses == 1
2151 && x3.Uses == 1
2152 && x4.Uses == 1
2153 && x5.Uses == 1
2154 && x6.Uses == 1
2155 && clobber(x0)
2156 && clobber(x1)
2157 && clobber(x2)
2158 && clobber(x3)
2159 && clobber(x4)
2160 && clobber(x5)
2161 && clobber(x6)
2162 -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
2163
2164 // Combine constant stores into larger (unaligned) stores.
// In each B/W/L pair below, `a` is the ValAndOff of the store at the lower
// offset and `c` the one at the adjacent higher offset. The first rule of a
// pair matches when the higher-offset store is the outer one, the second when
// the lower-offset store is the outer one. Both produce the same result: one
// store of twice the size at Off(a), whose value is Val(a) masked to the
// narrow size OR'd with Val(c) shifted into the upper half. The inner store x
// must be single-use.

// Two byte constant stores -> MOVWstoreconst.
2165 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
2166 && x.Uses == 1
2167 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
2168 && clobber(x)
2169 -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
2170 (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
2171 && x.Uses == 1
2172 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
2173 && clobber(x)
2174 -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
// Two word constant stores -> MOVLstoreconst.
2175 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
2176 && x.Uses == 1
2177 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
2178 && clobber(x)
2179 -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
2180 (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
2181 && x.Uses == 1
2182 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
2183 && clobber(x)
2184 -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
// Two long constant stores -> MOVQstore of a materialized MOVQconst (not a
// storeconst form); the combined 64-bit value is built in the MOVQconst.
2185 (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
2186 && x.Uses == 1
2187 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
2188 && clobber(x)
2189 -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
2190 (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
2191 && x.Uses == 1
2192 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
2193 && clobber(x)
2194 -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
// Two quad constant stores of zero at Off(c2) and Off(c2)+8 -> one MOVOstore
// of MOVOconst [0] at Off(c2). Only applies when config.useSSE is set and both
// stored values are 0; only the ordering with the higher offset outermost is
// matched.
2195 (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
2196 && config.useSSE
2197 && x.Uses == 1
2198 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
2199 && ValAndOff(c).Val() == 0
2200 && ValAndOff(c2).Val() == 0
2201 && clobber(x)
2202 -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
2203
// idx1 (p + i) versions of the constant-store merges: the outer store c sits
// at the offset immediately above the single-use inner store a, and both share
// {s}, p and i. Only this ordering (higher offset outermost) is matched here.
// Byte and word pairs stay in storeconstidx1 form; the long pair becomes a
// MOVQstoreidx1 of a materialized MOVQconst.
2204 (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
2205 && x.Uses == 1
2206 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
2207 && clobber(x)
2208 -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
2209 (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
2210 && x.Uses == 1
2211 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
2212 && clobber(x)
2213 -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
2214 (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
2215 && x.Uses == 1
2216 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
2217 && clobber(x)
2218 -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
2219
// Scaled-index versions of the constant-store merges. The merged store is
// emitted in idx1 form, with the original scale applied explicitly to the
// index via SHLQconst ([1] for idx2, [2] for idx4), rather than in a wider
// scaled-index form.
2220 (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
2221 && x.Uses == 1
2222 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
2223 && clobber(x)
2224 -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
2225 (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
2226 && x.Uses == 1
2227 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
2228 && clobber(x)
2229 -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
2230
2231 // Combine stores into larger (unaligned) stores.
// Each rule pairs a store of some value at the lower offset with a store of
// the same value shifted right by the store width at the next offset, and
// replaces both with one store of twice the width at the lower offset. The
// inner store x must be single-use.

// Outer store holds w>>8 at i, inner holds w at i-1.
2232 (MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
2233 && x.Uses == 1
2234 && clobber(x)
2235 -> (MOVWstore [i-1] {s} p w mem)
// Opposite nesting: outer store holds w at i, inner holds w>>8 at i+1.
2236 (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
2237 && x.Uses == 1
2238 && clobber(x)
2239 -> (MOVWstore [i] {s} p w mem)
// General shift: outer holds w>>j at i, inner holds w0 = w>>(j-8) at i-1; the
// merged store writes w0.
2240 (MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
2241 && x.Uses == 1
2242 && clobber(x)
2243 -> (MOVWstore [i-1] {s} p w0 mem)
// Word stores: w>>16 at i over w at i-2 -> MOVLstore.
2244 (MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
2245 && x.Uses == 1
2246 && clobber(x)
2247 -> (MOVLstore [i-2] {s} p w mem)
// Word stores, general shift: w>>j at i over w0 = w>>(j-16) at i-2.
2248 (MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
2249 && x.Uses == 1
2250 && clobber(x)
2251 -> (MOVLstore [i-2] {s} p w0 mem)
// Long stores: w>>32 at i over w at i-4 -> MOVQstore.
2252 (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
2253 && x.Uses == 1
2254 && clobber(x)
2255 -> (MOVQstore [i-4] {s} p w mem)
// Long stores, general shift: w>>j at i over w0 = w>>(j-32) at i-4.
2256 (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
2257 && x.Uses == 1
2258 && clobber(x)
2259 -> (MOVQstore [i-4] {s} p w0 mem)
2260
// idx1 (p + idx) versions of the shifted-value store merges: the outer store
// holds the value shifted right by one store width (or by j), the single-use
// inner store at the next lower offset holds the unshifted value (or the value
// shifted by j minus the width). Result is one idx1 store of twice the width
// at the lower offset. Unlike the non-indexed byte rules in this file, only
// the ordering with the higher offset outermost is matched here.
2261 (MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
2262 && x.Uses == 1
2263 && clobber(x)
2264 -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
2265 (MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem))
2266 && x.Uses == 1
2267 && clobber(x)
2268 -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
2269 (MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
2270 && x.Uses == 1
2271 && clobber(x)
2272 -> (MOVLstoreidx1 [i-2] {s} p idx w mem)
2273 (MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem))
2274 && x.Uses == 1
2275 && clobber(x)
2276 -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
2277 (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
2278 && x.Uses == 1
2279 && clobber(x)
2280 -> (MOVQstoreidx1 [i-4] {s} p idx w mem)
2281 (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
2282 && x.Uses == 1
2283 && clobber(x)
2284 -> (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
2285
// Scaled-index (idx2 / idx4) versions of the shifted-value store merges. The
// merged store is emitted in idx1 form with the scale applied explicitly to
// idx through SHLQconst ([1] for idx2, [2] for idx4).
// Note the general-shift idx2 rule matches only SHRQconst, whereas the fixed
// [16] rule accepts SHR(L|Q)const.
2286 (MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
2287 && x.Uses == 1
2288 && clobber(x)
2289 -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
2290 (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
2291 && x.Uses == 1
2292 && clobber(x)
2293 -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
2294 (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
2295 && x.Uses == 1
2296 && clobber(x)
2297 -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
2298 (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
2299 && x.Uses == 1
2300 && clobber(x)
2301 -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
2302
// Merges two adjacent load-then-store copies into one wider copy. The outer
// store writes x1 (a load from p2 at offset j) to p at offset i; the inner
// store mem2 writes x2 (a load from p2 at the preceding offset) to p at the
// preceding offset. Both loads read from the same `mem`, which is also the
// memory argument of the inner store. x1, x2 and mem2 must be single-use.
// Result: one load of twice the width from p2 feeding one store of twice the
// width to p, both at the lower offsets.

// Byte copies at (i-1,i) <- (j-1,j) -> MOVWload/MOVWstore.
2303 (MOVBstore [i] {s} p
2304 x1:(MOVBload [j] {s2} p2 mem)
2305 mem2:(MOVBstore [i-1] {s} p
2306 x2:(MOVBload [j-1] {s2} p2 mem) mem))
2307 && x1.Uses == 1
2308 && x2.Uses == 1
2309 && mem2.Uses == 1
2310 && clobber(x1)
2311 && clobber(x2)
2312 && clobber(mem2)
2313 -> (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
2314
// Word copies at (i-2,i) <- (j-2,j) -> MOVLload/MOVLstore.
2315 (MOVWstore [i] {s} p
2316 x1:(MOVWload [j] {s2} p2 mem)
2317 mem2:(MOVWstore [i-2] {s} p
2318 x2:(MOVWload [j-2] {s2} p2 mem) mem))
2319 && x1.Uses == 1
2320 && x2.Uses == 1
2321 && mem2.Uses == 1
2322 && clobber(x1)
2323 && clobber(x2)
2324 && clobber(mem2)
2325 -> (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
2326
// Long copies at (i-4,i) <- (j-4,j) -> MOVQload/MOVQstore.
2327 (MOVLstore [i] {s} p
2328 x1:(MOVLload [j] {s2} p2 mem)
2329 mem2:(MOVLstore [i-4] {s} p
2330 x2:(MOVLload [j-4] {s2} p2 mem) mem))
2331 && x1.Uses == 1
2332 && x2.Uses == 1
2333 && mem2.Uses == 1
2334 && clobber(x1)
2335 && clobber(x2)
2336 && clobber(mem2)
2337 -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
2338
// Fold a LEAL address computation into the memory op that uses it.
// NOTE(review): LEAL is the 32-bit address form; these look like the
// 32-bit-pointer counterparts of LEAQ rules defined elsewhere - confirm.

// Loads: when canMergeSym(sym1, sym2) holds and off1+off2 passes is32Bit, the
// load addresses `base` directly with the summed offset and merged symbol.
2339 (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2340 (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
2341 (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2342 (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
2343 (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2344 (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
2345 (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2346 (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
2347
// Stores: same conditions and rewrite as the loads, with val passed through.
2348 (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2349 (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
2350 (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2351 (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
2352 (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2353 (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
2354 (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2355 (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
2356
// Constant stores: the aux int is a ValAndOff, so the offset is folded with
// ValAndOff(sc).add(off), guarded by ValAndOff(sc).canAdd(off) instead of
// is32Bit.
2357 (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
2358 (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
2359 (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
2360 (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
2361 (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
2362 (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
2363 (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
2364 (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
2365
// Fold an ADDLconst on the pointer into the offset of the load/store that
// uses it. Loads and stores require is32Bit(off1+off2); constant stores fold
// through ValAndOff(sc).add(off) guarded by ValAndOff(sc).canAdd(off). The
// symbol {sym}/{s} is carried over unchanged.
2366 (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQload [off1+off2] {sym} ptr mem)
2367 (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVLload [off1+off2] {sym} ptr mem)
2368 (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem)
2369 (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
2370 (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVQstore [off1+off2] {sym} ptr val mem)
2371 (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore [off1+off2] {sym} ptr val mem)
2372 (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
2373 (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
2374 (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
2375 (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
2376 (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
2377 (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
2378 (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
2379 (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
2380 (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
2381 (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
2382
2383 // Merge load and op
2384 // TODO: add indexed variants?
// Binary op whose second operand is a load l: when canMergeLoadClobber(v, l, x)
// holds, the op is replaced by its *load form, which takes x plus the load's
// offset, symbol, pointer and memory. Integer Q/L ops first, then SD/SS
// floating-point ops.
2385 ((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
2386 ((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
2387 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
2388 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
// Store of a single-use *Lload op back to the same [off] {sym} ptr it loaded
// from -> the corresponding *Lmodify op on that address.
2389 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
// Store of a single-use op whose FIRST operand is a single-use load from the
// same [off] {sym} ptr -> *Lmodify. This form also covers SUB and BTC/BTR/BTS.
2390 (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
2391 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
// 64-bit (Q) versions of the two modify rules.
2392 (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
2393 (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
2394 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)
2395
2396 // Merge ADDQconst and LEAQ into atomic loads.
// ADDQconst on the pointer: add its constant to the load offset when the sum
// passes is32Bit.
2397 (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
2398 (MOV(Q|L|B)atomicload [off1+off2] {sym} ptr mem)
// LEAQ on the pointer: additionally requires canMergeSym and merges symbols.
2399 (MOV(Q|L|B)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
2400 (MOV(Q|L|B)atomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
2401
2402 // Merge ADDQconst and LEAQ into atomic stores.
// For XCHGQ and XCHGL the pointer is the second argument (after val). The
// ADDQconst rules only need is32Bit(off1+off2); the LEAQ rules also require
// canMergeSym(sym1, sym2) and that the LEAQ's base is not OpSB.
2403 (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
2404 (XCHGQ [off1+off2] {sym} val ptr mem)
2405 (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
2406 (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
2407 (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
2408 (XCHGL [off1+off2] {sym} val ptr mem)
2409 (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
2410 (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
2411
2412 // Merge ADDQconst into atomic adds.
2413 // TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
// Argument order is (val, ptr, mem); only the ADDQconst on ptr is folded, and
// only when off1+off2 passes is32Bit.
2414 (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
2415 (XADDQlock [off1+off2] {sym} val ptr mem)
2416 (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
2417 (XADDLlock [off1+off2] {sym} val ptr mem)
2418
2419 // Merge ADDQconst into atomic compare and swaps.
2420 // TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
// Argument order is (ptr, old, new_, mem); the ADDQconst on ptr is folded
// into the offset when off1+off2 passes is32Bit.
2421 (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) ->
2422 (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
2423 (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) ->
2424 (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
2425
2426 // We don't need the conditional move if we know the arg of BSF is not zero.
// The BSFQ argument is (ORQconst [c] _) with c != 0, so it has at least one
// bit set; the CMOVQEQ is replaced by its first operand x.
2427 (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) && c != 0 -> x
2428 // Extension is unnecessary for trailing zeros.
// The ORQconst sets bit 8 (resp. bit 16), just above the byte (resp. word)
// being zero-extended, so the MOVBQZX/MOVWQZX is dropped and x is used
// directly.
2429 (BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
2430 (BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))
2431
2432 // Simplify indexed loads/stores
// An indexed store/load whose index is a constant (MOVQconst [c]) becomes the
// plain (non-indexed) op on p with offset i + scale*c, where scale is the N of
// the idxN suffix (1, 2, 4 or 8). Each rule is guarded by is32Bit on the new
// offset. Stores first, then the matching loads.
2433 (MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVBstore [i+c] {s} p w mem)
2434 (MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVWstore [i+c] {s} p w mem)
2435 (MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVLstore [i+c] {s} p w mem)
2436 (MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVQstore [i+c] {s} p w mem)
2437 (MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+2*c) -> (MOVWstore [i+2*c] {s} p w mem)
2438 (MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVLstore [i+4*c] {s} p w mem)
2439 (MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVLstore [i+8*c] {s} p w mem)
2440 (MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVQstore [i+8*c] {s} p w mem)
2441 (MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSSstore [i+c] {s} p w mem)
2442 (MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVSSstore [i+4*c] {s} p w mem)
2443 (MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSDstore [i+c] {s} p w mem)
2444 (MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVSDstore [i+8*c] {s} p w mem)
2445 (MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVBload [i+c] {s} p mem)
2446 (MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVWload [i+c] {s} p mem)
2447 (MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVLload [i+c] {s} p mem)
2448 (MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVQload [i+c] {s} p mem)
2449 (MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+2*c) -> (MOVWload [i+2*c] {s} p mem)
2450 (MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVLload [i+4*c] {s} p mem)
2451 (MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVLload [i+8*c] {s} p mem)
2452 (MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVQload [i+8*c] {s} p mem)
2453 (MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSSload [i+c] {s} p mem)
2454 (MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVSSload [i+4*c] {s} p mem)
2455 (MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSDload [i+c] {s} p mem)
2456 (MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVSDload [i+8*c] {s} p mem)
2457
2458 // Redundant sign/zero extensions: an extension applied to a same-kind extension from an equal or narrower width collapses to the single narrower extension op.
2459 // Note: see issue 21963. We have to make sure we use the right type on
2460 // the resulting extension (the outer type, not the inner type).
2461 (MOVLQSX (MOVLQSX x)) -> (MOVLQSX x)
2462 (MOVLQSX (MOVWQSX x)) -> (MOVWQSX x)
2463 (MOVLQSX (MOVBQSX x)) -> (MOVBQSX x)
2464 (MOVWQSX (MOVWQSX x)) -> (MOVWQSX x)
2465 (MOVWQSX (MOVBQSX x)) -> (MOVBQSX x)
2466 (MOVBQSX (MOVBQSX x)) -> (MOVBQSX x)
2467 (MOVLQZX (MOVLQZX x)) -> (MOVLQZX x)
2468 (MOVLQZX (MOVWQZX x)) -> (MOVWQZX x)
2469 (MOVLQZX (MOVBQZX x)) -> (MOVBQZX x)
2470 (MOVWQZX (MOVWQZX x)) -> (MOVWQZX x)
2471 (MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
2472 (MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
2473 // Fold a load, an ADD/AND/OR/XOR/BTC/BTR/BTS-with-constant on the loaded value, and a store back to the same [off] {sym} pointer (isSamePtr) into one ...constmodify op; the load and the op must each have a single use.
2474 (MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
2475 && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
2476 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
2477 (MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
2478 && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
2479 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
2480
2481 // float <-> int register moves, with no conversion: a load whose memory argument is a store of the other register class to the same [off] {sym} ptr is replaced by a direct register move of the stored value.
2482 // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
2483 (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) -> (MOVQf2i val)
2484 (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) -> (MOVLf2i val)
2485 (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) -> (MOVQi2f val)
2486 (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) -> (MOVLi2f val)
2487
2488 // Other load-like ops: when the folded load reads what a store of the other register class wrote to the same [off] {sym} ptr, use the register form of the op on a register move of the stored value.
2489 (ADDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ADDQ x (MOVQf2i y))
2490 (ADDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ADDL x (MOVLf2i y))
2491 (SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (SUBQ x (MOVQf2i y))
2492 (SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (SUBL x (MOVLf2i y))
2493 (ANDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ANDQ x (MOVQf2i y))
2494 (ANDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ANDL x (MOVLf2i y))
2495 ( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> ( ORQ x (MOVQf2i y))
2496 ( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> ( ORL x (MOVLf2i y))
2497 (XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (XORQ x (MOVQf2i y))
2498 (XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (XORL x (MOVLf2i y))
2499
2500 (ADDSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (ADDSD x (MOVQi2f y))
2501 (ADDSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (ADDSS x (MOVLi2f y))
2502 (SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (SUBSD x (MOVQi2f y))
2503 (SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (SUBSS x (MOVLi2f y))
2504 (MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (MULSD x (MOVQi2f y))
2505 (MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (MULSS x (MOVLi2f y))
2506
2507 // Redirect stores to use the other register set: a store of the result of an f2i/i2f register move is rewritten to store the original value with the other register class's store op.
2508 (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) -> (MOVSDstore [off] {sym} ptr val mem)
2509 (MOVLstore [off] {sym} ptr (MOVLf2i val) mem) -> (MOVSSstore [off] {sym} ptr val mem)
2510 (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) -> (MOVQstore [off] {sym} ptr val mem)
2511 (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) -> (MOVLstore [off] {sym} ptr val mem)
2512
2513 // Load args directly into the register class where it will be used.
2514 // We do this by just modifying the type of the Arg (only when both types have the same size); the retyped Arg is created in the function's entry block.
2515 (MOVQf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
2516 (MOVLf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
2517 (MOVQi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
2518 (MOVLi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
2519
2520 // Replace an add of a constant to SP with an LEA. LEAQ is rematerializeable, so this helps to avoid register spill.
2521 // See issue 22947 for details
2522 (ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x)
2523
2524 // HMULx is commutative, but its first argument must go in AX.
2525 // If possible, put a rematerializeable value in the first argument slot,
2526 // to reduce the odds that another value will have to be spilled
2527 // specifically to free up AX.
2528 (HMUL(Q|L) x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L) y x)
2529 (HMUL(Q|L)U x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L)U y x)
2530
2531 // Fold loads into compares. When the load is the second operand, the operands are swapped and the result is wrapped in InvertFlags.
2532 // Note: these may be undone by the flagalloc pass.
2533 (CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) -> (CMP(Q|L|W|B)load {sym} [off] ptr x mem)
2534 (CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) -> (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem))
2535 // Compare of a single-use load against a constant: combine the constant and the load offset with makeValAndOff and emit the constload compare in the load's block.
2536 (CMP(Q|L|W|B)const l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) [c])
2537 && l.Uses == 1
2538 && validValAndOff(c, off)
2539 && clobber(l) ->
2540 @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(c,off)] ptr mem)
2541 // A folded compare whose register operand is a constant becomes a constload compare; for the W and B forms the constant is first narrowed to int16/int8.
2542 (CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> (CMPQconstload {sym} [makeValAndOff(c,off)] ptr mem)
2543 (CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(c,off) -> (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem)
2544 (CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
2545 (CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
2546 // TEST of a loaded value against itself (the load's only two uses) becomes a compare of the memory operand with constant 0.
2547 (TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2)
2548 && l == l2
2549 && l.Uses == 2
2550 && validValAndOff(0,off)
2551 && clobber(l) ->
2552 @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0,off)] ptr mem)
2553 // Loads based at SB from a symbol for which symIsRO holds are replaced by a constant computed with read8/read16/read32/read64 on (sym, off).
2554 (MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))])
2555 (MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.BigEndian))])
2556 (MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read32(sym, off, config.BigEndian))])
2557 (MOVQload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read64(sym, off, config.BigEndian))])
View as plain text