Text file src/cmd/compile/internal/ssa/gen/386.rules
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Lowering arithmetic
6 (Add(Ptr|32|16|8) x y) -> (ADDL x y)
7 (Add(32|64)F x y) -> (ADDS(S|D) x y)
8 (Add32carry x y) -> (ADDLcarry x y)
9 (Add32withcarry x y c) -> (ADCL x y c)
10
11 (Sub(Ptr|32|16|8) x y) -> (SUBL x y)
12 (Sub(32|64)F x y) -> (SUBS(S|D) x y)
13 (Sub32carry x y) -> (SUBLcarry x y)
14 (Sub32withcarry x y c) -> (SBBL x y c)
15
16 (Mul(32|16|8) x y) -> (MULL x y)
17 (Mul(32|64)F x y) -> (MULS(S|D) x y)
18 (Mul32uhilo x y) -> (MULLQU x y)
19
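// Mul32uover returns a (product, overflow) pair. MULLU yields the 32-bit
// product plus the flags, and SETO materializes the overflow flag as a bool.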
20 (Select0 (Mul32uover x y)) -> (Select0 <typ.UInt32> (MULLU x y))
21 (Select1 (Mul32uover x y)) -> (SETO (Select1 <types.TypeFlags> (MULLU x y)))
22
23 (Avg32u x y) -> (AVGLU x y)
24
25 (Div32F x y) -> (DIVSS x y)
26 (Div64F x y) -> (DIVSD x y)
27
28 (Div32 [a] x y) -> (DIVL [a] x y)
29 (Div32u x y) -> (DIVLU x y)
30 (Div16 [a] x y) -> (DIVW [a] x y)
31 (Div16u x y) -> (DIVWU x y)
32 (Div8 x y) -> (DIVW (SignExt8to16 x) (SignExt8to16 y))
33 (Div8u x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))
34
35 (Hmul32 x y) -> (HMULL x y)
36 (Hmul32u x y) -> (HMULLU x y)
37
38 (Mod32 [a] x y) -> (MODL [a] x y)
39 (Mod32u x y) -> (MODLU x y)
40 (Mod16 [a] x y) -> (MODW [a] x y)
41 (Mod16u x y) -> (MODWU x y)
42 (Mod8 x y) -> (MODW (SignExt8to16 x) (SignExt8to16 y))
43 (Mod8u x y) -> (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))
44
45 (And(32|16|8) x y) -> (ANDL x y)
46 (Or(32|16|8) x y) -> (ORL x y)
47 (Xor(32|16|8) x y) -> (XORL x y)
48
49 (Neg(32|16|8) x) -> (NEGL x)
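// Floating-point negation just flips the sign bit: XOR with -0.0 (a constant
// whose only set bit is the sign bit) on SSE, or FCHS on the 387.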
50 (Neg32F x) && !config.use387 -> (PXOR x (MOVSSconst <typ.Float32> [auxFrom32F(float32(math.Copysign(0, -1)))]))
51 (Neg64F x) && !config.use387 -> (PXOR x (MOVSDconst <typ.Float64> [auxFrom64F(math.Copysign(0, -1))]))
52 (Neg32F x) && config.use387 -> (FCHS x)
53 (Neg64F x) && config.use387 -> (FCHS x)
54
55 (Com(32|16|8) x) -> (NOTL x)
56
57 // Lowering boolean ops
58 (AndB x y) -> (ANDL x y)
59 (OrB x y) -> (ORL x y)
60 (Not x) -> (XORLconst [1] x)
61
62 // Lowering pointer arithmetic
63 (OffPtr [off] ptr) -> (ADDLconst [off] ptr)
64
65 (Bswap32 x) -> (BSWAPL x)
66
67 (Sqrt x) -> (SQRTSD x)
68
69 // Lowering extension
70 (SignExt8to16 x) -> (MOVBLSX x)
71 (SignExt8to32 x) -> (MOVBLSX x)
72 (SignExt16to32 x) -> (MOVWLSX x)
73
74 (ZeroExt8to16 x) -> (MOVBLZX x)
75 (ZeroExt8to32 x) -> (MOVBLZX x)
76 (ZeroExt16to32 x) -> (MOVWLZX x)
77
78 (Signmask x) -> (SARLconst x [31])
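// Zeromask: (CMPLconst x [1]) sets the carry flag exactly when x == 0
// (unsigned x < 1), SBBLcarrymask turns the carry into -1 or 0, and the
// final XOR with -1 flips that, giving 0 for x == 0 and -1 otherwise.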
79 (Zeromask <t> x) -> (XORLconst [-1] (SBBLcarrymask <t> (CMPLconst x [1])))
80 (Slicemask <t> x) -> (SARLconst (NEGL <t> x) [31])
81
82 // Lowering truncation
83 // Because we ignore the high parts of registers, truncates are just copies.
84 (Trunc16to8 x) -> x
85 (Trunc32to8 x) -> x
86 (Trunc32to16 x) -> x
87
88 // Lowering float <-> int
89 (Cvt32to32F x) -> (CVTSL2SS x)
90 (Cvt32to64F x) -> (CVTSL2SD x)
91
92 (Cvt32Fto32 x) -> (CVTTSS2SL x)
93 (Cvt64Fto32 x) -> (CVTTSD2SL x)
94
95 (Cvt32Fto64F x) -> (CVTSS2SD x)
96 (Cvt64Fto32F x) -> (CVTSD2SS x)
97
98 (Round32F x) -> x
99 (Round64F x) -> x
100
101 // Lowering shifts
102 // Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
103 // result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffff)
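// For example, if y = 40, (CMPLconst y [32]) leaves the carry clear
// (40 >= 32 unsigned), so SBBLcarrymask is 0 and the AND zeroes the result;
// if y = 5 the carry is set, the mask is all ones, and the SHLL result
// passes through unchanged.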
104 (Lsh32x32 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
105 (Lsh32x16 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
106 (Lsh32x8 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
107
108 (Lsh16x32 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
109 (Lsh16x16 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
110 (Lsh16x8 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
111
112 (Lsh8x32 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
113 (Lsh8x16 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
114 (Lsh8x8 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
115
116 (Rsh32Ux32 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
117 (Rsh32Ux16 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
118 (Rsh32Ux8 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
119
120 (Rsh16Ux32 <t> x y) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16])))
121 (Rsh16Ux16 <t> x y) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16])))
122 (Rsh16Ux8 <t> x y) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
123
124 (Rsh8Ux32 <t> x y) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8])))
125 (Rsh8Ux16 <t> x y) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8])))
126 (Rsh8Ux8 <t> x y) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
127
128 // Signed right shift needs to return 0/-1 if the shift amount is >= the width of the shifted value.
129 // We implement this by forcing the shift amount to -1 (all ones) when it is >= the width.
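// For example, if y = 40 the mask above is 0, NOTL makes it -1, and the ORL
// forces the shift amount to -1; the hardware masks the amount to 5 bits, so
// SARL shifts by 31, yielding 0 or -1 according to the sign of x.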
130
131 (Rsh32x32 <t> x y) -> (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [32])))))
132 (Rsh32x16 <t> x y) -> (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32])))))
133 (Rsh32x8 <t> x y) -> (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
134
135 (Rsh16x32 <t> x y) -> (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [16])))))
136 (Rsh16x16 <t> x y) -> (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16])))))
137 (Rsh16x8 <t> x y) -> (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
138
139 (Rsh8x32 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [8])))))
140 (Rsh8x16 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
141 (Rsh8x8 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
142
143 // constant shifts
144 // The generic opt pass rewrites all constant shifts to shifts by Const64.
145 (Lsh32x64 x (Const64 [c])) && uint64(c) < 32 -> (SHLLconst x [c])
146 (Rsh32x64 x (Const64 [c])) && uint64(c) < 32 -> (SARLconst x [c])
147 (Rsh32Ux64 x (Const64 [c])) && uint64(c) < 32 -> (SHRLconst x [c])
148 (Lsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SHLLconst x [c])
149 (Rsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SARWconst x [c])
150 (Rsh16Ux64 x (Const64 [c])) && uint64(c) < 16 -> (SHRWconst x [c])
151 (Lsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SHLLconst x [c])
152 (Rsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SARBconst x [c])
153 (Rsh8Ux64 x (Const64 [c])) && uint64(c) < 8 -> (SHRBconst x [c])
154
155 // large constant shifts
156 (Lsh32x64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
157 (Rsh32Ux64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
158 (Lsh16x64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
159 (Rsh16Ux64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
160 (Lsh8x64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
161 (Rsh8Ux64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
162
163 // Large constant signed right shifts: shifting by width-1 leaves just the sign bit.
164 (Rsh32x64 x (Const64 [c])) && uint64(c) >= 32 -> (SARLconst x [31])
165 (Rsh16x64 x (Const64 [c])) && uint64(c) >= 16 -> (SARWconst x [15])
166 (Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 -> (SARBconst x [7])
167
168 // constant rotates
169 (RotateLeft32 x (MOVLconst [c])) -> (ROLLconst [c&31] x)
170 (RotateLeft16 x (MOVLconst [c])) -> (ROLWconst [c&15] x)
171 (RotateLeft8 x (MOVLconst [c])) -> (ROLBconst [c&7] x)
172
173 // Lowering comparisons
174 (Less32 x y) -> (SETL (CMPL x y))
175 (Less16 x y) -> (SETL (CMPW x y))
176 (Less8 x y) -> (SETL (CMPB x y))
177 (Less32U x y) -> (SETB (CMPL x y))
178 (Less16U x y) -> (SETB (CMPW x y))
179 (Less8U x y) -> (SETB (CMPB x y))
180 // Use SETGF with reversed operands to dodge the NaN case.
181 (Less64F x y) -> (SETGF (UCOMISD y x))
182 (Less32F x y) -> (SETGF (UCOMISS y x))
183
184 (Leq32 x y) -> (SETLE (CMPL x y))
185 (Leq16 x y) -> (SETLE (CMPW x y))
186 (Leq8 x y) -> (SETLE (CMPB x y))
187 (Leq32U x y) -> (SETBE (CMPL x y))
188 (Leq16U x y) -> (SETBE (CMPW x y))
189 (Leq8U x y) -> (SETBE (CMPB x y))
190 // Use SETGEF with reversed operands to dodge the NaN case.
191 (Leq64F x y) -> (SETGEF (UCOMISD y x))
192 (Leq32F x y) -> (SETGEF (UCOMISS y x))
193
194 (Greater32 x y) -> (SETG (CMPL x y))
195 (Greater16 x y) -> (SETG (CMPW x y))
196 (Greater8 x y) -> (SETG (CMPB x y))
197 (Greater32U x y) -> (SETA (CMPL x y))
198 (Greater16U x y) -> (SETA (CMPW x y))
199 (Greater8U x y) -> (SETA (CMPB x y))
200 // Note: the Go assembler gets the UCOMISx operand order wrong, but it is right here.
201 // The bug is accommodated when the assembly is generated.
202 (Greater64F x y) -> (SETGF (UCOMISD x y))
203 (Greater32F x y) -> (SETGF (UCOMISS x y))
204
205 (Geq32 x y) -> (SETGE (CMPL x y))
206 (Geq16 x y) -> (SETGE (CMPW x y))
207 (Geq8 x y) -> (SETGE (CMPB x y))
208 (Geq32U x y) -> (SETAE (CMPL x y))
209 (Geq16U x y) -> (SETAE (CMPW x y))
210 (Geq8U x y) -> (SETAE (CMPB x y))
211 // Note: the Go assembler gets the UCOMISx operand order wrong, but it is right here.
212 // The bug is accommodated when the assembly is generated.
213 (Geq64F x y) -> (SETGEF (UCOMISD x y))
214 (Geq32F x y) -> (SETGEF (UCOMISS x y))
215
216 (Eq32 x y) -> (SETEQ (CMPL x y))
217 (Eq16 x y) -> (SETEQ (CMPW x y))
218 (Eq8 x y) -> (SETEQ (CMPB x y))
219 (EqB x y) -> (SETEQ (CMPB x y))
220 (EqPtr x y) -> (SETEQ (CMPL x y))
221 (Eq64F x y) -> (SETEQF (UCOMISD x y))
222 (Eq32F x y) -> (SETEQF (UCOMISS x y))
223
224 (Neq32 x y) -> (SETNE (CMPL x y))
225 (Neq16 x y) -> (SETNE (CMPW x y))
226 (Neq8 x y) -> (SETNE (CMPB x y))
227 (NeqB x y) -> (SETNE (CMPB x y))
228 (NeqPtr x y) -> (SETNE (CMPL x y))
229 (Neq64F x y) -> (SETNEF (UCOMISD x y))
230 (Neq32F x y) -> (SETNEF (UCOMISS x y))
231
232 // Lowering loads
233 (Load <t> ptr mem) && (is32BitInt(t) || isPtr(t)) -> (MOVLload ptr mem)
234 (Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
235 (Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
236 (Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
237 (Load <t> ptr mem) && is64BitFloat(t) -> (MOVSDload ptr mem)
238
239 // Lowering stores
240 // These more specific FP versions of the Store pattern must come first.
241 (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (MOVSDstore ptr val mem)
242 (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (MOVSSstore ptr val mem)
243
244 (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVLstore ptr val mem)
245 (Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVWstore ptr val mem)
246 (Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
247
248 // Lowering moves
249 (Move [0] _ _ mem) -> mem
250 (Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
251 (Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
252 (Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
253 (Move [3] dst src mem) ->
254 (MOVBstore [2] dst (MOVBload [2] src mem)
255 (MOVWstore dst (MOVWload src mem) mem))
256 (Move [5] dst src mem) ->
257 (MOVBstore [4] dst (MOVBload [4] src mem)
258 (MOVLstore dst (MOVLload src mem) mem))
259 (Move [6] dst src mem) ->
260 (MOVWstore [4] dst (MOVWload [4] src mem)
261 (MOVLstore dst (MOVLload src mem) mem))
262 (Move [7] dst src mem) ->
263 (MOVLstore [3] dst (MOVLload [3] src mem)
264 (MOVLstore dst (MOVLload src mem) mem))
265 (Move [8] dst src mem) ->
266 (MOVLstore [4] dst (MOVLload [4] src mem)
267 (MOVLstore dst (MOVLload src mem) mem))
268
269 // Adjust moves to be a multiple of 4 bytes.
270 (Move [s] dst src mem)
271 && s > 8 && s%4 != 0 ->
272 (Move [s-s%4]
273 (ADDLconst <dst.Type> dst [s%4])
274 (ADDLconst <src.Type> src [s%4])
275 (MOVLstore dst (MOVLload src mem) mem))
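// For example, Move [11] copies bytes 0-3 with the MOVLstore, then issues
// Move [8] at dst+3/src+3; byte 3 is copied twice, harmlessly, since the
// same value is stored both times.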
276
277 // Medium-sized copies use Duff's device.
278 (Move [s] dst src mem)
279 && s > 8 && s <= 4*128 && s%4 == 0
280 && !config.noDuffDevice ->
281 (DUFFCOPY [10*(128-s/4)] dst src mem)
282 // 10 and 128 are magic constants. 10 is the number of bytes to encode:
283 // MOVL (SI), CX
284 // ADDL $4, SI
285 // MOVL CX, (DI)
286 // ADDL $4, DI
287 // and 128 is the number of such blocks. See src/runtime/duff_386.s:duffcopy.
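// For example, Move [24] copies 6 words, so the offset is 10*(128-6) = 1220,
// entering duffcopy past the first 122 blocks so that exactly 6 run.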
288
289 // Large copying uses REP MOVSL.
290 (Move [s] dst src mem) && (s > 4*128 || config.noDuffDevice) && s%4 == 0 ->
291 (REPMOVSL dst src (MOVLconst [s/4]) mem)
292
293 // Lowering Zero instructions
294 (Zero [0] _ mem) -> mem
295 (Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
296 (Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
297 (Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
298
299 (Zero [3] destptr mem) ->
300 (MOVBstoreconst [makeValAndOff(0,2)] destptr
301 (MOVWstoreconst [0] destptr mem))
302 (Zero [5] destptr mem) ->
303 (MOVBstoreconst [makeValAndOff(0,4)] destptr
304 (MOVLstoreconst [0] destptr mem))
305 (Zero [6] destptr mem) ->
306 (MOVWstoreconst [makeValAndOff(0,4)] destptr
307 (MOVLstoreconst [0] destptr mem))
308 (Zero [7] destptr mem) ->
309 (MOVLstoreconst [makeValAndOff(0,3)] destptr
310 (MOVLstoreconst [0] destptr mem))
311
312 // Strip off any fractional word zeroing.
313 (Zero [s] destptr mem) && s%4 != 0 && s > 4 ->
314 (Zero [s-s%4] (ADDLconst destptr [s%4])
315 (MOVLstoreconst [0] destptr mem))
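// For example, Zero [10] zeroes bytes 0-3 with the MOVLstoreconst, then
// issues Zero [8] at destptr+2; bytes 2 and 3 are zeroed twice, harmlessly.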
316
317 // Zero small numbers of words directly.
318 (Zero [8] destptr mem) ->
319 (MOVLstoreconst [makeValAndOff(0,4)] destptr
320 (MOVLstoreconst [0] destptr mem))
321 (Zero [12] destptr mem) ->
322 (MOVLstoreconst [makeValAndOff(0,8)] destptr
323 (MOVLstoreconst [makeValAndOff(0,4)] destptr
324 (MOVLstoreconst [0] destptr mem)))
325 (Zero [16] destptr mem) ->
326 (MOVLstoreconst [makeValAndOff(0,12)] destptr
327 (MOVLstoreconst [makeValAndOff(0,8)] destptr
328 (MOVLstoreconst [makeValAndOff(0,4)] destptr
329 (MOVLstoreconst [0] destptr mem))))
330
331 // Medium-sized zeroing uses Duff's device.
332 (Zero [s] destptr mem)
333 && s > 16 && s <= 4*128 && s%4 == 0
334 && !config.noDuffDevice ->
335 (DUFFZERO [1*(128-s/4)] destptr (MOVLconst [0]) mem)
336 // 1 and 128 are magic constants. 1 is the number of bytes to encode STOSL.
337 // 128 is the number of STOSL instructions in duffzero.
338 // See src/runtime/duff_386.s:duffzero.
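// For example, Zero [64] clears 16 words, so the offset is 1*(128-16) = 112,
// entering duffzero with exactly 16 STOSL instructions left to execute.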
339
340 // Large zeroing uses REP STOSL.
341 (Zero [s] destptr mem)
342 && (s > 4*128 || (config.noDuffDevice && s > 16))
343 && s%4 == 0 ->
344 (REPSTOSL destptr (MOVLconst [s/4]) (MOVLconst [0]) mem)
345
346 // Lowering constants
347 (Const(8|16|32) [val]) -> (MOVLconst [val])
348 (Const(32|64)F [val]) -> (MOVS(S|D)const [val])
349 (ConstNil) -> (MOVLconst [0])
350 (ConstBool [b]) -> (MOVLconst [b])
351
352 // Lowering calls
353 (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
354 (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
355 (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
356
357 // Miscellaneous
358 (IsNonNil p) -> (SETNE (TESTL p p))
359 (IsInBounds idx len) -> (SETB (CMPL idx len))
360 (IsSliceInBounds idx len) -> (SETBE (CMPL idx len))
361 (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
362 (GetG mem) -> (LoweredGetG mem)
363 (GetClosurePtr) -> (LoweredGetClosurePtr)
364 (GetCallerPC) -> (LoweredGetCallerPC)
365 (GetCallerSP) -> (LoweredGetCallerSP)
366 (Addr {sym} base) -> (LEAL {sym} base)
367 (LocalAddr {sym} base _) -> (LEAL {sym} base)
368
369 // block rewrites
370 (If (SETL cmp) yes no) -> (LT cmp yes no)
371 (If (SETLE cmp) yes no) -> (LE cmp yes no)
372 (If (SETG cmp) yes no) -> (GT cmp yes no)
373 (If (SETGE cmp) yes no) -> (GE cmp yes no)
374 (If (SETEQ cmp) yes no) -> (EQ cmp yes no)
375 (If (SETNE cmp) yes no) -> (NE cmp yes no)
376 (If (SETB cmp) yes no) -> (ULT cmp yes no)
377 (If (SETBE cmp) yes no) -> (ULE cmp yes no)
378 (If (SETA cmp) yes no) -> (UGT cmp yes no)
379 (If (SETAE cmp) yes no) -> (UGE cmp yes no)
380 (If (SETO cmp) yes no) -> (OS cmp yes no)
381
382 // Special case for floating point - LF/LEF not generated
383 (If (SETGF cmp) yes no) -> (UGT cmp yes no)
384 (If (SETGEF cmp) yes no) -> (UGE cmp yes no)
385 (If (SETEQF cmp) yes no) -> (EQF cmp yes no)
386 (If (SETNEF cmp) yes no) -> (NEF cmp yes no)
387
388 (If cond yes no) -> (NE (TESTB cond cond) yes no)
389
390 // Write barrier.
391 (WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
392
393 (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
394 (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
395 (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
396
397 (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 -> (LoweredPanicExtendA [kind] hi lo y mem)
398 (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 -> (LoweredPanicExtendB [kind] hi lo y mem)
399 (PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 -> (LoweredPanicExtendC [kind] hi lo y mem)
400
401 // ***************************
402 // Above: lowering rules
403 // Below: optimizations
404 // ***************************
405 // TODO: Should the optimizations be a separate pass?
406
407 // Fold boolean tests into blocks
408 (NE (TESTB (SETL cmp) (SETL cmp)) yes no) -> (LT cmp yes no)
409 (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE cmp yes no)
410 (NE (TESTB (SETG cmp) (SETG cmp)) yes no) -> (GT cmp yes no)
411 (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE cmp yes no)
412 (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ cmp yes no)
413 (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE cmp yes no)
414 (NE (TESTB (SETB cmp) (SETB cmp)) yes no) -> (ULT cmp yes no)
415 (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no)
416 (NE (TESTB (SETA cmp) (SETA cmp)) yes no) -> (UGT cmp yes no)
417 (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
418 (NE (TESTB (SETO cmp) (SETO cmp)) yes no) -> (OS cmp yes no)
419
420 // Special case for floating point - LF/LEF not generated
421 (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) -> (UGT cmp yes no)
422 (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no)
423 (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no)
424 (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)
425
426 // fold constants into instructions
427 (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
428 (ADDLcarry x (MOVLconst [c])) -> (ADDLconstcarry [c] x)
429 (ADCL x (MOVLconst [c]) f) -> (ADCLconst [c] x f)
430 (ADCL (MOVLconst [c]) x f) -> (ADCLconst [c] x f)
431
432 (SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
433 (SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
434 (SUBLcarry x (MOVLconst [c])) -> (SUBLconstcarry [c] x)
435 (SBBL x (MOVLconst [c]) f) -> (SBBLconst [c] x f)
436
437 (MULL x (MOVLconst [c])) -> (MULLconst [c] x)
438
439 (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
440
441 (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
442
443 (XORLconst [c] (XORLconst [d] x)) -> (XORLconst [c ^ d] x)
444
445 (MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
446
447 (ORL x (MOVLconst [c])) -> (ORLconst [c] x)
448
449 (XORL x (MOVLconst [c])) -> (XORLconst [c] x)
450
451 (SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x)
452 (SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x)
453 (SHRW x (MOVLconst [c])) && c&31 < 16 -> (SHRWconst [c&31] x)
454 (SHRW _ (MOVLconst [c])) && c&31 >= 16 -> (MOVLconst [0])
455 (SHRB x (MOVLconst [c])) && c&31 < 8 -> (SHRBconst [c&31] x)
456 (SHRB _ (MOVLconst [c])) && c&31 >= 8 -> (MOVLconst [0])
457
458 (SARL x (MOVLconst [c])) -> (SARLconst [c&31] x)
459 (SARW x (MOVLconst [c])) -> (SARWconst [min(c&31,15)] x)
460 (SARB x (MOVLconst [c])) -> (SARBconst [min(c&31,7)] x)
461
462 (SARL x (ANDLconst [31] y)) -> (SARL x y)
463
464 (SHLL x (ANDLconst [31] y)) -> (SHLL x y)
465
466 (SHRL x (ANDLconst [31] y)) -> (SHRL x y)
467
468 // Rotate instructions
469
470 (ADDL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
471 ( ORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
472 (XORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
473
474 (ADDL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
475 ( ORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
476 (XORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
477
478 (ADDL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
479 ( ORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
480 (XORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
481
482 (ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
483 (ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
484 (ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
485
486 // Constant shift simplifications
487
488 (SHLLconst x [0]) -> x
489 (SHRLconst x [0]) -> x
490 (SARLconst x [0]) -> x
491
492 (SHRWconst x [0]) -> x
493 (SARWconst x [0]) -> x
494
495 (SHRBconst x [0]) -> x
496 (SARBconst x [0]) -> x
497
498 (ROLLconst [0] x) -> x
499 (ROLWconst [0] x) -> x
500 (ROLBconst [0] x) -> x
501
502 // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
503 // because the x86 instructions are defined to use all 5 bits of the shift even
504 // for the small shifts. I don't think we'll ever generate a weird shift (e.g.
505 // (SHRW x (MOVLconst [24]))), but just in case.
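// For the record, (SHRW x (MOVLconst [24])) is already covered: 24&31 >= 16,
// so the rule above rewrites it to (MOVLconst [0]).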
506
507 (CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
508 (CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
509 (CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))])
510 (CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))]))
511 (CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
512 (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
513
514 // strength reduction
515 // Assumes the following instruction costs, from https://gmplib.org/~tege/x86-timing.pdf:
516 // 1 - addl, shll, leal, negl, subl
517 // 3 - imull
518 // This limits the rewrites to two instructions.
519 // Note that negl always operates in-place,
520 // which can require a register-register move
521 // to preserve the original value,
522 // so it must be used with care.
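// For example, the [7] rule below computes x + 2*(x + 2*x) = 7*x with two
// LEALs (total cost 2), beating the cost-3 imull.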
523 (MULLconst [-9] x) -> (NEGL (LEAL8 <v.Type> x x))
524 (MULLconst [-5] x) -> (NEGL (LEAL4 <v.Type> x x))
525 (MULLconst [-3] x) -> (NEGL (LEAL2 <v.Type> x x))
526 (MULLconst [-1] x) -> (NEGL x)
527 (MULLconst [0] _) -> (MOVLconst [0])
528 (MULLconst [1] x) -> x
529 (MULLconst [3] x) -> (LEAL2 x x)
530 (MULLconst [5] x) -> (LEAL4 x x)
531 (MULLconst [7] x) -> (LEAL2 x (LEAL2 <v.Type> x x))
532 (MULLconst [9] x) -> (LEAL8 x x)
533 (MULLconst [11] x) -> (LEAL2 x (LEAL4 <v.Type> x x))
534 (MULLconst [13] x) -> (LEAL4 x (LEAL2 <v.Type> x x))
535 (MULLconst [19] x) -> (LEAL2 x (LEAL8 <v.Type> x x))
536 (MULLconst [21] x) -> (LEAL4 x (LEAL4 <v.Type> x x))
537 (MULLconst [25] x) -> (LEAL8 x (LEAL2 <v.Type> x x))
538 (MULLconst [27] x) -> (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
539 (MULLconst [37] x) -> (LEAL4 x (LEAL8 <v.Type> x x))
540 (MULLconst [41] x) -> (LEAL8 x (LEAL4 <v.Type> x x))
541 (MULLconst [45] x) -> (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
542 (MULLconst [73] x) -> (LEAL8 x (LEAL8 <v.Type> x x))
543 (MULLconst [81] x) -> (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
544
545 (MULLconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
546 (MULLconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
547 (MULLconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
548 (MULLconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
549 (MULLconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
550 (MULLconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
551 (MULLconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
552 (MULLconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
553
554 // combine add/shift into LEAL
555 (ADDL x (SHLLconst [3] y)) -> (LEAL8 x y)
556 (ADDL x (SHLLconst [2] y)) -> (LEAL4 x y)
557 (ADDL x (SHLLconst [1] y)) -> (LEAL2 x y)
558 (ADDL x (ADDL y y)) -> (LEAL2 x y)
559 (ADDL x (ADDL x y)) -> (LEAL2 y x)
560
561 // combine ADDL/ADDLconst into LEAL1
562 (ADDLconst [c] (ADDL x y)) -> (LEAL1 [c] x y)
563 (ADDL (ADDLconst [c] x) y) -> (LEAL1 [c] x y)
564
565 // fold ADDL into LEAL
566 (ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
567 (LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
568 (LEAL [c] {s} (ADDL x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
569 (ADDL x (LEAL [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
570
571 // fold ADDLconst into LEALx
572 (ADDLconst [c] (LEAL1 [d] {s} x y)) && is32Bit(c+d) -> (LEAL1 [c+d] {s} x y)
573 (ADDLconst [c] (LEAL2 [d] {s} x y)) && is32Bit(c+d) -> (LEAL2 [c+d] {s} x y)
574 (ADDLconst [c] (LEAL4 [d] {s} x y)) && is32Bit(c+d) -> (LEAL4 [c+d] {s} x y)
575 (ADDLconst [c] (LEAL8 [d] {s} x y)) && is32Bit(c+d) -> (LEAL8 [c+d] {s} x y)
576 (LEAL1 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL1 [c+d] {s} x y)
577 (LEAL2 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL2 [c+d] {s} x y)
578 (LEAL2 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAL2 [c+2*d] {s} x y)
579 (LEAL4 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL4 [c+d] {s} x y)
580 (LEAL4 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEAL4 [c+4*d] {s} x y)
581 (LEAL8 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL8 [c+d] {s} x y)
582 (LEAL8 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEAL8 [c+8*d] {s} x y)
583
584 // fold shifts into LEALx
585 (LEAL1 [c] {s} x (SHLLconst [1] y)) -> (LEAL2 [c] {s} x y)
586 (LEAL1 [c] {s} x (SHLLconst [2] y)) -> (LEAL4 [c] {s} x y)
587 (LEAL1 [c] {s} x (SHLLconst [3] y)) -> (LEAL8 [c] {s} x y)
588 (LEAL2 [c] {s} x (SHLLconst [1] y)) -> (LEAL4 [c] {s} x y)
589 (LEAL2 [c] {s} x (SHLLconst [2] y)) -> (LEAL8 [c] {s} x y)
590 (LEAL4 [c] {s} x (SHLLconst [1] y)) -> (LEAL8 [c] {s} x y)
591
592 // reverse ordering of compare instruction
593 (SETL (InvertFlags x)) -> (SETG x)
594 (SETG (InvertFlags x)) -> (SETL x)
595 (SETB (InvertFlags x)) -> (SETA x)
596 (SETA (InvertFlags x)) -> (SETB x)
597 (SETLE (InvertFlags x)) -> (SETGE x)
598 (SETGE (InvertFlags x)) -> (SETLE x)
599 (SETBE (InvertFlags x)) -> (SETAE x)
600 (SETAE (InvertFlags x)) -> (SETBE x)
601 (SETEQ (InvertFlags x)) -> (SETEQ x)
602 (SETNE (InvertFlags x)) -> (SETNE x)
603
604 // sign extended loads
605 // Note: The combined instruction must end up in the same block
606 // as the original load. If not, we end up making a value with
607 // memory type live in two different blocks, which can lead to
608 // multiple memory values alive simultaneously.
609 // Make sure we don't combine these ops if the load has another use.
610 // This prevents a single load from being split into multiple loads
611 // which then might return different values. See test/atomicload.go.
612 (MOVBLSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBLSXload <v.Type> [off] {sym} ptr mem)
613 (MOVBLZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
614 (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
615 (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
616
617 (MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
618 (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
619 (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
620
621 // replace a load from the same location as a preceding store with a zero/sign extension (or a copy when full width)
622 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x)
623 (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x)
624 (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
625 (MOVBLSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLSX x)
626 (MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLSX x)
627
628 // Fold extensions and ANDs together.
629 (MOVBLZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
630 (MOVWLZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
631 (MOVBLSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x)
632 (MOVWLSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x)
633
634 // Don't extend before storing
635 (MOVWstore [off] {sym} ptr (MOVWL(S|Z)X x) mem) -> (MOVWstore [off] {sym} ptr x mem)
636 (MOVBstore [off] {sym} ptr (MOVBL(S|Z)X x) mem) -> (MOVBstore [off] {sym} ptr x mem)
637
638 // fold constants into memory operations
639 // Note that this is not always a good idea because if not all the uses of
640 // the ADDLconst get eliminated, we still have to compute the ADDLconst and we now
641 // have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one.
642 // Nevertheless, let's do it!
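// For example, (MOVLload [4] {s} (ADDLconst [8] ptr) mem) becomes
// (MOVLload [12] {s} ptr mem); the ADDLconst dies if it has no other uses.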
643 (MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem)
644 (MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem)
645
646 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
647 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
648 ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) ->
649 ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem)
650 ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) ->
651 ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem)
652 ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
653 ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
654 ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
655 ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
656 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
657 ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
658 ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) ->
659 ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem)
660 ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) ->
661 ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem)
662 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
663 ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
664 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) ->
665 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
666 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) ->
667 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
668
669 // Fold constants into stores.
670 (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
671 (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
672 (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
673 (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
674 (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
675 (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
676
677 // Fold address offsets into constant stores.
678 (MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
679 (MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
680
681 // We need to fold LEAL into the MOVx ops so that the live variable analysis knows
682 // what variables are being read/written by the ops.
683 // Note: we turn off this merging for operations on globals when building
684 // position-independent code (when Flag_shared is set).
685 // PIC needs a spare register to load the PC into. Having the LEAL be
686 // a separate instruction gives us that register. Having the LEAL be
687 // a separate instruction also allows it to be CSEd (which is good because
688 // it compiles to a thunk call).
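// For example, (MOVLload [4] {sym1} (LEAL [8] {sym2} base) mem) becomes
// (MOVLload [12] {mergeSym(sym1,sym2)} base mem), unless base is SB and
// Flag_shared is set.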
689 (MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
690 && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
691 (MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
692
693 (MOV(L|W|B|SS|SD)store [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
694 && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
695 (MOV(L|W|B|SS|SD)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
696
697 (MOV(L|W|B)storeconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
698 && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
699 (MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
700
701 // generating indexed loads and stores
702 (MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
703 (MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
704 (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
705 (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
706 (MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
707 (MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
708 (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
709 (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
710
711 (MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
712 (MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
713 (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
714 (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
715 (MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
716 (MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
717 (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
718 (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
719
720 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
721 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
722 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
723 ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
724 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
725 ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
726 ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
727 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
728 ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
729 ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
730 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
731 ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
732 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
733 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
734 ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
735 ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
736 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
737 ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
738 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
739 && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
740 ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
741 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
742 && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
743 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
744
745 (MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem)
746 (MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
747
748 (MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
749 (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
750 (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
751 (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
752 (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
753 (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
754
755 (MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem)
756
757 // combine SHLL into indexed loads and stores
758 (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
759 (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem)
760 (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
761 (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem)
762 (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
763 (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
764
765 // combine ADDL into indexed loads and stores
766 (MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
767 (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem)
768 (MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem)
769 (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem)
770
771 (MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
772 (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem)
773 (MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem)
774 (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem)
775
776 (MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem)
777 (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem)
778 (MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem)
779 (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem)
780
781 (MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem)
782 (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem)
783 (MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem)
784 (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
785
786 // Merge load/store to op
787 ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
788 ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) ->
789 ((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem)
790 ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
791 && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
792 ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
793 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
794 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
795 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
796 (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
797 ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
798 (MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) ->
799 ((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
800 (MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
801 ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
802 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
803 && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) && validValAndOff(c,off) ->
804 ((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
805 (MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
806 && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) && validValAndOff(c,off) ->
807 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
808 ((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) ->
809 ((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
810 (SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) ->
811 (ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
812
813 (MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
814 (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
815 (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
816 (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
817 (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
818 (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
819
820 (MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
821 (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
822 (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
823 (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
824 (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) ->
825 (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
826
827 // fold LEALs together
828 (LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
829 (LEAL [off1+off2] {mergeSym(sym1,sym2)} x)
830
831 // LEAL into LEAL1
832 (LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
833 (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
834
835 // LEAL1 into LEAL
836 (LEAL [off1] {sym1} (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
837 (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
838
839 // LEAL into LEAL[248]
840 (LEAL2 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
841 (LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y)
842 (LEAL4 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
843 (LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y)
844 (LEAL8 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
845 (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
846
847 // LEAL[248] into LEAL
848 (LEAL [off1] {sym1} (LEAL2 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
849 (LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y)
850 (LEAL [off1] {sym1} (LEAL4 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
851 (LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y)
852 (LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
853 (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
854
855 // Absorb InvertFlags into branches.
856 (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
857 (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
858 (LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
859 (GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
860 (ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
861 (UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
862 (ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
863 (UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
864 (EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
865 (NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
866
867 // Constant comparisons.
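// For example, (CMPLconst (MOVLconst [-1]) [1]) is FlagLT_UGT: -1 < 1 as
// signed int32, but 0xffffffff > 1 as unsigned.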
868 (CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
869 (CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
870 (CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
871 (CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
872 (CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
873 (CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
874 (CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
875 (CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT)
876 (CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT)
877 (CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT)
878 (CMPBconst (MOVLconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ)
879 (CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT)
880 (CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT)
881 (CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT)
882 (CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)
883
884 // Other known comparisons.
885 (CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) -> (FlagLT_ULT)
886 (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
887 (CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT)
888 (CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < int8(n) -> (FlagLT_ULT)
889 // TODO: DIVxU also.
890
891 // Absorb flag constants into SBB ops.
892 (SBBLcarrymask (FlagEQ)) -> (MOVLconst [0])
893 (SBBLcarrymask (FlagLT_ULT)) -> (MOVLconst [-1])
894 (SBBLcarrymask (FlagLT_UGT)) -> (MOVLconst [0])
895 (SBBLcarrymask (FlagGT_ULT)) -> (MOVLconst [-1])
896 (SBBLcarrymask (FlagGT_UGT)) -> (MOVLconst [0])
897
898 // Absorb flag constants into branches.
899 (EQ (FlagEQ) yes no) -> (First nil yes no)
900 (EQ (FlagLT_ULT) yes no) -> (First nil no yes)
901 (EQ (FlagLT_UGT) yes no) -> (First nil no yes)
902 (EQ (FlagGT_ULT) yes no) -> (First nil no yes)
903 (EQ (FlagGT_UGT) yes no) -> (First nil no yes)
904
905 (NE (FlagEQ) yes no) -> (First nil no yes)
906 (NE (FlagLT_ULT) yes no) -> (First nil yes no)
907 (NE (FlagLT_UGT) yes no) -> (First nil yes no)
908 (NE (FlagGT_ULT) yes no) -> (First nil yes no)
909 (NE (FlagGT_UGT) yes no) -> (First nil yes no)
910
911 (LT (FlagEQ) yes no) -> (First nil no yes)
912 (LT (FlagLT_ULT) yes no) -> (First nil yes no)
913 (LT (FlagLT_UGT) yes no) -> (First nil yes no)
914 (LT (FlagGT_ULT) yes no) -> (First nil no yes)
915 (LT (FlagGT_UGT) yes no) -> (First nil no yes)
916
917 (LE (FlagEQ) yes no) -> (First nil yes no)
918 (LE (FlagLT_ULT) yes no) -> (First nil yes no)
919 (LE (FlagLT_UGT) yes no) -> (First nil yes no)
920 (LE (FlagGT_ULT) yes no) -> (First nil no yes)
921 (LE (FlagGT_UGT) yes no) -> (First nil no yes)
922
923 (GT (FlagEQ) yes no) -> (First nil no yes)
924 (GT (FlagLT_ULT) yes no) -> (First nil no yes)
925 (GT (FlagLT_UGT) yes no) -> (First nil no yes)
926 (GT (FlagGT_ULT) yes no) -> (First nil yes no)
927 (GT (FlagGT_UGT) yes no) -> (First nil yes no)
928
929 (GE (FlagEQ) yes no) -> (First nil yes no)
930 (GE (FlagLT_ULT) yes no) -> (First nil no yes)
931 (GE (FlagLT_UGT) yes no) -> (First nil no yes)
932 (GE (FlagGT_ULT) yes no) -> (First nil yes no)
933 (GE (FlagGT_UGT) yes no) -> (First nil yes no)
934
935 (ULT (FlagEQ) yes no) -> (First nil no yes)
936 (ULT (FlagLT_ULT) yes no) -> (First nil yes no)
937 (ULT (FlagLT_UGT) yes no) -> (First nil no yes)
938 (ULT (FlagGT_ULT) yes no) -> (First nil yes no)
939 (ULT (FlagGT_UGT) yes no) -> (First nil no yes)
940
941 (ULE (FlagEQ) yes no) -> (First nil yes no)
942 (ULE (FlagLT_ULT) yes no) -> (First nil yes no)
943 (ULE (FlagLT_UGT) yes no) -> (First nil no yes)
944 (ULE (FlagGT_ULT) yes no) -> (First nil yes no)
945 (ULE (FlagGT_UGT) yes no) -> (First nil no yes)
946
947 (UGT (FlagEQ) yes no) -> (First nil no yes)
948 (UGT (FlagLT_ULT) yes no) -> (First nil no yes)
949 (UGT (FlagLT_UGT) yes no) -> (First nil yes no)
950 (UGT (FlagGT_ULT) yes no) -> (First nil no yes)
951 (UGT (FlagGT_UGT) yes no) -> (First nil yes no)
952
953 (UGE (FlagEQ) yes no) -> (First nil yes no)
954 (UGE (FlagLT_ULT) yes no) -> (First nil no yes)
955 (UGE (FlagLT_UGT) yes no) -> (First nil yes no)
956 (UGE (FlagGT_ULT) yes no) -> (First nil no yes)
957 (UGE (FlagGT_UGT) yes no) -> (First nil yes no)
958
959 // Absorb flag constants into SETxx ops.
960 (SETEQ (FlagEQ)) -> (MOVLconst [1])
961 (SETEQ (FlagLT_ULT)) -> (MOVLconst [0])
962 (SETEQ (FlagLT_UGT)) -> (MOVLconst [0])
963 (SETEQ (FlagGT_ULT)) -> (MOVLconst [0])
964 (SETEQ (FlagGT_UGT)) -> (MOVLconst [0])
965
966 (SETNE (FlagEQ)) -> (MOVLconst [0])
967 (SETNE (FlagLT_ULT)) -> (MOVLconst [1])
968 (SETNE (FlagLT_UGT)) -> (MOVLconst [1])
969 (SETNE (FlagGT_ULT)) -> (MOVLconst [1])
970 (SETNE (FlagGT_UGT)) -> (MOVLconst [1])
971
972 (SETL (FlagEQ)) -> (MOVLconst [0])
973 (SETL (FlagLT_ULT)) -> (MOVLconst [1])
974 (SETL (FlagLT_UGT)) -> (MOVLconst [1])
975 (SETL (FlagGT_ULT)) -> (MOVLconst [0])
976 (SETL (FlagGT_UGT)) -> (MOVLconst [0])
977
978 (SETLE (FlagEQ)) -> (MOVLconst [1])
979 (SETLE (FlagLT_ULT)) -> (MOVLconst [1])
980 (SETLE (FlagLT_UGT)) -> (MOVLconst [1])
981 (SETLE (FlagGT_ULT)) -> (MOVLconst [0])
982 (SETLE (FlagGT_UGT)) -> (MOVLconst [0])
983
984 (SETG (FlagEQ)) -> (MOVLconst [0])
985 (SETG (FlagLT_ULT)) -> (MOVLconst [0])
986 (SETG (FlagLT_UGT)) -> (MOVLconst [0])
987 (SETG (FlagGT_ULT)) -> (MOVLconst [1])
988 (SETG (FlagGT_UGT)) -> (MOVLconst [1])
989
990 (SETGE (FlagEQ)) -> (MOVLconst [1])
991 (SETGE (FlagLT_ULT)) -> (MOVLconst [0])
992 (SETGE (FlagLT_UGT)) -> (MOVLconst [0])
993 (SETGE (FlagGT_ULT)) -> (MOVLconst [1])
994 (SETGE (FlagGT_UGT)) -> (MOVLconst [1])
995
996 (SETB (FlagEQ)) -> (MOVLconst [0])
997 (SETB (FlagLT_ULT)) -> (MOVLconst [1])
998 (SETB (FlagLT_UGT)) -> (MOVLconst [0])
999 (SETB (FlagGT_ULT)) -> (MOVLconst [1])
1000 (SETB (FlagGT_UGT)) -> (MOVLconst [0])
1001
1002 (SETBE (FlagEQ)) -> (MOVLconst [1])
1003 (SETBE (FlagLT_ULT)) -> (MOVLconst [1])
1004 (SETBE (FlagLT_UGT)) -> (MOVLconst [0])
1005 (SETBE (FlagGT_ULT)) -> (MOVLconst [1])
1006 (SETBE (FlagGT_UGT)) -> (MOVLconst [0])
1007
1008 (SETA (FlagEQ)) -> (MOVLconst [0])
1009 (SETA (FlagLT_ULT)) -> (MOVLconst [0])
1010 (SETA (FlagLT_UGT)) -> (MOVLconst [1])
1011 (SETA (FlagGT_ULT)) -> (MOVLconst [0])
1012 (SETA (FlagGT_UGT)) -> (MOVLconst [1])
1013
1014 (SETAE (FlagEQ)) -> (MOVLconst [1])
1015 (SETAE (FlagLT_ULT)) -> (MOVLconst [0])
1016 (SETAE (FlagLT_UGT)) -> (MOVLconst [1])
1017 (SETAE (FlagGT_ULT)) -> (MOVLconst [0])
1018 (SETAE (FlagGT_UGT)) -> (MOVLconst [1])
1019
1020 // Remove redundant *const ops
1021 (ADDLconst [c] x) && int32(c)==0 -> x
1022 (SUBLconst [c] x) && int32(c) == 0 -> x
1023 (ANDLconst [c] _) && int32(c)==0 -> (MOVLconst [0])
1024 (ANDLconst [c] x) && int32(c)==-1 -> x
1025 (ORLconst [c] x) && int32(c)==0 -> x
1026 (ORLconst [c] _) && int32(c)==-1 -> (MOVLconst [-1])
1027 (XORLconst [c] x) && int32(c)==0 -> x
1028 // TODO: since we got rid of the W/B versions, we might miss
1029 // things like (ANDLconst [0x100] x) which were formerly
1030 // (ANDBconst [0] x). Probably doesn't happen very often.
1031 // If we cared, we might do:
1032 // (ANDLconst <t> [c] x) && t.Size()==1 && int8(c)==0 -> (MOVLconst [0])
1033
1034 // Convert constant subtracts to constant adds
1035 (SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)
1036
1037 // generic constant folding
1038 // TODO: more of this
1039 (ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c+d))])
1040 (ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [int64(int32(c+d))] x)
1041 (SARLconst [c] (MOVLconst [d])) -> (MOVLconst [d>>uint64(c)])
1042 (SARWconst [c] (MOVLconst [d])) -> (MOVLconst [d>>uint64(c)])
1043 (SARBconst [c] (MOVLconst [d])) -> (MOVLconst [d>>uint64(c)])
1044 (NEGL (MOVLconst [c])) -> (MOVLconst [int64(int32(-c))])
1045 (MULLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c*d))])
1046 (ANDLconst [c] (MOVLconst [d])) -> (MOVLconst [c&d])
1047 (ORLconst [c] (MOVLconst [d])) -> (MOVLconst [c|d])
1048 (XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
1049 (NOTL (MOVLconst [c])) -> (MOVLconst [^c])
1050
1051 // generic simplifications
1052 // TODO: more of this
1053 (ADDL x (NEGL y)) -> (SUBL x y)
1054 (SUBL x x) -> (MOVLconst [0])
1055 (ANDL x x) -> x
1056 (ORL x x) -> x
1057 (XORL x x) -> (MOVLconst [0])

// Checking AND against 0: fold the compare into a TEST, which sets the same flags.
(CMP(L|W|B)const l:(ANDL x y) [0]) && l.Uses==1 -> (TEST(L|W|B) x y)
(CMPLconst l:(ANDLconst [c] x) [0]) && l.Uses==1 -> (TESTLconst [c] x)
(CMPWconst l:(ANDLconst [c] x) [0]) && l.Uses==1 -> (TESTWconst [int64(int16(c))] x)
(CMPBconst l:(ANDLconst [c] x) [0]) && l.Uses==1 -> (TESTBconst [int64(int8(c))] x)

// TEST %reg,%reg (2 bytes) is shorter than CMP $0,%reg (3 bytes) and sets the same flags.
(CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x)

// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.
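// For example (illustrative), binary.LittleEndian.Uint16 is implemented as
//	uint16(b[0]) | uint16(b[1])<<8
// which, after lowering, matches the first rule below and becomes a single MOVWload.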
(ORL x0:(MOVBload [i0] {s} p mem)
    s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)

(ORL o0:(ORL
                       x0:(MOVWload [i0] {s} p mem)
    s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
    s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
    && i2 == i0+2
    && i3 == i0+3
    && x0.Uses == 1
    && x1.Uses == 1
    && x2.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && o0.Uses == 1
    && mergePoint(b,x0,x1,x2) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(x2)
    && clobber(s0)
    && clobber(s1)
    && clobber(o0)
    -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)

(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
    s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(s0)
    -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)

(ORL o0:(ORL
                       x0:(MOVWloadidx1 [i0] {s} p idx mem)
    s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))
    s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
    && i2 == i0+2
    && i3 == i0+3
    && x0.Uses == 1
    && x1.Uses == 1
    && x2.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && o0.Uses == 1
    && mergePoint(b,x0,x1,x2) != nil
    && clobber(x0)
    && clobber(x1)
    && clobber(x2)
    && clobber(s0)
    && clobber(s1)
    && clobber(o0)
    -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)

// Combine constant stores into larger (unaligned) stores.
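// For example (illustrative), storing 0x12 at p+0 and 0x34 at p+1 merges into
// a single MOVWstoreconst of 0x3412 at p+0 (little-endian byte order).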
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
    && x.Uses == 1
    && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
    && x.Uses == 1
    && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
    && x.Uses == 1
    && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
    && x.Uses == 1
    && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)

(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
    && x.Uses == 1
    && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
    && x.Uses == 1
    && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
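// There is no scale-2 form of MOVLstoreconst, so the merged store switches to
// MOVLstoreconstidx1 and doubles the index with SHLLconst instead.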
(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
    && x.Uses == 1
    && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
    && clobber(x)
    -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)

// Combine stores into larger (unaligned) stores.
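// For example (illustrative), writing a 16-bit value one byte at a time:
//	p[0] = byte(w)
//	p[1] = byte(w >> 8)
// produces the MOVBstore pair matched by the first rule below.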
(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstore [i-1] {s} p w0 mem)
(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVLstore [i-2] {s} p w0 mem)

(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstoreidx1 [i] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVLstoreidx1 [i-2] {s} p idx w mem)
(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
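// As with the constant stores above, there is no scale-2 form of MOVLstore,
// so these switch to MOVLstoreidx1 and double the index.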
(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w mem)
(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst <idx.Type> [1] idx) w0 mem)

// For PIC, break floating-point constant loading into two instructions so we have
// a register to use for holding the address of the constant pool entry.
(MOVSSconst [c]) && config.ctxt.Flag_shared -> (MOVSSconst2 (MOVSSconst1 [c]))
(MOVSDconst [c]) && config.ctxt.Flag_shared -> (MOVSDconst2 (MOVSDconst1 [c]))
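// (Per the op definitions, roughly: MOVS(S|D)const1 materializes the pool
// entry's address in a register, and MOVS(S|D)const2 loads through it.)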
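// Fold a load into CMP. When the load is the second operand, folding it
// swaps the comparison's operands, so the result is wrapped in InvertFlags.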
(CMP(L|W|B) l:(MOV(L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) -> (CMP(L|W|B)load {sym} [off] ptr x mem)
(CMP(L|W|B) x l:(MOV(L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) -> (InvertFlags (CMP(L|W|B)load {sym} [off] ptr x mem))

(CMP(L|W|B)const l:(MOV(L|W|B)load {sym} [off] ptr mem) [c])
    && l.Uses == 1
    && validValAndOff(c, off)
    && clobber(l)
    -> @l.Block (CMP(L|W|B)constload {sym} [makeValAndOff(c,off)] ptr mem)

(CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int32(c)),off) -> (CMPLconstload {sym} [makeValAndOff(int64(int32(c)),off)] ptr mem)
(CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
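// Fold loads from read-only symbols (e.g. string literal data) into constants.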
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))])
(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.BigEndian))])
(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(int32(read32(sym, off, config.BigEndian)))])