...

Text file src/pkg/cmd/compile/internal/ssa/gen/S390X.rules

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Lowering arithmetic
     6	(Add(64|Ptr)  x y) -> (ADD  x y)
     7	(Add(32|16|8)  x y) -> (ADDW  x y)
     8	(Add32F x y) -> (FADDS x y)
     9	(Add64F x y) -> (FADD x y)
    10	
    11	(Sub(64|Ptr)  x y) -> (SUB  x y)
    12	(Sub(32|16|8)  x y) -> (SUBW  x y)
    13	(Sub32F x y) -> (FSUBS x y)
    14	(Sub64F x y) -> (FSUB x y)
    15	
    16	(Mul64  x y) -> (MULLD  x y)
    17	(Mul(32|16|8)  x y) -> (MULLW  x y)
    18	(Mul32F x y) -> (FMULS x y)
    19	(Mul64F x y) -> (FMUL x y)
    20	
    21	(Div32F x y) -> (FDIVS x y)
    22	(Div64F x y) -> (FDIV x y)
    23	
    24	(Div64  x y) -> (DIVD  x y)
    25	(Div64u x y) -> (DIVDU x y)
    26	// DIVW/DIVWU has a 64-bit dividend and a 32-bit divisor,
    27	// so a sign/zero extension of the dividend is required.
    28	(Div32  x y) -> (DIVW  (MOVWreg x) y)
    29	(Div32u x y) -> (DIVWU (MOVWZreg x) y)
    30	(Div16  x y) -> (DIVW  (MOVHreg x) (MOVHreg y))
    31	(Div16u x y) -> (DIVWU (MOVHZreg x) (MOVHZreg y))
    32	(Div8   x y) -> (DIVW  (MOVBreg x) (MOVBreg y))
    33	(Div8u  x y) -> (DIVWU (MOVBZreg x) (MOVBZreg y))
    34	
    35	(Hmul(64|64u)  x y) -> (MULH(D|DU)  x y)
    36	(Hmul32  x y) -> (SRDconst [32] (MULLD (MOVWreg x) (MOVWreg y)))
    37	(Hmul32u x y) -> (SRDconst [32] (MULLD (MOVWZreg x) (MOVWZreg y)))
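	// For example (a sketch of the Hmul32 rule above), x = -2, y = 3:
	//   MOVWreg x      -> 0xFFFFFFFFFFFFFFFE   (sign-extended -2)
	//   MULLD          -> 0xFFFFFFFFFFFFFFFA   (-6)
	//   SRDconst [32]  -> 0x00000000FFFFFFFF   (low 32 bits = -1, the high word of the 64-bit product)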
    38	
    39	(Mod(64|64u)  x y) -> (MOD(D|DU)  x y)
    40	// MODW/MODWU has a 64-bit dividend and a 32-bit divisor,
    41	// so a sign/zero extension of the dividend is required.
    42	(Mod32  x y) -> (MODW  (MOVWreg x) y)
    43	(Mod32u x y) -> (MODWU (MOVWZreg x) y)
    44	(Mod16  x y) -> (MODW  (MOVHreg x) (MOVHreg y))
    45	(Mod16u x y) -> (MODWU (MOVHZreg x) (MOVHZreg y))
    46	(Mod8   x y) -> (MODW  (MOVBreg x) (MOVBreg y))
    47	(Mod8u  x y) -> (MODWU (MOVBZreg x) (MOVBZreg y))
    48	
    49	// (x + y) / 2 with x>=y -> (x - y) / 2 + y
    50	(Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
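	// For example, x = 2^64-1, y = 1: the rewritten form computes
	// (x-y)>>1 + y = (2^64-2)/2 + 1 = 2^63, the correct unsigned average,
	// whereas computing x+y first would overflow to 0.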
    51	
    52	(And64 x y) -> (AND x y)
    53	(And(32|16|8) x y) -> (ANDW x y)
    54	
    55	(Or64 x y) -> (OR x y)
    56	(Or(32|16|8) x y) -> (ORW x y)
    57	
    58	(Xor64 x y) -> (XOR x y)
    59	(Xor(32|16|8) x y) -> (XORW x y)
    60	
    61	(Neg64 x) -> (NEG x)
    62	(Neg(32|16|8) x) -> (NEGW x)
    63	(Neg32F x) -> (FNEGS x)
    64	(Neg64F x) -> (FNEG x)
    65	
    66	(Com64 x) -> (NOT x)
    67	(Com(32|16|8) x) -> (NOTW x)
    68	(NOT x) && true -> (XOR (MOVDconst [-1]) x)
    69	(NOTW x) && true -> (XORWconst [-1] x)
    70	
    71	// Lowering boolean ops
    72	(AndB x y) -> (ANDW x y)
    73	(OrB x y) -> (ORW x y)
    74	(Not x) -> (XORWconst [1] x)
    75	
    76	// Lowering pointer arithmetic
    77	(OffPtr [off] ptr:(SP)) -> (MOVDaddr [off] ptr)
    78	(OffPtr [off] ptr) && is32Bit(off) -> (ADDconst [off] ptr)
    79	(OffPtr [off] ptr) -> (ADD (MOVDconst [off]) ptr)
    80	
    81	// TODO: optimize these cases?
    82	(Ctz64NonZero x) -> (Ctz64 x)
    83	(Ctz32NonZero x) -> (Ctz32 x)
    84	
    85	// Ctz(x) = 64 - findLeftmostOne((x-1)&^x)
    86	(Ctz64 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x))))
    87	(Ctz32 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
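	// For example, x = 0b1000 (8):
	//   (x-1)&^x  = 0b0111   (ones exactly where x has trailing zeros)
	//   FLOGR     = 61       (the leftmost one is preceded by 61 zero bits)
	//   64 - 61   = 3        = Ctz64(8)
	// x = 0 gives (x-1)&^x = all ones, FLOGR = 0, and the required result 64.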
    88	
    89	(BitLen64 x) -> (SUB (MOVDconst [64]) (FLOGR x))
    90	
    91	// POPCNT treats the input register as a vector of 8 bytes, producing
    92	// a population count for each individual byte. For inputs larger than
    93	// a single byte we therefore need to sum the individual bytes produced
    94	// by the POPCNT instruction. For example, the following instruction
    95	// sequence could be used to calculate the population count of a 4-byte
    96	// value:
    97	//
    98	//     MOVD   $0x12345678, R1 // R1=0x12345678 <-- input
    99	//     POPCNT R1, R2          // R2=0x02030404
   100	//     SRW    $16, R2, R3     // R3=0x00000203
   101	//     ADDW   R2, R3, R4      // R4=0x02030607
   102	//     SRW    $8, R4, R5      // R5=0x00020306
   103	//     ADDW   R4, R5, R6      // R6=0x0205090d
   104	//     MOVBZ  R6, R7          // R7=0x0000000d <-- result is 13
   105	//
   106	(PopCount8  x) -> (POPCNT (MOVBZreg x))
   107	(PopCount16 x) -> (MOVBZreg (SumBytes2 (POPCNT <typ.UInt16> x)))
   108	(PopCount32 x) -> (MOVBZreg (SumBytes4 (POPCNT <typ.UInt32> x)))
   109	(PopCount64 x) -> (MOVBZreg (SumBytes8 (POPCNT <typ.UInt64> x)))
   110	
   111	// SumBytes{2,4,8} pseudo operations sum the values of the rightmost
   112	// 2, 4 or 8 bytes respectively. The result is a single byte; however,
   113	// other bytes might contain junk, so a zero extension is required if
   114	// the desired output type is larger than 1 byte.
   115	(SumBytes2 x) -> (ADDW (SRWconst <typ.UInt8> x [8]) x)
   116	(SumBytes4 x) -> (SumBytes2 (ADDW <typ.UInt16> (SRWconst <typ.UInt16> x [16]) x))
   117	(SumBytes8 x) -> (SumBytes4 (ADDW <typ.UInt32> (SRDconst <typ.UInt32> x [32]) x))
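	// For example, SumBytes2 0x0203 computes 0x02 + 0x0203 = 0x0205; only the
	// low byte (0x05 = 2+3) is meaningful, which is why the PopCount rules
	// finish with a MOVBZreg.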
   118	
   119	(Bswap64 x) -> (MOVDBR x)
   120	(Bswap32 x) -> (MOVWBR x)
   121	
   122	// add with carry
   123	(Select0 (Add64carry x y c))
   124	  -> (Select0 <typ.UInt64> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))
   125	(Select1 (Add64carry x y c))
   126	  -> (Select0 <typ.UInt64> (ADDE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))))
   127	
   128	// subtract with borrow
   129	(Select0 (Sub64borrow x y c))
   130	  -> (Select0 <typ.UInt64> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c))))
   131	(Select1 (Sub64borrow x y c))
   132	  -> (NEG (Select0 <typ.UInt64> (SUBE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c)))))))
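	// In both rewrites the 0/1 carry (borrow) input c is first converted into
	// the machine carry flag: (ADDCconst c [-1]) carries out exactly when
	// c != 0, and (SUBC (MOVDconst [0]) c) borrows exactly when c != 0.
	// ADDE/SUBE then consume that flag, and the carry (borrow) out is
	// materialized by a second ADDE/SUBE on zero operands (negated in the
	// borrow case).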
   133	
   134	// math package intrinsics
   135	(Sqrt        x) -> (FSQRT x)
   136	(Floor       x) -> (FIDBR [7] x)
   137	(Ceil        x) -> (FIDBR [6] x)
   138	(Trunc       x) -> (FIDBR [5] x)
   139	(RoundToEven x) -> (FIDBR [4] x)
   140	(Round       x) -> (FIDBR [1] x)
   141	
   142	// Atomic loads and stores.
   143	// The SYNC instruction (fast-BCR-serialization) prevents store-load
   144	// reordering. Other sequences of memory operations (load-load,
   145	// store-store and load-store) are already guaranteed not to be reordered.
   146	(AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) -> (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem)
   147	(AtomicStore(32|64|PtrNoWB) ptr val mem) -> (SYNC (MOV(W|D|D)atomicstore ptr val mem))
   148	
   149	// Store-release doesn't require store-load ordering.
   150	(AtomicStoreRel32 ptr val mem) -> (MOVWatomicstore ptr val mem)
   151	
   152	// Atomic adds.
   153	(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))
   154	(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (LAAG ptr val mem))
   155	(Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDW val (Select0 <t> tuple))
   156	(Select1     (AddTupleFirst32   _ tuple)) -> (Select1 tuple)
   157	(Select0 <t> (AddTupleFirst64 val tuple)) -> (ADD val (Select0 <t> tuple))
   158	(Select1     (AddTupleFirst64   _ tuple)) -> (Select1 tuple)
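	// LAA/LAAG return the value the memory location held before the addition,
	// while AtomicAdd is defined to return the new value, so the first result
	// is rebuilt by adding val to the loaded old value once more.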
   159	
   160	// Atomic exchanges.
   161	(AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
   162	(AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)
   163	
   164	// Atomic compare and swap.
   165	(AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
   166	(AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)
   167	
   168	// Lowering extension
   169	// Note: we always extend to 64 bits even though some ops don't need that many result bits.
   170	(SignExt8to(16|32|64)  x) -> (MOVBreg x)
   171	(SignExt16to(32|64) x) -> (MOVHreg x)
   172	(SignExt32to64 x) -> (MOVWreg x)
   173	
   174	(ZeroExt8to(16|32|64)  x) -> (MOVBZreg x)
   175	(ZeroExt16to(32|64) x) -> (MOVHZreg x)
   176	(ZeroExt32to64 x) -> (MOVWZreg x)
   177	
   178	(Slicemask <t> x) -> (SRADconst (NEG <t> x) [63])
   179	
   180	// Lowering truncation
   181	// Because we ignore high parts of registers, truncates are just copies.
   182	(Trunc(16|32|64)to8  x) -> x
   183	(Trunc(32|64)to16 x) -> x
   184	(Trunc64to32 x) -> x
   185	
   186	// Lowering float <-> int
   187	(Cvt32to32F x) -> (CEFBRA x)
   188	(Cvt32to64F x) -> (CDFBRA x)
   189	(Cvt64to32F x) -> (CEGBRA x)
   190	(Cvt64to64F x) -> (CDGBRA x)
   191	
   192	(Cvt32Fto32 x) -> (CFEBRA x)
   193	(Cvt32Fto64 x) -> (CGEBRA x)
   194	(Cvt64Fto32 x) -> (CFDBRA x)
   195	(Cvt64Fto64 x) -> (CGDBRA x)
   196	
   197	(Cvt32Fto64F x) -> (LDEBR x)
   198	(Cvt64Fto32F x) -> (LEDBR x)
   199	
   200	(Round(32|64)F x) -> (LoweredRound(32|64)F x)
   201	
   202	// Lowering shifts
   203	
   204	// Lower bounded shifts first. No need to check shift value.
   205	(Lsh64x(64|32|16|8)  x y) && shiftIsBounded(v) -> (SLD x y)
   206	(Lsh32x(64|32|16|8)  x y) && shiftIsBounded(v) -> (SLW x y)
   207	(Lsh16x(64|32|16|8)  x y) && shiftIsBounded(v) -> (SLW x y)
   208	(Lsh8x(64|32|16|8)   x y) && shiftIsBounded(v) -> (SLW x y)
   209	(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRD x y)
   210	(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW x y)
   211	(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW (MOVHZreg x) y)
   212	(Rsh8Ux(64|32|16|8)  x y) && shiftIsBounded(v) -> (SRW (MOVBZreg x) y)
   213	(Rsh64x(64|32|16|8)  x y) && shiftIsBounded(v) -> (SRAD x y)
   214	(Rsh32x(64|32|16|8)  x y) && shiftIsBounded(v) -> (SRAW x y)
   215	(Rsh16x(64|32|16|8)  x y) && shiftIsBounded(v) -> (SRAW (MOVHreg x) y)
   216	(Rsh8x(64|32|16|8)   x y) && shiftIsBounded(v) -> (SRAW (MOVBreg x) y)
   217	
   218	// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
   219	//   result = shift >= 64 ? 0 : arg << shift
   220	(Lsh(64|32|16|8)x64 <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64]))
   221	(Lsh(64|32|16|8)x32 <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64]))
   222	(Lsh(64|32|16|8)x16 <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
   223	(Lsh(64|32|16|8)x8  <t> x y) -> (MOVDGE <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))
   224	
   225	(Rsh(64|32)Ux64 <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64]))
   226	(Rsh(64|32)Ux32 <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64]))
   227	(Rsh(64|32)Ux16 <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
   228	(Rsh(64|32)Ux8  <t> x y) -> (MOVDGE <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))
   229	
   230	(Rsh(16|8)Ux64 <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPUconst y [64]))
   231	(Rsh(16|8)Ux32 <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst y [64]))
   232	(Rsh(16|8)Ux16 <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
   233	(Rsh(16|8)Ux8  <t> x y) -> (MOVDGE <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))
   234	
   235	// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
   236	// We implement this by setting the shift value to 63 (all ones) if the shift value is more than 63.
   237	//   result = arg >> (shift >= 64 ? 63 : shift)
   238	(Rsh(64|32)x64 x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst  y [64])))
   239	(Rsh(64|32)x32 x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64])))
   240	(Rsh(64|32)x16 x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64])))
   241	(Rsh(64|32)x8  x y) -> (SRA(D|W) x (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64])))
   242	
   243	(Rsh(16|8)x64 x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst  y [64])))
   244	(Rsh(16|8)x32 x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64])))
   245	(Rsh(16|8)x16 x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64])))
   246	(Rsh(16|8)x8  x y) -> (SRAW (MOV(H|B)reg x) (MOVDGE <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64])))
   247	
   248	// Lowering rotates
   249	(RotateLeft8 <t> x (MOVDconst [c])) -> (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
   250	(RotateLeft16 <t> x (MOVDconst [c])) -> (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
   251	(RotateLeft32 x y) -> (RLL  x y)
   252	(RotateLeft64 x y) -> (RLLG x y)
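	// 32- and 64-bit rotates map directly onto RLL/RLLG; the 8- and 16-bit
	// forms above are decomposed into a pair of shifts. For example,
	// RotateLeft8 x [3] becomes (x<<3 | x>>5), since c&7 = 3 and -c&7 = 5.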
   253	
   254	// Lowering comparisons
   255	(Less64      x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   256	(Less32      x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   257	(Less(16|8)  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
   258	(Less64U     x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   259	(Less32U     x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   260	(Less(16|8)U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
   261	// Use a 'greater than' comparison with reversed operands to dodge the NaN case.
   262	(Less64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
   263	(Less32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))
   264	
   265	(Leq64      x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   266	(Leq32      x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   267	(Leq(16|8)  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
   268	(Leq64U     x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   269	(Leq32U     x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   270	(Leq(16|8)U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
   271	// Use a 'greater than or equal' comparison with reversed operands to dodge the NaN case.
   272	(Leq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
   273	(Leq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))
   274	
   275	(Greater64      x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   276	(Greater32      x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   277	(Greater(16|8)  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
   278	(Greater64U     x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   279	(Greater32U     x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   280	(Greater(16|8)U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
   281	(Greater64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   282	(Greater32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   283	
   284	(Geq64      x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   285	(Geq32      x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   286	(Geq(16|8)  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
   287	(Geq64U     x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   288	(Geq32U     x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   289	(Geq(16|8)U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
   290	(Geq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   291	(Geq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   292	
   293	(Eq(64|Ptr) x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   294	(Eq32       x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   295	(Eq(16|8|B) x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
   296	(Eq64F      x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   297	(Eq32F      x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   298	
   299	(Neq(64|Ptr) x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   300	(Neq32       x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   301	(Neq(16|8|B) x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
   302	(Neq64F      x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   303	(Neq32F      x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   304	
   305	// Lowering loads
   306	(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
   307	(Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
   308	(Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
   309	(Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
   310	(Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
   311	(Load <t> ptr mem) && is8BitInt(t) && isSigned(t) -> (MOVBload ptr mem)
   312	(Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && !isSigned(t))) -> (MOVBZload ptr mem)
   313	(Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
   314	(Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
   315	
   316	// Lowering stores
   317	// These more-specific FP versions of the Store pattern should come first.
   318	(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
   319	(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)
   320	
   321	(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVDstore ptr val mem)
   322	(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVWstore ptr val mem)
   323	(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
   324	(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
   325	
   326	// Lowering moves
   327	
   328	// Load and store for small copies.
   329	(Move [0] _ _ mem) -> mem
   330	(Move [1] dst src mem) -> (MOVBstore dst (MOVBZload src mem) mem)
   331	(Move [2] dst src mem) -> (MOVHstore dst (MOVHZload src mem) mem)
   332	(Move [4] dst src mem) -> (MOVWstore dst (MOVWZload src mem) mem)
   333	(Move [8] dst src mem) -> (MOVDstore dst (MOVDload src mem) mem)
   334	(Move [16] dst src mem) ->
   335		(MOVDstore [8] dst (MOVDload [8] src mem)
   336			(MOVDstore dst (MOVDload src mem) mem))
   337	(Move [24] dst src mem) ->
   338	        (MOVDstore [16] dst (MOVDload [16] src mem)
   339		        (MOVDstore [8] dst (MOVDload [8] src mem)
   340	                (MOVDstore dst (MOVDload src mem) mem)))
   341	(Move [3] dst src mem) ->
   342		(MOVBstore [2] dst (MOVBZload [2] src mem)
   343			(MOVHstore dst (MOVHZload src mem) mem))
   344	(Move [5] dst src mem) ->
   345		(MOVBstore [4] dst (MOVBZload [4] src mem)
   346			(MOVWstore dst (MOVWZload src mem) mem))
   347	(Move [6] dst src mem) ->
   348		(MOVHstore [4] dst (MOVHZload [4] src mem)
   349			(MOVWstore dst (MOVWZload src mem) mem))
   350	(Move [7] dst src mem) ->
   351		(MOVBstore [6] dst (MOVBZload [6] src mem)
   352			(MOVHstore [4] dst (MOVHZload [4] src mem)
   353				(MOVWstore dst (MOVWZload src mem) mem)))
   354	
   355	// MVC for other moves. Use up to 4 instructions (sizes up to 1024 bytes).
   356	(Move [s] dst src mem) && s > 0 && s <= 256 ->
   357		(MVC [makeValAndOff(s, 0)] dst src mem)
   358	(Move [s] dst src mem) && s > 256 && s <= 512 ->
   359		(MVC [makeValAndOff(s-256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))
   360	(Move [s] dst src mem) && s > 512 && s <= 768 ->
   361		(MVC [makeValAndOff(s-512, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem)))
   362	(Move [s] dst src mem) && s > 768 && s <= 1024 ->
   363		(MVC [makeValAndOff(s-768, 768)] dst src (MVC [makeValAndOff(256, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))))
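	// For example, a 700-byte Move is lowered to three MVCs: 256 bytes at
	// offset 0, 256 bytes at offset 256, and the remaining 188 bytes at
	// offset 512.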
   364	
   365	// Move more than 1024 bytes using a loop.
   366	(Move [s] dst src mem) && s > 1024 ->
   367		(LoweredMove [s%256] dst src (ADD <src.Type> src (MOVDconst [(s/256)*256])) mem)
   368	
   369	// Lowering Zero instructions
   370	(Zero [0] _ mem) -> mem
   371	(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
   372	(Zero [2] destptr mem) -> (MOVHstoreconst [0] destptr mem)
   373	(Zero [4] destptr mem) -> (MOVWstoreconst [0] destptr mem)
   374	(Zero [8] destptr mem) -> (MOVDstoreconst [0] destptr mem)
   375	(Zero [3] destptr mem) ->
   376		(MOVBstoreconst [makeValAndOff(0,2)] destptr
   377			(MOVHstoreconst [0] destptr mem))
   378	(Zero [5] destptr mem) ->
   379		(MOVBstoreconst [makeValAndOff(0,4)] destptr
   380			(MOVWstoreconst [0] destptr mem))
   381	(Zero [6] destptr mem) ->
   382		(MOVHstoreconst [makeValAndOff(0,4)] destptr
   383			(MOVWstoreconst [0] destptr mem))
   384	(Zero [7] destptr mem) ->
   385		(MOVWstoreconst [makeValAndOff(0,3)] destptr
   386			(MOVWstoreconst [0] destptr mem))
   387	
   388	(Zero [s] destptr mem) && s > 0 && s <= 1024 ->
   389		(CLEAR [makeValAndOff(s, 0)] destptr mem)
   390	
   391	// Move more than 1024 bytes using a loop.
   392	(Zero [s] destptr mem) && s > 1024 ->
   393		(LoweredZero [s%256] destptr (ADDconst <destptr.Type> destptr [(s/256)*256]) mem)
   394	
   395	// Lowering constants
   396	(Const(64|32|16|8)  [val]) -> (MOVDconst [val])
   397	(Const(32|64)F [val]) -> (FMOV(S|D)const [val])
   398	(ConstNil) -> (MOVDconst [0])
   399	(ConstBool [b]) -> (MOVDconst [b])
   400	
   401	// Lowering calls
   402	(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
   403	(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
   404	(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
   405	
   406	// Miscellaneous
   407	(IsNonNil p) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPconst p [0]))
   408	(IsInBounds idx len) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
   409	(IsSliceInBounds idx len) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
   410	(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
   411	(GetG mem) -> (LoweredGetG mem)
   412	(GetClosurePtr) -> (LoweredGetClosurePtr)
   413	(GetCallerSP) -> (LoweredGetCallerSP)
   414	(GetCallerPC) -> (LoweredGetCallerPC)
   415	(Addr {sym} base) -> (MOVDaddr {sym} base)
   416	(LocalAddr {sym} base _) -> (MOVDaddr {sym} base)
   417	(ITab (Load ptr mem)) -> (MOVDload ptr mem)
   418	
   419	// block rewrites
   420	(If (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (LT cmp yes no)
   421	(If (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (LE cmp yes no)
   422	(If (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GT cmp yes no)
   423	(If (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GE cmp yes no)
   424	(If (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (EQ cmp yes no)
   425	(If (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (NE cmp yes no)
   426	
   427	// Special case for floating point - LF/LEF not generated.
   428	(If (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GTF cmp yes no)
   429	(If (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GEF cmp yes no)
   430	
   431	(If cond yes no) -> (NE (CMPWconst [0] (MOVBZreg <typ.Bool> cond)) yes no)
   432	
   433	// Write barrier.
   434	(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
   435	
   436	(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
   437	(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
   438	(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
   439	
   440	// ***************************
   441	// Above: lowering rules
   442	// Below: optimizations
   443	// ***************************
   444	// TODO: Should the optimizations be a separate pass?
   445	
   446	// Fold unnecessary type conversions.
   447	(MOVDreg <t> x) && t.Compare(x.Type) == types.CMPeq -> x
   448	(MOVDnop <t> x) && t.Compare(x.Type) == types.CMPeq -> x
   449	
   450	// Propagate constants through type conversions.
   451	(MOVDreg (MOVDconst [c])) -> (MOVDconst [c])
   452	(MOVDnop (MOVDconst [c])) -> (MOVDconst [c])
   453	
   454	// If a register move has only 1 use, just use the same register without emitting an instruction.
   455	// MOVDnop doesn't emit an instruction; it exists only to ensure the type.
   456	(MOVDreg x) && x.Uses == 1 -> (MOVDnop x)
   457	
   458	// Fold type changes into loads.
   459	(MOVDreg <t> x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <t> [off] {sym} ptr mem)
   460	(MOVDreg <t> x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <t> [off] {sym} ptr mem)
   461	(MOVDreg <t> x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <t> [off] {sym} ptr mem)
   462	(MOVDreg <t> x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <t> [off] {sym} ptr mem)
   463	(MOVDreg <t> x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <t> [off] {sym} ptr mem)
   464	(MOVDreg <t> x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <t> [off] {sym} ptr mem)
   465	(MOVDreg <t> x:(MOVDload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload  <t> [off] {sym} ptr mem)
   466	
   467	(MOVDnop <t> x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <t> [off] {sym} ptr mem)
   468	(MOVDnop <t> x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <t> [off] {sym} ptr mem)
   469	(MOVDnop <t> x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <t> [off] {sym} ptr mem)
   470	(MOVDnop <t> x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <t> [off] {sym} ptr mem)
   471	(MOVDnop <t> x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <t> [off] {sym} ptr mem)
   472	(MOVDnop <t> x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <t> [off] {sym} ptr mem)
   473	(MOVDnop <t> x:(MOVDload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload  <t> [off] {sym} ptr mem)
   474	
   475	(MOVDreg <t> x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
   476	(MOVDreg <t> x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <t> [off] {sym} ptr idx mem)
   477	(MOVDreg <t> x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
   478	(MOVDreg <t> x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <t> [off] {sym} ptr idx mem)
   479	(MOVDreg <t> x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
   480	(MOVDreg <t> x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <t> [off] {sym} ptr idx mem)
   481	(MOVDreg <t> x:(MOVDloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
   482	
   483	(MOVDnop <t> x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
   484	(MOVDnop <t> x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <t> [off] {sym} ptr idx mem)
   485	(MOVDnop <t> x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
   486	(MOVDnop <t> x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <t> [off] {sym} ptr idx mem)
   487	(MOVDnop <t> x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
   488	(MOVDnop <t> x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <t> [off] {sym} ptr idx mem)
   489	(MOVDnop <t> x:(MOVDloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
   490	
   491	// Fold sign extensions into conditional moves of constants.
   492	// Designed to remove the MOVBZreg inserted by the If lowering.
   493	(MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   494	(MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   495	(MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   496	(MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   497	(MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   498	(MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   499	(MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   500	(MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   501	
   502	// Fold boolean tests into blocks.
   503	(NE (CMPWconst [0] (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LT cmp yes no)
   504	(NE (CMPWconst [0] (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LE cmp yes no)
   505	(NE (CMPWconst [0] (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GT cmp yes no)
   506	(NE (CMPWconst [0] (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GE cmp yes no)
   507	(NE (CMPWconst [0] (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (EQ cmp yes no)
   508	(NE (CMPWconst [0] (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (NE cmp yes no)
   509	(NE (CMPWconst [0] (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GTF cmp yes no)
   510	(NE (CMPWconst [0] (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GEF cmp yes no)
   511	
   512	// Fold constants into instructions.
   513	(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
   514	(ADDW x (MOVDconst [c])) -> (ADDWconst [int64(int32(c))] x)
   515	
   516	(SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
   517	(SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
   518	(SUBW x (MOVDconst [c])) -> (SUBWconst x [int64(int32(c))])
   519	(SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [int64(int32(c))]))
   520	
   521	(MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
   522	(MULLW x (MOVDconst [c])) -> (MULLWconst [int64(int32(c))] x)
   523	
   524	// NILF instructions leave the high 32 bits unchanged, which is
   525	// equivalent to ANDing with a mask whose leftmost 32 bits are all set.
   526	// TODO(mundaym): modify the assembler to accept 64-bit values
   527	// and use isU32Bit(^c).
   528	(AND x (MOVDconst [c])) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
   529	(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(int32(c))] x))
   530	(ANDW x (MOVDconst [c])) -> (ANDWconst [int64(int32(c))] x)
   531	
   532	(ANDWconst [c] (ANDWconst [d] x)) -> (ANDWconst [c & d] x)
   533	(ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c & d] x)
   534	
   535	(OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
   536	(ORW x (MOVDconst [c])) -> (ORWconst [int64(int32(c))] x)
   537	
   538	(XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
   539	(XORW x (MOVDconst [c])) -> (XORWconst [int64(int32(c))] x)
   540	
   541	// Constant shifts.
   542	(S(LD|RD|RAD|LW|RW|RAW) x (MOVDconst [c]))
   543		-> (S(LD|RD|RAD|LW|RW|RAW)const x [c&63])
   544	
   545	// Shifts only use the rightmost 6 bits of the shift value.
   546	(S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y))
   547		-> (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [c&63] y))
   548	(S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63
   549		-> (S(LD|RD|RAD|LW|RW|RAW) x y)
   550	(SLD  x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SLD  x y)
   551	(SRD  x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRD  x y)
   552	(SRAD x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRAD x y)
   553	(SLW  x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SLW  x y)
   554	(SRW  x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRW  x y)
   555	(SRAW x (MOV(D|W|H|B|WZ|HZ|BZ)reg y)) -> (SRAW x y)
   556	
   557	// Constant rotate generation
   558	(RLL  x (MOVDconst [c])) -> (RLLconst  x [c&31])
   559	(RLLG x (MOVDconst [c])) -> (RLLGconst x [c&63])
   560	
   561	(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
   562	( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
   563	(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
   564	
   565	(ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
   566	( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
   567	(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
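	// For example, (x<<10 | x>>54) on a 64-bit value is recognized here as a
	// rotate left by 10 (RLLGconst [10]), since 54 == 64-10.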
   568	
   569	(CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
   570	(CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
   571	(CMPW x (MOVDconst [c])) -> (CMPWconst x [int64(int32(c))])
   572	(CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int32(c))]))
   573	(CMPU x (MOVDconst [c])) && isU32Bit(c) -> (CMPUconst x [int64(int32(c))])
   574	(CMPU (MOVDconst [c]) x) && isU32Bit(c) -> (InvertFlags (CMPUconst x [int64(int32(c))]))
   575	(CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
   576	(CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
   577	
   578	// Using MOV{W,H,B}Zreg instead of AND is cheaper.
   579	(AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
   580	(AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
   581	(AND x (MOVDconst [0xFFFFFFFF])) -> (MOVWZreg x)
   582	(ANDWconst [0xFF] x) -> (MOVBZreg x)
   583	(ANDWconst [0xFFFF] x) -> (MOVHZreg x)
   584	
   585	// strength reduction
   586	(MULLDconst [-1] x) -> (NEG x)
   587	(MULLDconst [0] _) -> (MOVDconst [0])
   588	(MULLDconst [1] x) -> x
   589	(MULLDconst [c] x) && isPowerOfTwo(c) -> (SLDconst [log2(c)] x)
   590	(MULLDconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
   591	(MULLDconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
   592	
   593	(MULLWconst [-1] x) -> (NEGW x)
   594	(MULLWconst [0] _) -> (MOVDconst [0])
   595	(MULLWconst [1] x) -> x
   596	(MULLWconst [c] x) && isPowerOfTwo(c) -> (SLWconst [log2(c)] x)
   597	(MULLWconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
   598	(MULLWconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
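	// For example, x*8 becomes x<<3, x*15 becomes (x<<4)-x, and x*17 becomes (x<<4)+x.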
   599	
   600	// Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
   601	(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
   602	(ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
   603	(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB && idx.Op != OpSB -> (MOVDaddridx [c] {s} ptr idx)
   604	
   605	// fold ADDconst into MOVDaddridx
   606	(ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
   607	(MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(c+d) && x.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
   608	(MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(c+d) && y.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
   609	
   610	// reverse ordering of compare instruction
   611	(MOVDLT x y (InvertFlags cmp)) -> (MOVDGT x y cmp)
   612	(MOVDGT x y (InvertFlags cmp)) -> (MOVDLT x y cmp)
   613	(MOVDLE x y (InvertFlags cmp)) -> (MOVDGE x y cmp)
   614	(MOVDGE x y (InvertFlags cmp)) -> (MOVDLE x y cmp)
   615	(MOVDEQ x y (InvertFlags cmp)) -> (MOVDEQ x y cmp)
   616	(MOVDNE x y (InvertFlags cmp)) -> (MOVDNE x y cmp)
   617	
   618	// don't extend after proper load
   619	(MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
   620	(MOVBZreg x:(MOVBZload _ _)) -> (MOVDreg x)
   621	(MOVHreg x:(MOVBload _ _)) -> (MOVDreg x)
   622	(MOVHreg x:(MOVBZload _ _)) -> (MOVDreg x)
   623	(MOVHreg x:(MOVHload _ _)) -> (MOVDreg x)
   624	(MOVHZreg x:(MOVBZload _ _)) -> (MOVDreg x)
   625	(MOVHZreg x:(MOVHZload _ _)) -> (MOVDreg x)
   626	(MOVWreg x:(MOVBload _ _)) -> (MOVDreg x)
   627	(MOVWreg x:(MOVBZload _ _)) -> (MOVDreg x)
   628	(MOVWreg x:(MOVHload _ _)) -> (MOVDreg x)
   629	(MOVWreg x:(MOVHZload _ _)) -> (MOVDreg x)
   630	(MOVWreg x:(MOVWload _ _)) -> (MOVDreg x)
   631	(MOVWZreg x:(MOVBZload _ _)) -> (MOVDreg x)
   632	(MOVWZreg x:(MOVHZload _ _)) -> (MOVDreg x)
   633	(MOVWZreg x:(MOVWZload _ _)) -> (MOVDreg x)
   634	
   635	// don't extend if argument is already extended
   636	(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> (MOVDreg x)
   637	(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> (MOVDreg x)
   638	(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> (MOVDreg x)
   639	(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> (MOVDreg x)
   640	(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> (MOVDreg x)
   641	(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> (MOVDreg x)
   642	
   643	// fold double extensions
   644	(MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
   645	(MOVBZreg x:(MOVBZreg _)) -> (MOVDreg x)
   646	(MOVHreg x:(MOVBreg _)) -> (MOVDreg x)
   647	(MOVHreg x:(MOVBZreg _)) -> (MOVDreg x)
   648	(MOVHreg x:(MOVHreg _)) -> (MOVDreg x)
   649	(MOVHZreg x:(MOVBZreg _)) -> (MOVDreg x)
   650	(MOVHZreg x:(MOVHZreg _)) -> (MOVDreg x)
   651	(MOVWreg x:(MOVBreg _)) -> (MOVDreg x)
   652	(MOVWreg x:(MOVBZreg _)) -> (MOVDreg x)
   653	(MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
   654	(MOVWreg x:(MOVHZreg _)) -> (MOVDreg x)
   655	(MOVWreg x:(MOVWreg _)) -> (MOVDreg x)
   656	(MOVWZreg x:(MOVBZreg _)) -> (MOVDreg x)
   657	(MOVWZreg x:(MOVHZreg _)) -> (MOVDreg x)
   658	(MOVWZreg x:(MOVWZreg _)) -> (MOVDreg x)
   659	
   660	(MOVBreg (MOVBZreg x)) -> (MOVBreg x)
   661	(MOVBZreg (MOVBreg x)) -> (MOVBZreg x)
   662	(MOVHreg (MOVHZreg x)) -> (MOVHreg x)
   663	(MOVHZreg (MOVHreg x)) -> (MOVHZreg x)
   664	(MOVWreg (MOVWZreg x)) -> (MOVWreg x)
   665	(MOVWZreg (MOVWreg x)) -> (MOVWZreg x)
   666	
   667	// fold extensions into constants
   668	(MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
   669	(MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
   670	(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
   671	(MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
   672	(MOVWreg (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
   673	(MOVWZreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])
   674	
   675	// sign extended loads
   676	// Note: The combined instruction must end up in the same block
   677	// as the original load. If not, we end up making a value with
   678	// memory type live in two different blocks, which can lead to
   679	// multiple memory values alive simultaneously.
   680	// Make sure we don't combine these ops if the load has another use.
   681	// This prevents a single load from being split into multiple loads
   682	// which then might return different values.  See test/atomicload.go.
   683	(MOVBreg  x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <v.Type> [off] {sym} ptr mem)
   684	(MOVBreg  x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <v.Type> [off] {sym} ptr mem)
   685	(MOVBZreg x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <v.Type> [off] {sym} ptr mem)
   686	(MOVBZreg x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <v.Type> [off] {sym} ptr mem)
   687	(MOVHreg  x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <v.Type> [off] {sym} ptr mem)
   688	(MOVHreg  x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <v.Type> [off] {sym} ptr mem)
   689	(MOVHZreg x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
   690	(MOVHZreg x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
   691	(MOVWreg  x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <v.Type> [off] {sym} ptr mem)
   692	(MOVWreg  x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <v.Type> [off] {sym} ptr mem)
   693	(MOVWZreg x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
   694	(MOVWZreg x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
   695	
   696	(MOVBreg  x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <v.Type> [off] {sym} ptr idx mem)
   697	(MOVBreg  x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <v.Type> [off] {sym} ptr idx mem)
   698	(MOVBZreg x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <v.Type> [off] {sym} ptr idx mem)
   699	(MOVBZreg x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <v.Type> [off] {sym} ptr idx mem)
   700	(MOVHreg  x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <v.Type> [off] {sym} ptr idx mem)
   701	(MOVHreg  x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <v.Type> [off] {sym} ptr idx mem)
   702	(MOVHZreg x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
   703	(MOVHZreg x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
   704	(MOVWreg  x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <v.Type> [off] {sym} ptr idx mem)
   705	(MOVWreg  x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <v.Type> [off] {sym} ptr idx mem)
   706	(MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
   707	(MOVWZreg x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
   708	
   709	// replace load from same location as preceding store with copy
   710	(MOVDload  [off] {sym} ptr1 (MOVDstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVDreg x)
   711	(MOVWload  [off] {sym} ptr1 (MOVWstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWreg x)
   712	(MOVHload  [off] {sym} ptr1 (MOVHstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHreg x)
   713	(MOVBload  [off] {sym} ptr1 (MOVBstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBreg x)
   714	(MOVWZload [off] {sym} ptr1 (MOVWstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWZreg x)
   715	(MOVHZload [off] {sym} ptr1 (MOVHstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHZreg x)
   716	(MOVBZload [off] {sym} ptr1 (MOVBstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBZreg x)
   717	(MOVDload  [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LGDR x)
   718	(FMOVDload [off] {sym} ptr1 (MOVDstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LDGR x)
   719	(FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
   720	(FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
   721	
   722	// prefer FPR <-> GPR moves over combined load ops
   723	(MULLDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (MULLD x (LGDR <t> y))
   724	(ADDload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (ADD   x (LGDR <t> y))
   725	(SUBload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (SUB   x (LGDR <t> y))
   726	(ORload    <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (OR    x (LGDR <t> y))
   727	(ANDload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (AND   x (LGDR <t> y))
   728	(XORload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (XOR   x (LGDR <t> y))
   729	
   730	// detect attempts to set/clear the sign bit
   731	// may need to be reworked when NIHH/OIHH are added
   732	(SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x))
   733	(LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x))
   734	(OR (MOVDconst [-1<<63]) (LGDR <t> x))     -> (LGDR <t> (LNDFR <x.Type> x))
   735	(LDGR <t> (OR (MOVDconst [-1<<63]) x))     -> (LNDFR (LDGR <t> x))
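	// Shifting left then right by one clears bit 63 of the raw float bits
	// (absolute value, hence LPDFR), while ORing in -1<<63 sets bit 63,
	// forcing the sign negative (hence LNDFR).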
   736	
   737	// detect attempts to set the sign bit with load
   738	(LDGR <t> x:(ORload <t1> [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (LNDFR <t> (LDGR <t> (MOVDload <t1> [off] {sym} ptr mem)))
   739	
   740	// detect copysign
   741	(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
   742	(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
   743	(CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y)
   744	(CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y)
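	// The OR patterns above recognize math.Copysign: SRDconst/SLDconst [63]
	// isolate the sign bit of x's raw bits, the other operand supplies the
	// magnitude with its sign cleared, and the OR recombines them. CPSDR does
	// the same in one instruction, taking its magnitude from the first
	// operand and its sign from the second.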
   745	
   746	// absorb negations into set/clear sign bit
   747	(FNEG  (LPDFR x)) -> (LNDFR x)
   748	(FNEG  (LNDFR x)) -> (LPDFR x)
   749	(FNEGS (LPDFR x)) -> (LNDFR x)
   750	(FNEGS (LNDFR x)) -> (LPDFR x)
   751	
   752	// no need to convert float32 to float64 to set/clear sign bit
   753	(LEDBR (LPDFR (LDEBR x))) -> (LPDFR x)
   754	(LEDBR (LNDFR (LDEBR x))) -> (LNDFR x)
   755	
   756	// remove unnecessary FPR <-> GPR moves
   757	(LDGR (LGDR x)) -> x
   758	(LGDR (LDGR x)) -> (MOVDreg x)
   759	
   760	// Don't extend before storing
   761	(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
   762	(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
   763	(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
   764	(MOVWstore [off] {sym} ptr (MOVWZreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
   765	(MOVHstore [off] {sym} ptr (MOVHZreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
   766	(MOVBstore [off] {sym} ptr (MOVBZreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
   767	
   768	// Fold constants into memory operations.
   769	// Note that this is not always a good idea because if not all the uses of
   770	// the ADDconst get eliminated, we still have to compute the ADDconst and we now
   771	// have potentially two live values (ptr and (ADDconst [off] ptr)) instead of one.
   772	// Nevertheless, let's do it!
   773	(MOVDload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVDload  [off1+off2] {sym} ptr mem)
   774	(MOVWload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWload  [off1+off2] {sym} ptr mem)
   775	(MOVHload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHload  [off1+off2] {sym} ptr mem)
   776	(MOVBload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBload  [off1+off2] {sym} ptr mem)
   777	(MOVWZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} ptr mem)
   778	(MOVHZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} ptr mem)
   779	(MOVBZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} ptr mem)
   780	(FMOVSload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVSload [off1+off2] {sym} ptr mem)
   781	(FMOVDload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVDload [off1+off2] {sym} ptr mem)
   782	
   783	(MOVDstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVDstore  [off1+off2] {sym} ptr val mem)
   784	(MOVWstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVWstore  [off1+off2] {sym} ptr val mem)
   785	(MOVHstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVHstore  [off1+off2] {sym} ptr val mem)
   786	(MOVBstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVBstore  [off1+off2] {sym} ptr val mem)
   787	(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
   788	(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
   789	
   790	(ADDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload   [off1+off2] {sym} x ptr mem)
   791	(ADDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload  [off1+off2] {sym} x ptr mem)
   792	(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
   793	(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
   794	(SUBload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload   [off1+off2] {sym} x ptr mem)
   795	(SUBWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload  [off1+off2] {sym} x ptr mem)
   796	
   797	(ANDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload   [off1+off2] {sym} x ptr mem)
   798	(ANDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload  [off1+off2] {sym} x ptr mem)
   799	(ORload    [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload    [off1+off2] {sym} x ptr mem)
   800	(ORWload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload   [off1+off2] {sym} x ptr mem)
   801	(XORload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload   [off1+off2] {sym} x ptr mem)
   802	(XORWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload  [off1+off2] {sym} x ptr mem)
   803	
   804	// Fold constants into stores.
   805	(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
   806		(MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
   807	(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
   808		(MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
   809	(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(off) && ptr.Op != OpSB ->
   810		(MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
   811	(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(off) && ptr.Op != OpSB ->
   812		(MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
   813	
   814	// Fold address offsets into constant stores.
   815	(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
   816		(MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   817	(MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
   818		(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   819	(MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
   820		(MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   821	(MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(ValAndOff(sc).Off()+off) ->
   822		(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   823	
   824	// Merge address calculations into loads and stores.
   825	// Offsets from SB must not be merged into unaligned memory accesses because
   826	// loads/stores using PC-relative addressing directly must be aligned to the
   827	// size of the target.
   828	(MOVDload   [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
   829		(MOVDload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
   830	(MOVWZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
   831		(MOVWZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
   832	(MOVHZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
   833		(MOVHZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
   834	(MOVBZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   835		(MOVBZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
   836	(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   837		(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   838	(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   839		(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   840	
   841	(MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
   842		(MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   843	(MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
   844		(MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   845	(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   846		(MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   847	
   848	(MOVDstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
   849		(MOVDstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   850	(MOVWstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
   851		(MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   852	(MOVHstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
   853		(MOVHstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   854	(MOVBstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   855		(MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   856	(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   857		(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   858	(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   859		(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   860	
   861	(ADDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   862	(ADDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   863	(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   864	(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   865	(SUBload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   866	(SUBWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   867	
   868	(ANDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   869	(ANDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   870	(ORload    [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload    [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   871	(ORWload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   872	(XORload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   873	(XORWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   874	
   875	// Cannot store constant to SB directly (no 'move relative long immediate' instructions).
   876	(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   877		(MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   878	(MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   879		(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   880	(MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   881		(MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   882	(MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   883		(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   884	
   885	// generating indexed loads and stores
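// A MOVDaddridx computes ptr+idx(+offset), so the address calculation can be folded
// into the load or store's indexed addressing mode.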
   886	(MOVBZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   887		(MOVBZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   888	(MOVBload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   889		(MOVBloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   890	(MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   891		(MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   892	(MOVHload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   893		(MOVHloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   894	(MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   895		(MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   896	(MOVWload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   897		(MOVWloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   898	(MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   899		(MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   900	(FMOVSload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   901		(FMOVSloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   902	(FMOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   903		(FMOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   904	
   905	(MOVBstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   906		(MOVBstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   907	(MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   908		(MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   909	(MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   910		(MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   911	(MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   912		(MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   913	(FMOVSstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   914		(FMOVSstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   915	(FMOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   916		(FMOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   917	
   918	(MOVBZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVBZloadidx [off] {sym} ptr idx mem)
   919	(MOVBload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVBloadidx  [off] {sym} ptr idx mem)
   920	(MOVHZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVHZloadidx [off] {sym} ptr idx mem)
   921	(MOVHload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVHloadidx  [off] {sym} ptr idx mem)
   922	(MOVWZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVWZloadidx [off] {sym} ptr idx mem)
   923	(MOVWload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVWloadidx  [off] {sym} ptr idx mem)
   924	(MOVDload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVDloadidx  [off] {sym} ptr idx mem)
   925	(FMOVSload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (FMOVSloadidx [off] {sym} ptr idx mem)
   926	(FMOVDload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (FMOVDloadidx [off] {sym} ptr idx mem)
   927	
   928	(MOVBstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVBstoreidx  [off] {sym} ptr idx val mem)
   929	(MOVHstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVHstoreidx  [off] {sym} ptr idx val mem)
   930	(MOVWstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVWstoreidx  [off] {sym} ptr idx val mem)
   931	(MOVDstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVDstoreidx  [off] {sym} ptr idx val mem)
   932	(FMOVSstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (FMOVSstoreidx [off] {sym} ptr idx val mem)
   933	(FMOVDstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (FMOVDstoreidx [off] {sym} ptr idx val mem)
   934	
   935	// combine ADDconst into indexed loads and stores
   936	(MOVBZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
   937	(MOVBloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBloadidx  [c+d] {sym} ptr idx mem)
   938	(MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
   939	(MOVHloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHloadidx  [c+d] {sym} ptr idx mem)
   940	(MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
   941	(MOVWloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWloadidx  [c+d] {sym} ptr idx mem)
   942	(MOVDloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVDloadidx  [c+d] {sym} ptr idx mem)
   943	(FMOVSloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
   944	(FMOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
   945	
   946	(MOVBstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVBstoreidx  [c+d] {sym} ptr idx val mem)
   947	(MOVHstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVHstoreidx  [c+d] {sym} ptr idx val mem)
   948	(MOVWstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVWstoreidx  [c+d] {sym} ptr idx val mem)
   949	(MOVDstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVDstoreidx  [c+d] {sym} ptr idx val mem)
   950	(FMOVSstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
   951	(FMOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
   952	
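// The same folding applies when the ADDconst feeds the index operand rather than
// the pointer: base and index contribute symmetrically to the address.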
   953	(MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
   954	(MOVBloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBloadidx  [c+d] {sym} ptr idx mem)
   955	(MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
   956	(MOVHloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHloadidx  [c+d] {sym} ptr idx mem)
   957	(MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
   958	(MOVWloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWloadidx  [c+d] {sym} ptr idx mem)
   959	(MOVDloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVDloadidx  [c+d] {sym} ptr idx mem)
   960	(FMOVSloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
   961	(FMOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
   962	
   963	(MOVBstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVBstoreidx  [c+d] {sym} ptr idx val mem)
   964	(MOVHstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVHstoreidx  [c+d] {sym} ptr idx val mem)
   965	(MOVWstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVWstoreidx  [c+d] {sym} ptr idx val mem)
   966	(MOVDstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVDstoreidx  [c+d] {sym} ptr idx val mem)
   967	(FMOVSstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
   968	(FMOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
   969	
   970	// MOVDaddr into MOVDaddridx
   971	(MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
   972	       (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
   973	(MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
   974	       (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
   975	
   976	// Absorb InvertFlags into branches.
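// InvertFlags records that the comparison operands were swapped, so the signed
// conditions are mirrored (LT<->GT, LE<->GE) while EQ and NE are unchanged.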
   977	((LT|GT|LE|GE|EQ|NE) (InvertFlags cmp) yes no) -> ((GT|LT|GE|LE|EQ|NE) cmp yes no)
   978	
   979	// Constant comparisons.
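// Comparisons of known constants fold to flag constants (FlagEQ/FlagLT/FlagGT),
// which the branch and conditional move rules below then resolve.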
   980	(CMPconst (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
   981	(CMPconst (MOVDconst [x]) [y]) && x<y -> (FlagLT)
   982	(CMPconst (MOVDconst [x]) [y]) && x>y -> (FlagGT)
   983	(CMPUconst (MOVDconst [x]) [y]) && uint64(x)==uint64(y) -> (FlagEQ)
   984	(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT)
   985	(CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) -> (FlagGT)
   986	
   987	(CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
   988	(CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) -> (FlagLT)
   989	(CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) -> (FlagGT)
   990	(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)==uint32(y) -> (FlagEQ)
   991	(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT)
   992	(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT)
   993	
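// A zero-extended byte is at most 0xff and a zero-extended halfword at most 0xffff,
// so comparing either against a larger constant always yields less than.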
   994	(CMP(W|WU)const (MOVBZreg _) [c]) &&   0xff < c -> (FlagLT)
   995	(CMP(W|WU)const (MOVHZreg _) [c]) && 0xffff < c -> (FlagLT)
   996	
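// A logical right shift by c > 0 clears the sign bit, so the result is non-negative
// and therefore greater than any negative constant.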
   997	(CMPconst  (SRDconst _ [c]) [n]) && c > 0 && n < 0 -> (FlagGT)
   998	(CMPWconst (SRWconst _ [c]) [n]) && c > 0 && n < 0 -> (FlagGT)
   999	
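// A value logically shifted right by c bits is less than 1<<(64-c) (or 1<<(32-c)),
// so it is below any constant at or above that bound.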
  1000	(CMPUconst  (SRDconst _ [c]) [n]) && c > 0 && c < 64 && (1<<uint(64-c)) <= uint64(n) -> (FlagLT)
  1001	(CMPWUconst (SRWconst _ [c]) [n]) && c > 0 && c < 32 && (1<<uint(32-c)) <= uint32(n) -> (FlagLT)
  1002	
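// ANDing with a non-negative mask m yields a value in [0, m], which is less than
// any larger constant.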
  1003	(CMPWconst  (ANDWconst _ [m]) [n]) && int32(m) >= 0 &&  int32(m) <  int32(n) -> (FlagLT)
  1004	(CMPWUconst (ANDWconst _ [m]) [n]) && uint32(m) < uint32(n) -> (FlagLT)
  1005	
  1006	// Convert 64-bit comparisons to 32-bit comparisons and signed comparisons
  1007	// to unsigned comparisons.
  1008	// Helps simplify constant comparison detection.
  1009	(CM(P|PU)const (MOV(W|WZ)reg x) [c]) -> (CMP(W|WU)const x [c])
  1010	(CM(P|P|PU|PU)const x:(MOV(H|HZ|H|HZ)reg _) [c]) -> (CMP(W|W|WU|WU)const x [c])
  1011	(CM(P|P|PU|PU)const x:(MOV(B|BZ|B|BZ)reg _) [c]) -> (CMP(W|W|WU|WU)const x [c])
  1012	(CMPconst  (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0 && c >= 0 -> (CMPWUconst x [c])
  1013	(CMPUconst (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0           -> (CMPWUconst x [c])
  1014	(CMPconst  x:(SRDconst _ [c]) [n]) && c > 0 && n >= 0 -> (CMPUconst  x [n])
  1015	(CMPWconst x:(SRWconst _ [c]) [n]) && c > 0 && n >= 0 -> (CMPWUconst x [n])
  1016	
  1017	// Absorb sign and zero extensions into 32-bit comparisons.
  1018	(CMP(W|W|WU|WU)      x (MOV(W|WZ|W|WZ)reg y))   -> (CMP(W|W|WU|WU) x y)
  1019	(CMP(W|W|WU|WU)      (MOV(W|WZ|W|WZ)reg x) y)   -> (CMP(W|W|WU|WU) x y)
  1020	(CMP(W|W|WU|WU)const (MOV(W|WZ|W|WZ)reg x) [c]) -> (CMP(W|W|WU|WU)const x [c])
  1021	
  1022	// Absorb flag constants into branches.
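// A flag constant means the comparison outcome is already known, so each branch
// collapses to a First block that always takes either the yes or the no edge.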
  1023	(EQ (FlagEQ) yes no) -> (First nil yes no)
  1024	(EQ (FlagLT) yes no) -> (First nil no yes)
  1025	(EQ (FlagGT) yes no) -> (First nil no yes)
  1026	
  1027	(NE (FlagEQ) yes no) -> (First nil no yes)
  1028	(NE (FlagLT) yes no) -> (First nil yes no)
  1029	(NE (FlagGT) yes no) -> (First nil yes no)
  1030	
  1031	(LT (FlagEQ) yes no) -> (First nil no yes)
  1032	(LT (FlagLT) yes no) -> (First nil yes no)
  1033	(LT (FlagGT) yes no) -> (First nil no yes)
  1034	
  1035	(LE (FlagEQ) yes no) -> (First nil yes no)
  1036	(LE (FlagLT) yes no) -> (First nil yes no)
  1037	(LE (FlagGT) yes no) -> (First nil no yes)
  1038	
  1039	(GT (FlagEQ) yes no) -> (First nil no yes)
  1040	(GT (FlagLT) yes no) -> (First nil no yes)
  1041	(GT (FlagGT) yes no) -> (First nil yes no)
  1042	
  1043	(GE (FlagEQ) yes no) -> (First nil yes no)
  1044	(GE (FlagLT) yes no) -> (First nil no yes)
  1045	(GE (FlagGT) yes no) -> (First nil yes no)
  1046	
  1047	// Absorb flag constants into conditional move (MOVD*) ops.
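// As the rules below show, arg1 is selected when the condition holds and arg0 otherwise.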
  1048	(MOVDEQ _ x (FlagEQ)) -> x
  1049	(MOVDEQ y _ (FlagLT)) -> y
  1050	(MOVDEQ y _ (FlagGT)) -> y
  1051	
  1052	(MOVDNE y _ (FlagEQ)) -> y
  1053	(MOVDNE _ x (FlagLT)) -> x
  1054	(MOVDNE _ x (FlagGT)) -> x
  1055	
  1056	(MOVDLT y _ (FlagEQ)) -> y
  1057	(MOVDLT _ x (FlagLT)) -> x
  1058	(MOVDLT y _ (FlagGT)) -> y
  1059	
  1060	(MOVDLE _ x (FlagEQ)) -> x
  1061	(MOVDLE _ x (FlagLT)) -> x
  1062	(MOVDLE y _ (FlagGT)) -> y
  1063	
  1064	(MOVDGT y _ (FlagEQ)) -> y
  1065	(MOVDGT y _ (FlagLT)) -> y
  1066	(MOVDGT _ x (FlagGT)) -> x
  1067	
  1068	(MOVDGE _ x (FlagEQ)) -> x
  1069	(MOVDGE y _ (FlagLT)) -> y
  1070	(MOVDGE _ x (FlagGT)) -> x
  1071	
  1072	// Remove redundant *const ops
  1073	(ADDconst [0] x) -> x
  1074	(ADDWconst [c] x) && int32(c)==0 -> x
  1075	(SUBconst [0] x) -> x
  1076	(SUBWconst [c] x) && int32(c) == 0 -> x
  1077	(ANDconst [0] _)                 -> (MOVDconst [0])
  1078	(ANDWconst [c] _) && int32(c)==0  -> (MOVDconst [0])
  1079	(ANDconst [-1] x)                -> x
  1080	(ANDWconst [c] x) && int32(c)==-1 -> x
  1081	(ORconst [0] x)                  -> x
  1082	(ORWconst [c] x) && int32(c)==0   -> x
  1083	(ORconst [-1] _)                 -> (MOVDconst [-1])
  1084	(ORWconst [c] _) && int32(c)==-1  -> (MOVDconst [-1])
  1085	(XORconst [0] x)                  -> x
  1086	(XORWconst [c] x) && int32(c)==0   -> x
  1087	
  1088	// Convert constant subtracts to constant adds.
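// The c != -(1<<31) guard avoids negating the most negative 32-bit constant, whose
// negation does not fit in a 32-bit immediate.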
  1089	(SUBconst [c] x) && c != -(1<<31) -> (ADDconst [-c] x)
  1090	(SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
  1091	
  1092	// generic constant folding
  1093	// TODO: more of this
  1094	(ADDconst [c] (MOVDconst [d])) -> (MOVDconst [c+d])
  1095	(ADDWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c+d))])
  1096	(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
  1097	(ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [int64(int32(c+d))] x)
  1098	(SUBconst (MOVDconst [d]) [c]) -> (MOVDconst [d-c])
  1099	(SUBconst (SUBconst x [d]) [c]) && is32Bit(-c-d) -> (ADDconst [-c-d] x)
  1100	(SRADconst [c] (MOVDconst [d])) -> (MOVDconst [d>>uint64(c)])
  1101	(SRAWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(d))>>uint64(c)])
  1102	(NEG (MOVDconst [c])) -> (MOVDconst [-c])
  1103	(NEGW (MOVDconst [c])) -> (MOVDconst [int64(int32(-c))])
  1104	(MULLDconst [c] (MOVDconst [d])) -> (MOVDconst [c*d])
  1105	(MULLWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c*d))])
  1106	(AND (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c&d])
  1107	(ANDconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
  1108	(ANDWconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
  1109	(OR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c|d])
  1110	(ORconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
  1111	(ORWconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
  1112	(XOR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c^d])
  1113	(XORconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
  1114	(XORWconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
  1115	(LoweredRound32F x:(FMOVSconst)) -> x
  1116	(LoweredRound64F x:(FMOVDconst)) -> x
  1117	
  1118	// generic simplifications
  1119	// TODO: more of this
  1120	(ADD x (NEG y)) -> (SUB x y)
  1121	(ADDW x (NEGW y)) -> (SUBW x y)
  1122	(SUB x x) -> (MOVDconst [0])
  1123	(SUBW x x) -> (MOVDconst [0])
  1124	(AND x x) -> x
  1125	(ANDW x x) -> x
  1126	(OR x x) -> x
  1127	(ORW x x) -> x
  1128	(XOR x x) -> (MOVDconst [0])
  1129	(XORW x x) -> (MOVDconst [0])
  1130	(NEG (ADDconst [c] (NEG x))) && c != -(1<<31) -> (ADDconst [-c] x)
  1131	(MOVBZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
  1132	(MOVHZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
  1133	(MOVBreg  (ANDWconst [m] x)) &&  int8(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
  1134	(MOVHreg  (ANDWconst [m] x)) && int16(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
  1135	
  1136	// carry flag generation
  1137	// (only constant fold carry of zero)
  1138	(Select1 (ADDCconst (MOVDconst [c]) [d]))
  1139	  && uint64(c+d) >= uint64(c) && c+d == 0
  1140	  -> (FlagEQ)
  1141	(Select1 (ADDCconst (MOVDconst [c]) [d]))
  1142	  && uint64(c+d) >= uint64(c) && c+d != 0
  1143	  -> (FlagLT)
  1144	
  1145	// borrow flag generation
  1146	// (only constant fold borrow of zero)
  1147	(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
  1148	  && uint64(d) <= uint64(c) && c-d == 0
  1149	  -> (FlagGT)
  1150	(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
  1151	  && uint64(d) <= uint64(c) && c-d != 0
  1152	  -> (FlagOV)
  1153	
  1154	// add with carry
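// A carry-in known to be clear (FlagEQ or FlagLT above) makes ADDE equivalent to
// ADDC, which does not consume the carry flag.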
  1155	(ADDE x y (FlagEQ)) -> (ADDC x y)
  1156	(ADDE x y (FlagLT)) -> (ADDC x y)
  1157	(ADDC x (MOVDconst [c])) && is16Bit(c) -> (ADDCconst x [c])
  1158	(Select0 (ADDCconst (MOVDconst [c]) [d])) -> (MOVDconst [c+d])
  1159	
  1160	// subtract with borrow
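// Likewise, a borrow-in known to be clear (FlagGT or FlagOV above) makes SUBE
// equivalent to SUBC.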
  1161	(SUBE x y (FlagGT)) -> (SUBC x y)
  1162	(SUBE x y (FlagOV)) -> (SUBC x y)
  1163	(Select0 (SUBC (MOVDconst [c]) (MOVDconst [d]))) -> (MOVDconst [c-d])
  1164	
  1165	// collapse carry chain
  1166	(ADDE x y (Select1 (ADDCconst [-1] (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) c)))))
  1167	  -> (ADDE x y c)
  1168	
  1169	// collapse borrow chain
  1170	(SUBE x y (Select1 (SUBC (MOVDconst [0]) (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) c))))))
  1171	  -> (SUBE x y c)
  1172	
  1173	// fused multiply-add
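// As the patterns below imply, FMADD x y z computes y*z + x and FMSUB x y z
// computes y*z - x (FMADDS/FMSUBS are the single-precision forms).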
  1174	(FADD (FMUL y z) x) -> (FMADD x y z)
  1175	(FADDS (FMULS y z) x) -> (FMADDS x y z)
  1176	(FSUB (FMUL y z) x) -> (FMSUB x y z)
  1177	(FSUBS (FMULS y z) x) -> (FMSUBS x y z)
  1178	
  1179	// Fold memory operations into operations.
  1180	// Exclude global data (SB) because these instructions cannot handle relative addresses.
  1181	// TODO(mundaym): use LARL in the assembler to handle SB?
  1182	// TODO(mundaym): indexed versions of these?
  1183	(ADD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1184		-> (ADDload <t> [off] {sym} x ptr mem)
  1185	(ADD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1186		-> (ADDload <t> [off] {sym} x ptr mem)
  1187	(ADDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1188		-> (ADDWload <t> [off] {sym} x ptr mem)
  1189	(ADDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1190		-> (ADDWload <t> [off] {sym} x ptr mem)
  1191	(ADDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1192		-> (ADDWload <t> [off] {sym} x ptr mem)
  1193	(ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1194		-> (ADDWload <t> [off] {sym} x ptr mem)
  1195	(MULLD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1196		-> (MULLDload <t> [off] {sym} x ptr mem)
  1197	(MULLD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1198		-> (MULLDload <t> [off] {sym} x ptr mem)
  1199	(MULLW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1200		-> (MULLWload <t> [off] {sym} x ptr mem)
  1201	(MULLW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1202		-> (MULLWload <t> [off] {sym} x ptr mem)
  1203	(MULLW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1204		-> (MULLWload <t> [off] {sym} x ptr mem)
  1205	(MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1206		-> (MULLWload <t> [off] {sym} x ptr mem)
  1207	(SUB <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1208		-> (SUBload <t> [off] {sym} x ptr mem)
  1209	(SUBW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1210		-> (SUBWload <t> [off] {sym} x ptr mem)
  1211	(SUBW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1212		-> (SUBWload <t> [off] {sym} x ptr mem)
  1213	(AND <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1214		-> (ANDload <t> [off] {sym} x ptr mem)
  1215	(AND <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1216		-> (ANDload <t> [off] {sym} x ptr mem)
  1217	(ANDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1218		-> (ANDWload <t> [off] {sym} x ptr mem)
  1219	(ANDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1220		-> (ANDWload <t> [off] {sym} x ptr mem)
  1221	(ANDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1222		-> (ANDWload <t> [off] {sym} x ptr mem)
  1223	(ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1224		-> (ANDWload <t> [off] {sym} x ptr mem)
  1225	(OR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1226		-> (ORload <t> [off] {sym} x ptr mem)
  1227	(OR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1228		-> (ORload <t> [off] {sym} x ptr mem)
  1229	(ORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1230		-> (ORWload <t> [off] {sym} x ptr mem)
  1231	(ORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1232		-> (ORWload <t> [off] {sym} x ptr mem)
  1233	(ORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1234		-> (ORWload <t> [off] {sym} x ptr mem)
  1235	(ORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1236		-> (ORWload <t> [off] {sym} x ptr mem)
  1237	(XOR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1238		-> (XORload <t> [off] {sym} x ptr mem)
  1239	(XOR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1240		-> (XORload <t> [off] {sym} x ptr mem)
  1241	(XORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1242		-> (XORWload <t> [off] {sym} x ptr mem)
  1243	(XORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1244		-> (XORWload <t> [off] {sym} x ptr mem)
  1245	(XORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1246		-> (XORWload <t> [off] {sym} x ptr mem)
  1247	(XORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
  1248		-> (XORWload <t> [off] {sym} x ptr mem)
  1249	
  1250	// Combine constant stores into larger (unaligned) stores.
  1251	// Avoid SB because constant stores to relative offsets are
  1252	// emulated by the assembler and also can't handle unaligned offsets.
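// s390x is big-endian, so the constant stored at the lower offset supplies the
// high-order bits of the merged value.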
  1253	(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
  1254	  && p.Op != OpSB
  1255	  && x.Uses == 1
  1256	  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
  1257	  && clobber(x)
  1258	  -> (MOVHstoreconst [makeValAndOff(ValAndOff(c).Val()&0xff | ValAndOff(a).Val()<<8, ValAndOff(a).Off())] {s} p mem)
  1259	(MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
  1260	  && p.Op != OpSB
  1261	  && x.Uses == 1
  1262	  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
  1263	  && clobber(x)
  1264	  -> (MOVWstore [ValAndOff(a).Off()] {s} p (MOVDconst [int64(int32(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16))]) mem)
  1265	(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
  1266	  && p.Op != OpSB
  1267	  && x.Uses == 1
  1268	  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
  1269	  && clobber(x)
  1270	  -> (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
  1271	
  1272	// Combine stores into larger (unaligned) stores.
  1273	// It doesn't work on global data (based on SB) because stores with relative addressing
  1274	// require that the memory operand be aligned.
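// In these patterns the store at the higher offset holds the low-order bits of w,
// so on big-endian s390x a single wider store of w (or w0) writes the same bytes.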
  1275	(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
  1276	  && p.Op != OpSB
  1277	  && x.Uses == 1
  1278	  && clobber(x)
  1279	  -> (MOVHstore [i-1] {s} p w mem)
  1280	(MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
  1281	  && p.Op != OpSB
  1282	  && x.Uses == 1
  1283	  && clobber(x)
  1284	  -> (MOVHstore [i-1] {s} p w0 mem)
  1285	(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
  1286	  && p.Op != OpSB
  1287	  && x.Uses == 1
  1288	  && clobber(x)
  1289	  -> (MOVHstore [i-1] {s} p w mem)
  1290	(MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
  1291	  && p.Op != OpSB
  1292	  && x.Uses == 1
  1293	  && clobber(x)
  1294	  -> (MOVHstore [i-1] {s} p w0 mem)
  1295	(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
  1296	  && p.Op != OpSB
  1297	  && x.Uses == 1
  1298	  && clobber(x)
  1299	  -> (MOVWstore [i-2] {s} p w mem)
  1300	(MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
  1301	  && p.Op != OpSB
  1302	  && x.Uses == 1
  1303	  && clobber(x)
  1304	  -> (MOVWstore [i-2] {s} p w0 mem)
  1305	(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
  1306	  && p.Op != OpSB
  1307	  && x.Uses == 1
  1308	  && clobber(x)
  1309	  -> (MOVWstore [i-2] {s} p w mem)
  1310	(MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
  1311	  && p.Op != OpSB
  1312	  && x.Uses == 1
  1313	  && clobber(x)
  1314	  -> (MOVWstore [i-2] {s} p w0 mem)
  1315	(MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
  1316	  && p.Op != OpSB
  1317	  && x.Uses == 1
  1318	  && clobber(x)
  1319	  -> (MOVDstore [i-4] {s} p w mem)
  1320	(MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
  1321	  && p.Op != OpSB
  1322	  && x.Uses == 1
  1323	  && clobber(x)
  1324	  -> (MOVDstore [i-4] {s} p w0 mem)
  1325	
  1326	(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
  1327	  && x.Uses == 1
  1328	  && clobber(x)
  1329	  -> (MOVHstoreidx [i-1] {s} p idx w mem)
  1330	(MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
  1331	  && x.Uses == 1
  1332	  && clobber(x)
  1333	  -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
  1334	(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
  1335	  && x.Uses == 1
  1336	  && clobber(x)
  1337	  -> (MOVHstoreidx [i-1] {s} p idx w mem)
  1338	(MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
  1339	  && x.Uses == 1
  1340	  && clobber(x)
  1341	  -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
  1342	(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
  1343	  && x.Uses == 1
  1344	  && clobber(x)
  1345	  -> (MOVWstoreidx [i-2] {s} p idx w mem)
  1346	(MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
  1347	  && x.Uses == 1
  1348	  && clobber(x)
  1349	  -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
  1350	(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
  1351	  && x.Uses == 1
  1352	  && clobber(x)
  1353	  -> (MOVWstoreidx [i-2] {s} p idx w mem)
  1354	(MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
  1355	  && x.Uses == 1
  1356	  && clobber(x)
  1357	  -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
  1358	(MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
  1359	  && x.Uses == 1
  1360	  && clobber(x)
  1361	  -> (MOVDstoreidx [i-4] {s} p idx w mem)
  1362	(MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
  1363	  && x.Uses == 1
  1364	  && clobber(x)
  1365	  -> (MOVDstoreidx [i-4] {s} p idx w0 mem)
  1366	
  1367	// Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
  1368	// Store-with-bytes-reversed instructions do not support relative memory addresses,
  1369	// so these stores can't operate on global data (SB).
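// Here the byte at the lower address holds the least-significant bits of w
// (little-endian order), so a single byte-reversed store reproduces it.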
  1370	(MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
  1371	  && p.Op != OpSB
  1372	  && x.Uses == 1
  1373	  && clobber(x)
  1374	  -> (MOVHBRstore [i-1] {s} p w mem)
  1375	(MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
  1376	  && p.Op != OpSB
  1377	  && x.Uses == 1
  1378	  && clobber(x)
  1379	  -> (MOVHBRstore [i-1] {s} p w0 mem)
  1380	(MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
  1381	  && p.Op != OpSB
  1382	  && x.Uses == 1
  1383	  && clobber(x)
  1384	  -> (MOVHBRstore [i-1] {s} p w mem)
  1385	(MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
  1386	  && p.Op != OpSB
  1387	  && x.Uses == 1
  1388	  && clobber(x)
  1389	  -> (MOVHBRstore [i-1] {s} p w0 mem)
  1390	(MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
  1391	  && x.Uses == 1
  1392	  && clobber(x)
  1393	  -> (MOVWBRstore [i-2] {s} p w mem)
  1394	(MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
  1395	  && x.Uses == 1
  1396	  && clobber(x)
  1397	  -> (MOVWBRstore [i-2] {s} p w0 mem)
  1398	(MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
  1399	  && x.Uses == 1
  1400	  && clobber(x)
  1401	  -> (MOVWBRstore [i-2] {s} p w mem)
  1402	(MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
  1403	  && x.Uses == 1
  1404	  && clobber(x)
  1405	  -> (MOVWBRstore [i-2] {s} p w0 mem)
  1406	(MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
  1407	  && x.Uses == 1
  1408	  && clobber(x)
  1409	  -> (MOVDBRstore [i-4] {s} p w mem)
  1410	(MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
  1411	  && x.Uses == 1
  1412	  && clobber(x)
  1413	  -> (MOVDBRstore [i-4] {s} p w0 mem)
  1414	
  1415	(MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
  1416	  && x.Uses == 1
  1417	  && clobber(x)
  1418	  -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
  1419	(MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
  1420	  && x.Uses == 1
  1421	  && clobber(x)
  1422	  -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
  1423	(MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
  1424	  && x.Uses == 1
  1425	  && clobber(x)
  1426	  -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
  1427	(MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
  1428	  && x.Uses == 1
  1429	  && clobber(x)
  1430	  -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
  1431	(MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
  1432	  && x.Uses == 1
  1433	  && clobber(x)
  1434	  -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
  1435	(MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
  1436	  && x.Uses == 1
  1437	  && clobber(x)
  1438	  -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
  1439	(MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
  1440	  && x.Uses == 1
  1441	  && clobber(x)
  1442	  -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
  1443	(MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
  1444	  && x.Uses == 1
  1445	  && clobber(x)
  1446	  -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
  1447	(MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
  1448	  && x.Uses == 1
  1449	  && clobber(x)
  1450	  -> (MOVDBRstoreidx [i-4] {s} p idx w mem)
  1451	(MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
  1452	  && x.Uses == 1
  1453	  && clobber(x)
  1454	  -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
  1455	
  1456	// Combining byte loads into larger (unaligned) loads.
  1457	
  1458	// Big-endian loads
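// The value loaded from the lower offset (i0) is shifted into the more significant
// bits, matching the machine's big-endian order, so the loads merge into one wider load.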
  1459	
  1460	(ORW                 x1:(MOVBZload [i1] {s} p mem)
  1461	    sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
  1462	  && i1 == i0+1
  1463	  && p.Op != OpSB
  1464	  && x0.Uses == 1
  1465	  && x1.Uses == 1
  1466	  && sh.Uses == 1
  1467	  && mergePoint(b,x0,x1) != nil
  1468	  && clobber(x0)
  1469	  && clobber(x1)
  1470	  && clobber(sh)
  1471	  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
  1472	
  1473	(OR                  x1:(MOVBZload [i1] {s} p mem)
  1474	    sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
  1475	  && i1 == i0+1
  1476	  && p.Op != OpSB
  1477	  && x0.Uses == 1
  1478	  && x1.Uses == 1
  1479	  && sh.Uses == 1
  1480	  && mergePoint(b,x0,x1) != nil
  1481	  && clobber(x0)
  1482	  && clobber(x1)
  1483	  && clobber(sh)
  1484	  -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
  1485	
  1486	(ORW                  x1:(MOVHZload [i1] {s} p mem)
  1487	    sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
  1488	  && i1 == i0+2
  1489	  && p.Op != OpSB
  1490	  && x0.Uses == 1
  1491	  && x1.Uses == 1
  1492	  && sh.Uses == 1
  1493	  && mergePoint(b,x0,x1) != nil
  1494	  && clobber(x0)
  1495	  && clobber(x1)
  1496	  && clobber(sh)
  1497	  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
  1498	
  1499	(OR                   x1:(MOVHZload [i1] {s} p mem)
  1500	    sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
  1501	  && i1 == i0+2
  1502	  && p.Op != OpSB
  1503	  && x0.Uses == 1
  1504	  && x1.Uses == 1
  1505	  && sh.Uses == 1
  1506	  && mergePoint(b,x0,x1) != nil
  1507	  && clobber(x0)
  1508	  && clobber(x1)
  1509	  && clobber(sh)
  1510	  -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
  1511	
  1512	(OR                   x1:(MOVWZload [i1] {s} p mem)
  1513	    sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
  1514	  && i1 == i0+4
  1515	  && p.Op != OpSB
  1516	  && x0.Uses == 1
  1517	  && x1.Uses == 1
  1518	  && sh.Uses == 1
  1519	  && mergePoint(b,x0,x1) != nil
  1520	  && clobber(x0)
  1521	  && clobber(x1)
  1522	  && clobber(sh)
  1523	  -> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
  1524	
  1525	(ORW
  1526	    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1527	    or:(ORW
  1528	        s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1529		y))
  1530	  && i1 == i0+1
  1531	  && j1 == j0-8
  1532	  && j1 % 16 == 0
  1533	  && x0.Uses == 1
  1534	  && x1.Uses == 1
  1535	  && s0.Uses == 1
  1536	  && s1.Uses == 1
  1537	  && or.Uses == 1
  1538	  && mergePoint(b,x0,x1,y) != nil
  1539	  && clobber(x0)
  1540	  && clobber(x1)
  1541	  && clobber(s0)
  1542	  && clobber(s1)
  1543	  && clobber(or)
  1544	  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
  1545	
  1546	(OR
  1547	    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1548	    or:(OR
  1549	        s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1550		y))
  1551	  && i1 == i0+1
  1552	  && j1 == j0-8
  1553	  && j1 % 16 == 0
  1554	  && x0.Uses == 1
  1555	  && x1.Uses == 1
  1556	  && s0.Uses == 1
  1557	  && s1.Uses == 1
  1558	  && or.Uses == 1
  1559	  && mergePoint(b,x0,x1,y) != nil
  1560	  && clobber(x0)
  1561	  && clobber(x1)
  1562	  && clobber(s0)
  1563	  && clobber(s1)
  1564	  && clobber(or)
  1565	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
  1566	
  1567	(OR
  1568	    s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
  1569	    or:(OR
  1570	        s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
  1571		y))
  1572	  && i1 == i0+2
  1573	  && j1 == j0-16
  1574	  && j1 % 32 == 0
  1575	  && x0.Uses == 1
  1576	  && x1.Uses == 1
  1577	  && s0.Uses == 1
  1578	  && s1.Uses == 1
  1579	  && or.Uses == 1
  1580	  && mergePoint(b,x0,x1,y) != nil
  1581	  && clobber(x0)
  1582	  && clobber(x1)
  1583	  && clobber(s0)
  1584	  && clobber(s1)
  1585	  && clobber(or)
  1586	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
  1587	
  1588	// Big-endian indexed loads
  1589	
  1590	(ORW                 x1:(MOVBZloadidx [i1] {s} p idx mem)
  1591	    sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
  1592	  && i1 == i0+1
  1593	  && p.Op != OpSB
  1594	  && x0.Uses == 1
  1595	  && x1.Uses == 1
  1596	  && sh.Uses == 1
  1597	  && mergePoint(b,x0,x1) != nil
  1598	  && clobber(x0)
  1599	  && clobber(x1)
  1600	  && clobber(sh)
  1601	  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
  1602	
  1603	(OR                  x1:(MOVBZloadidx [i1] {s} p idx mem)
  1604	    sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
  1605	  && i1 == i0+1
  1606	  && p.Op != OpSB
  1607	  && x0.Uses == 1
  1608	  && x1.Uses == 1
  1609	  && sh.Uses == 1
  1610	  && mergePoint(b,x0,x1) != nil
  1611	  && clobber(x0)
  1612	  && clobber(x1)
  1613	  && clobber(sh)
  1614	  -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
  1615	
  1616	(ORW                  x1:(MOVHZloadidx [i1] {s} p idx mem)
  1617	    sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
  1618	  && i1 == i0+2
  1619	  && p.Op != OpSB
  1620	  && x0.Uses == 1
  1621	  && x1.Uses == 1
  1622	  && sh.Uses == 1
  1623	  && mergePoint(b,x0,x1) != nil
  1624	  && clobber(x0)
  1625	  && clobber(x1)
  1626	  && clobber(sh)
  1627	  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
  1628	
  1629	(OR                   x1:(MOVHZloadidx [i1] {s} p idx mem)
  1630	    sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
  1631	  && i1 == i0+2
  1632	  && p.Op != OpSB
  1633	  && x0.Uses == 1
  1634	  && x1.Uses == 1
  1635	  && sh.Uses == 1
  1636	  && mergePoint(b,x0,x1) != nil
  1637	  && clobber(x0)
  1638	  && clobber(x1)
  1639	  && clobber(sh)
  1640	  -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
  1641	
  1642	(OR                   x1:(MOVWZloadidx [i1] {s} p idx mem)
  1643	    sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
  1644	  && i1 == i0+4
  1645	  && p.Op != OpSB
  1646	  && x0.Uses == 1
  1647	  && x1.Uses == 1
  1648	  && sh.Uses == 1
  1649	  && mergePoint(b,x0,x1) != nil
  1650	  && clobber(x0)
  1651	  && clobber(x1)
  1652	  && clobber(sh)
  1653	  -> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
  1654	
  1655	(ORW
  1656	    s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1657	    or:(ORW
  1658	        s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1659		y))
  1660	  && i1 == i0+1
  1661	  && j1 == j0-8
  1662	  && j1 % 16 == 0
  1663	  && x0.Uses == 1
  1664	  && x1.Uses == 1
  1665	  && s0.Uses == 1
  1666	  && s1.Uses == 1
  1667	  && or.Uses == 1
  1668	  && mergePoint(b,x0,x1,y) != nil
  1669	  && clobber(x0)
  1670	  && clobber(x1)
  1671	  && clobber(s0)
  1672	  && clobber(s1)
  1673	  && clobber(or)
  1674	  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
  1675	
  1676	(OR
  1677	    s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1678	    or:(OR
  1679	        s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1680		y))
  1681	  && i1 == i0+1
  1682	  && j1 == j0-8
  1683	  && j1 % 16 == 0
  1684	  && x0.Uses == 1
  1685	  && x1.Uses == 1
  1686	  && s0.Uses == 1
  1687	  && s1.Uses == 1
  1688	  && or.Uses == 1
  1689	  && mergePoint(b,x0,x1,y) != nil
  1690	  && clobber(x0)
  1691	  && clobber(x1)
  1692	  && clobber(s0)
  1693	  && clobber(s1)
  1694	  && clobber(or)
  1695	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
  1696	
  1697	(OR
  1698	    s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
  1699	    or:(OR
  1700	        s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
  1701		y))
  1702	  && i1 == i0+2
  1703	  && j1 == j0-16
  1704	  && j1 % 32 == 0
  1705	  && x0.Uses == 1
  1706	  && x1.Uses == 1
  1707	  && s0.Uses == 1
  1708	  && s1.Uses == 1
  1709	  && or.Uses == 1
  1710	  && mergePoint(b,x0,x1,y) != nil
  1711	  && clobber(x0)
  1712	  && clobber(x1)
  1713	  && clobber(s0)
  1714	  && clobber(s1)
  1715	  && clobber(or)
  1716	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
  1717	
  1718	// Little-endian loads
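// The value loaded from the lower offset (i0) supplies the least-significant bits,
// so the loads merge into a byte-reversed load (MOVHBRload and wider).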
  1719	
  1720	(ORW                 x0:(MOVBZload [i0] {s} p mem)
  1721	    sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
  1722	  && p.Op != OpSB
  1723	  && i1 == i0+1
  1724	  && x0.Uses == 1
  1725	  && x1.Uses == 1
  1726	  && sh.Uses == 1
  1727	  && mergePoint(b,x0,x1) != nil
  1728	  && clobber(x0)
  1729	  && clobber(x1)
  1730	  && clobber(sh)
  1731	  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
  1732	
  1733	(OR                  x0:(MOVBZload [i0] {s} p mem)
  1734	    sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
  1735	  && p.Op != OpSB
  1736	  && i1 == i0+1
  1737	  && x0.Uses == 1
  1738	  && x1.Uses == 1
  1739	  && sh.Uses == 1
  1740	  && mergePoint(b,x0,x1) != nil
  1741	  && clobber(x0)
  1742	  && clobber(x1)
  1743	  && clobber(sh)
  1744	  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
  1745	
  1746	(ORW                  r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
  1747	    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
  1748	  && i1 == i0+2
  1749	  && x0.Uses == 1
  1750	  && x1.Uses == 1
  1751	  && r0.Uses == 1
  1752	  && r1.Uses == 1
  1753	  && sh.Uses == 1
  1754	  && mergePoint(b,x0,x1) != nil
  1755	  && clobber(x0)
  1756	  && clobber(x1)
  1757	  && clobber(r0)
  1758	  && clobber(r1)
  1759	  && clobber(sh)
  1760	  -> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
  1761	
  1762	(OR                   r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
  1763	    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
  1764	  && i1 == i0+2
  1765	  && x0.Uses == 1
  1766	  && x1.Uses == 1
  1767	  && r0.Uses == 1
  1768	  && r1.Uses == 1
  1769	  && sh.Uses == 1
  1770	  && mergePoint(b,x0,x1) != nil
  1771	  && clobber(x0)
  1772	  && clobber(x1)
  1773	  && clobber(r0)
  1774	  && clobber(r1)
  1775	  && clobber(sh)
  1776	  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
  1777	
  1778	(OR                   r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
  1779	    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
  1780	  && i1 == i0+4
  1781	  && x0.Uses == 1
  1782	  && x1.Uses == 1
  1783	  && r0.Uses == 1
  1784	  && r1.Uses == 1
  1785	  && sh.Uses == 1
  1786	  && mergePoint(b,x0,x1) != nil
  1787	  && clobber(x0)
  1788	  && clobber(x1)
  1789	  && clobber(r0)
  1790	  && clobber(r1)
  1791	  && clobber(sh)
  1792	  -> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
  1793	
  1794	(ORW
  1795	    s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1796	    or:(ORW
  1797	        s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1798		y))
  1799	  && p.Op != OpSB
  1800	  && i1 == i0+1
  1801	  && j1 == j0+8
  1802	  && j0 % 16 == 0
  1803	  && x0.Uses == 1
  1804	  && x1.Uses == 1
  1805	  && s0.Uses == 1
  1806	  && s1.Uses == 1
  1807	  && or.Uses == 1
  1808	  && mergePoint(b,x0,x1,y) != nil
  1809	  && clobber(x0)
  1810	  && clobber(x1)
  1811	  && clobber(s0)
  1812	  && clobber(s1)
  1813	  && clobber(or)
  1814	  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
  1815	
  1816	(OR
  1817	    s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1818	    or:(OR
  1819	        s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1820		y))
  1821	  && p.Op != OpSB
  1822	  && i1 == i0+1
  1823	  && j1 == j0+8
  1824	  && j0 % 16 == 0
  1825	  && x0.Uses == 1
  1826	  && x1.Uses == 1
  1827	  && s0.Uses == 1
  1828	  && s1.Uses == 1
  1829	  && or.Uses == 1
  1830	  && mergePoint(b,x0,x1,y) != nil
  1831	  && clobber(x0)
  1832	  && clobber(x1)
  1833	  && clobber(s0)
  1834	  && clobber(s1)
  1835	  && clobber(or)
  1836	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
  1837	
  1838	(OR
  1839	    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
  1840	    or:(OR
  1841	        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
  1842		y))
  1843	  && i1 == i0+2
  1844	  && j1 == j0+16
  1845	  && j0 % 32 == 0
  1846	  && x0.Uses == 1
  1847	  && x1.Uses == 1
  1848	  && r0.Uses == 1
  1849	  && r1.Uses == 1
  1850	  && s0.Uses == 1
  1851	  && s1.Uses == 1
  1852	  && or.Uses == 1
  1853	  && mergePoint(b,x0,x1,y) != nil
  1854	  && clobber(x0)
  1855	  && clobber(x1)
  1856	  && clobber(r0)
  1857	  && clobber(r1)
  1858	  && clobber(s0)
  1859	  && clobber(s1)
  1860	  && clobber(or)
  1861	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
  1862	
  1863	// Little-endian indexed loads
  1864	
  1865	(ORW                 x0:(MOVBZloadidx [i0] {s} p idx mem)
  1866	    sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
  1867	  && p.Op != OpSB
  1868	  && i1 == i0+1
  1869	  && x0.Uses == 1
  1870	  && x1.Uses == 1
  1871	  && sh.Uses == 1
  1872	  && mergePoint(b,x0,x1) != nil
  1873	  && clobber(x0)
  1874	  && clobber(x1)
  1875	  && clobber(sh)
  1876	  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
  1877	
  1878	(OR                  x0:(MOVBZloadidx [i0] {s} p idx mem)
  1879	    sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
  1880	  && p.Op != OpSB
  1881	  && i1 == i0+1
  1882	  && x0.Uses == 1
  1883	  && x1.Uses == 1
  1884	  && sh.Uses == 1
  1885	  && mergePoint(b,x0,x1) != nil
  1886	  && clobber(x0)
  1887	  && clobber(x1)
  1888	  && clobber(sh)
  1889	  -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
  1890	
  1891	(ORW                  r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
  1892	    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
  1893	  && i1 == i0+2
  1894	  && x0.Uses == 1
  1895	  && x1.Uses == 1
  1896	  && r0.Uses == 1
  1897	  && r1.Uses == 1
  1898	  && sh.Uses == 1
  1899	  && mergePoint(b,x0,x1) != nil
  1900	  && clobber(x0)
  1901	  && clobber(x1)
  1902	  && clobber(r0)
  1903	  && clobber(r1)
  1904	  && clobber(sh)
  1905	  -> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
  1906	
  1907	(OR                   r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
  1908	    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
  1909	  && i1 == i0+2
  1910	  && x0.Uses == 1
  1911	  && x1.Uses == 1
  1912	  && r0.Uses == 1
  1913	  && r1.Uses == 1
  1914	  && sh.Uses == 1
  1915	  && mergePoint(b,x0,x1) != nil
  1916	  && clobber(x0)
  1917	  && clobber(x1)
  1918	  && clobber(r0)
  1919	  && clobber(r1)
  1920	  && clobber(sh)
  1921	  -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
  1922	
  1923	(OR                   r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
  1924	    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
  1925	  && i1 == i0+4
  1926	  && x0.Uses == 1
  1927	  && x1.Uses == 1
  1928	  && r0.Uses == 1
  1929	  && r1.Uses == 1
  1930	  && sh.Uses == 1
  1931	  && mergePoint(b,x0,x1) != nil
  1932	  && clobber(x0)
  1933	  && clobber(x1)
  1934	  && clobber(r0)
  1935	  && clobber(r1)
  1936	  && clobber(sh)
  1937	  -> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
  1938	
  1939	(ORW
  1940	    s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1941	    or:(ORW
  1942	        s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1943		y))
  1944	  && p.Op != OpSB
  1945	  && i1 == i0+1
  1946	  && j1 == j0+8
  1947	  && j0 % 16 == 0
  1948	  && x0.Uses == 1
  1949	  && x1.Uses == 1
  1950	  && s0.Uses == 1
  1951	  && s1.Uses == 1
  1952	  && or.Uses == 1
  1953	  && mergePoint(b,x0,x1,y) != nil
  1954	  && clobber(x0)
  1955	  && clobber(x1)
  1956	  && clobber(s0)
  1957	  && clobber(s1)
  1958	  && clobber(or)
  1959	  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
  1960	
  1961	(OR
  1962	    s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1963	    or:(OR
  1964	        s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1965		y))
  1966	  && p.Op != OpSB
  1967	  && i1 == i0+1
  1968	  && j1 == j0+8
  1969	  && j0 % 16 == 0
  1970	  && x0.Uses == 1
  1971	  && x1.Uses == 1
  1972	  && s0.Uses == 1
  1973	  && s1.Uses == 1
  1974	  && or.Uses == 1
  1975	  && mergePoint(b,x0,x1,y) != nil
  1976	  && clobber(x0)
  1977	  && clobber(x1)
  1978	  && clobber(s0)
  1979	  && clobber(s1)
  1980	  && clobber(or)
  1981	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
  1982	
  1983	(OR
  1984	    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
  1985	    or:(OR
  1986	        s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
  1987		y))
  1988	  && i1 == i0+2
  1989	  && j1 == j0+16
  1990	  && j0 % 32 == 0
  1991	  && x0.Uses == 1
  1992	  && x1.Uses == 1
  1993	  && r0.Uses == 1
  1994	  && r1.Uses == 1
  1995	  && s0.Uses == 1
  1996	  && s1.Uses == 1
  1997	  && or.Uses == 1
  1998	  && mergePoint(b,x0,x1,y) != nil
  1999	  && clobber(x0)
  2000	  && clobber(x1)
  2001	  && clobber(r0)
  2002	  && clobber(r1)
  2003	  && clobber(s0)
  2004	  && clobber(s1)
  2005	  && clobber(or)
  2006	  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
  2007	
  2008	// Combine stores into store multiples.
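// STMn/STMGn store their n value operands to n consecutive 32-bit (64-bit) words
// starting at the given offset, using the store-multiple instructions.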
  2009	// 32-bit
  2010	(MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
  2011	  && p.Op != OpSB
  2012	  && x.Uses == 1
  2013	  && is20Bit(i-4)
  2014	  && clobber(x)
  2015	  -> (STM2 [i-4] {s} p w0 w1 mem)
  2016	(MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
  2017	  && x.Uses == 1
  2018	  && is20Bit(i-8)
  2019	  && clobber(x)
  2020	  -> (STM3 [i-8] {s} p w0 w1 w2 mem)
  2021	(MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
  2022	  && x.Uses == 1
  2023	  && is20Bit(i-12)
  2024	  && clobber(x)
  2025	  -> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
  2026	(STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
  2027	  && x.Uses == 1
  2028	  && is20Bit(i-8)
  2029	  && clobber(x)
  2030	  -> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
  2031	// 64-bit
  2032	(MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
  2033	  && p.Op != OpSB
  2034	  && x.Uses == 1
  2035	  && is20Bit(i-8)
  2036	  && clobber(x)
  2037	  -> (STMG2 [i-8] {s} p w0 w1 mem)
  2038	(MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
  2039	  && x.Uses == 1
  2040	  && is20Bit(i-16)
  2041	  && clobber(x)
  2042	  -> (STMG3 [i-16] {s} p w0 w1 w2 mem)
  2043	(MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
  2044	  && x.Uses == 1
  2045	  && is20Bit(i-24)
  2046	  && clobber(x)
  2047	  -> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
  2048	(STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
  2049	  && x.Uses == 1
  2050	  && is20Bit(i-16)
  2051	  && clobber(x)
  2052	  -> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
  2053	
  2054	// Convert 32-bit store multiples into 64-bit stores.
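// If the two words being stored are the high and low halves of the same register
// (x>>32 and x), a single 64-bit store of x writes the same bytes.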
  2055	(STM2 [i] {s} p (SRDconst [32] x) x mem) -> (MOVDstore [i] {s} p x mem)
