Source file src/pkg/cmd/compile/internal/amd64/ssa.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package amd64
     6	
     7	import (
     8		"fmt"
     9		"math"
    10	
    11		"cmd/compile/internal/gc"
    12		"cmd/compile/internal/ssa"
    13		"cmd/compile/internal/types"
    14		"cmd/internal/obj"
    15		"cmd/internal/obj/x86"
    16	)
    17	
     18	// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
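       	// It scans the block backwards, tracking whether the flags register is
       	// live; a marked constant load is kept as a MOV by ssaGenValue, since
       	// rewriting it to XOR would clobber those flags.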
    19	func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    20		flive := b.FlagsLiveAtEnd
    21		if b.Control != nil && b.Control.Type.IsFlags() {
    22			flive = true
    23		}
    24		for i := len(b.Values) - 1; i >= 0; i-- {
    25			v := b.Values[i]
    26			if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    27				// The "mark" is any non-nil Aux value.
    28				v.Aux = v
    29			}
    30			if v.Type.IsFlags() {
    31				flive = false
    32			}
    33			for _, a := range v.Args {
    34				if a.Type.IsFlags() {
    35					flive = true
    36				}
    37			}
    38		}
    39	}
    40	
    41	// loadByType returns the load instruction of the given type.
    42	func loadByType(t *types.Type) obj.As {
    43		// Avoid partial register write
    44		if !t.IsFloat() && t.Size() <= 2 {
    45			if t.Size() == 1 {
    46				return x86.AMOVBLZX
    47			} else {
    48				return x86.AMOVWLZX
    49			}
    50		}
    51		// Otherwise, there's no difference between load and store opcodes.
    52		return storeByType(t)
    53	}
    54	
    55	// storeByType returns the store instruction of the given type.
    56	func storeByType(t *types.Type) obj.As {
    57		width := t.Size()
    58		if t.IsFloat() {
    59			switch width {
    60			case 4:
    61				return x86.AMOVSS
    62			case 8:
    63				return x86.AMOVSD
    64			}
    65		} else {
    66			switch width {
    67			case 1:
    68				return x86.AMOVB
    69			case 2:
    70				return x86.AMOVW
    71			case 4:
    72				return x86.AMOVL
    73			case 8:
    74				return x86.AMOVQ
    75			}
    76		}
    77		panic("bad store type")
    78	}
    79	
    80	// moveByType returns the reg->reg move instruction of the given type.
    81	func moveByType(t *types.Type) obj.As {
    82		if t.IsFloat() {
    83			// Moving the whole sse2 register is faster
    84			// than moving just the correct low portion of it.
    85			// There is no xmm->xmm move with 1 byte opcode,
    86			// so use movups, which has 2 byte opcode.
    87			return x86.AMOVUPS
    88		} else {
    89			switch t.Size() {
    90			case 1:
    91				// Avoids partial register write
    92				return x86.AMOVL
    93			case 2:
    94				return x86.AMOVL
    95			case 4:
    96				return x86.AMOVL
    97			case 8:
    98				return x86.AMOVQ
    99			case 16:
   100				return x86.AMOVUPS // int128s are in SSE registers
   101			default:
   102				panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   103			}
   104		}
   105	}
   106	
   107	// opregreg emits instructions for
   108	//     dest := dest(To) op src(From)
   109	// and also returns the created obj.Prog so it
   110	// may be further adjusted (offset, scale, etc).
   111	func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
   112		p := s.Prog(op)
   113		p.From.Type = obj.TYPE_REG
   114		p.To.Type = obj.TYPE_REG
   115		p.To.Reg = dest
   116		p.From.Reg = src
   117		return p
   118	}
   119	
   120	// memIdx fills out a as an indexed memory reference for v.
   121	// It assumes that the base register and the index register
   122	// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
   123	// The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
   124	func memIdx(a *obj.Addr, v *ssa.Value) {
   125		r, i := v.Args[0].Reg(), v.Args[1].Reg()
   126		a.Type = obj.TYPE_MEM
   127		a.Scale = v.Op.Scale()
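       		// SP can't be used as an index register. When the scale is 1 the
       		// base and index are interchangeable, so swap them to keep SP in
       		// the base slot.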
   128		if a.Scale == 1 && i == x86.REG_SP {
   129			r, i = i, r
   130		}
   131		a.Reg = r
   132		a.Index = i
   133	}
   134	
    135	// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
    136	// see runtime/mkduff.go.
   137	func duffStart(size int64) int64 {
   138		x, _ := duff(size)
   139		return x
   140	}
   141	func duffAdj(size int64) int64 {
   142		_, x := duff(size)
   143		return x
   144	}
   145	
   146	// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   147	// required to use the duffzero mechanism for a block of the given size.
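       	// For example, size = 96 needs six 16-byte clears: one full block of 4
       	// MOVUPSs plus 2 extra MOVUPSs. The returned offset backs up past the
       	// preceding block's LEAQ and its last two MOVUPSs, and adj = -16*(4-2) =
       	// -32 pre-adjusts the pointer so those two clears land on the first 32
       	// bytes of the target.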
   148	func duff(size int64) (int64, int64) {
   149		if size < 32 || size > 1024 || size%dzClearStep != 0 {
   150			panic("bad duffzero size")
   151		}
   152		steps := size / dzClearStep
   153		blocks := steps / dzBlockLen
   154		steps %= dzBlockLen
   155		off := dzBlockSize * (dzBlocks - blocks)
   156		var adj int64
   157		if steps != 0 {
   158			off -= dzLeaqSize
   159			off -= dzMovSize * steps
   160			adj -= dzClearStep * (dzBlockLen - steps)
   161		}
   162		return off, adj
   163	}
   164	
   165	func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   166		switch v.Op {
   167		case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   168			r := v.Reg()
   169			r1 := v.Args[0].Reg()
   170			r2 := v.Args[1].Reg()
   171			switch {
   172			case r == r1:
   173				p := s.Prog(v.Op.Asm())
   174				p.From.Type = obj.TYPE_REG
   175				p.From.Reg = r2
   176				p.To.Type = obj.TYPE_REG
   177				p.To.Reg = r
   178			case r == r2:
   179				p := s.Prog(v.Op.Asm())
   180				p.From.Type = obj.TYPE_REG
   181				p.From.Reg = r1
   182				p.To.Type = obj.TYPE_REG
   183				p.To.Reg = r
   184			default:
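       			// The output register differs from both inputs, so use a
       			// three-operand LEA to compute r1+r2 without clobbering either.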
   185				var asm obj.As
   186				if v.Op == ssa.OpAMD64ADDQ {
   187					asm = x86.ALEAQ
   188				} else {
   189					asm = x86.ALEAL
   190				}
   191				p := s.Prog(asm)
   192				p.From.Type = obj.TYPE_MEM
   193				p.From.Reg = r1
   194				p.From.Scale = 1
   195				p.From.Index = r2
   196				p.To.Type = obj.TYPE_REG
   197				p.To.Reg = r
   198			}
   199		// 2-address opcode arithmetic
   200		case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   201			ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   202			ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   203			ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   204			ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   205			ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   206			ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   207			ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   208			ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   209			ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   210			ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   211			ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   212			ssa.OpAMD64PXOR,
   213			ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
   214			ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
   215			ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
   216			r := v.Reg()
   217			if r != v.Args[0].Reg() {
   218				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   219			}
   220			opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
   221	
   222		case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   223			// Arg[0] (the dividend) is in AX.
   224			// Arg[1] (the divisor) can be in any other register.
   225			// Result[0] (the quotient) is in AX.
   226			// Result[1] (the remainder) is in DX.
   227			r := v.Args[1].Reg()
   228	
   229			// Zero extend dividend.
   230			c := s.Prog(x86.AXORL)
   231			c.From.Type = obj.TYPE_REG
   232			c.From.Reg = x86.REG_DX
   233			c.To.Type = obj.TYPE_REG
   234			c.To.Reg = x86.REG_DX
   235	
   236			// Issue divide.
   237			p := s.Prog(v.Op.Asm())
   238			p.From.Type = obj.TYPE_REG
   239			p.From.Reg = r
   240	
   241		case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   242			// Arg[0] (the dividend) is in AX.
   243			// Arg[1] (the divisor) can be in any other register.
   244			// Result[0] (the quotient) is in AX.
   245			// Result[1] (the remainder) is in DX.
   246			r := v.Args[1].Reg()
   247			var j1 *obj.Prog
   248	
   249			// CPU faults upon signed overflow, which occurs when the most
   250			// negative int is divided by -1. Handle divide by -1 as a special case.
   251			if ssa.NeedsFixUp(v) {
   252				var c *obj.Prog
   253				switch v.Op {
   254				case ssa.OpAMD64DIVQ:
   255					c = s.Prog(x86.ACMPQ)
   256				case ssa.OpAMD64DIVL:
   257					c = s.Prog(x86.ACMPL)
   258				case ssa.OpAMD64DIVW:
   259					c = s.Prog(x86.ACMPW)
   260				}
   261				c.From.Type = obj.TYPE_REG
   262				c.From.Reg = r
   263				c.To.Type = obj.TYPE_CONST
   264				c.To.Offset = -1
   265				j1 = s.Prog(x86.AJEQ)
   266				j1.To.Type = obj.TYPE_BRANCH
   267			}
   268	
   269			// Sign extend dividend.
   270			switch v.Op {
   271			case ssa.OpAMD64DIVQ:
   272				s.Prog(x86.ACQO)
   273			case ssa.OpAMD64DIVL:
   274				s.Prog(x86.ACDQ)
   275			case ssa.OpAMD64DIVW:
   276				s.Prog(x86.ACWD)
   277			}
   278	
   279			// Issue divide.
   280			p := s.Prog(v.Op.Asm())
   281			p.From.Type = obj.TYPE_REG
   282			p.From.Reg = r
   283	
   284			if j1 != nil {
   285				// Skip over -1 fixup code.
   286				j2 := s.Prog(obj.AJMP)
   287				j2.To.Type = obj.TYPE_BRANCH
   288	
   289				// Issue -1 fixup code.
   290				// n / -1 = -n
   291				var n1 *obj.Prog
   292				switch v.Op {
   293				case ssa.OpAMD64DIVQ:
   294					n1 = s.Prog(x86.ANEGQ)
   295				case ssa.OpAMD64DIVL:
   296					n1 = s.Prog(x86.ANEGL)
   297				case ssa.OpAMD64DIVW:
   298					n1 = s.Prog(x86.ANEGW)
   299				}
   300				n1.To.Type = obj.TYPE_REG
   301				n1.To.Reg = x86.REG_AX
   302	
   303				// n % -1 == 0
   304				n2 := s.Prog(x86.AXORL)
   305				n2.From.Type = obj.TYPE_REG
   306				n2.From.Reg = x86.REG_DX
   307				n2.To.Type = obj.TYPE_REG
   308				n2.To.Reg = x86.REG_DX
   309	
   310				// TODO(khr): issue only the -1 fixup code we need.
   311				// For instance, if only the quotient is used, no point in zeroing the remainder.
   312	
   313				j1.To.Val = n1
   314				j2.To.Val = s.Pc()
   315			}
   316	
   317		case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
    318		// The frontend rewrites constant division by 8/16/32 bit integers into
    319		// HMUL by a constant.
    320		// SSA rewrites generate the 64 bit versions.
   321	
   322			// Arg[0] is already in AX as it's the only register we allow
   323			// and DX is the only output we care about (the high bits)
   324			p := s.Prog(v.Op.Asm())
   325			p.From.Type = obj.TYPE_REG
   326			p.From.Reg = v.Args[1].Reg()
   327	
   328			// IMULB puts the high portion in AH instead of DL,
   329			// so move it to DL for consistency
   330			if v.Type.Size() == 1 {
   331				m := s.Prog(x86.AMOVB)
   332				m.From.Type = obj.TYPE_REG
   333				m.From.Reg = x86.REG_AH
   334				m.To.Type = obj.TYPE_REG
   335				m.To.Reg = x86.REG_DX
   336			}
   337	
   338		case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
   339			// Arg[0] is already in AX as it's the only register we allow
   340			// results lo in AX
   341			p := s.Prog(v.Op.Asm())
   342			p.From.Type = obj.TYPE_REG
   343			p.From.Reg = v.Args[1].Reg()
   344	
   345		case ssa.OpAMD64MULQU2:
   346			// Arg[0] is already in AX as it's the only register we allow
   347			// results hi in DX, lo in AX
   348			p := s.Prog(v.Op.Asm())
   349			p.From.Type = obj.TYPE_REG
   350			p.From.Reg = v.Args[1].Reg()
   351	
   352		case ssa.OpAMD64DIVQU2:
    353		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
   354			// results q in AX, r in DX
   355			p := s.Prog(v.Op.Asm())
   356			p.From.Type = obj.TYPE_REG
   357			p.From.Reg = v.Args[2].Reg()
   358	
   359		case ssa.OpAMD64AVGQU:
   360			// compute (x+y)/2 unsigned.
   361			// Do a 64-bit add, the overflow goes into the carry.
   362			// Shift right once and pull the carry back into the 63rd bit.
   363			r := v.Reg()
   364			if r != v.Args[0].Reg() {
   365				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   366			}
   367			p := s.Prog(x86.AADDQ)
   368			p.From.Type = obj.TYPE_REG
   369			p.To.Type = obj.TYPE_REG
   370			p.To.Reg = r
   371			p.From.Reg = v.Args[1].Reg()
   372			p = s.Prog(x86.ARCRQ)
   373			p.From.Type = obj.TYPE_CONST
   374			p.From.Offset = 1
   375			p.To.Type = obj.TYPE_REG
   376			p.To.Reg = r
   377	
   378		case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
   379			r := v.Reg0()
   380			r0 := v.Args[0].Reg()
   381			r1 := v.Args[1].Reg()
   382			switch r {
   383			case r0:
   384				p := s.Prog(v.Op.Asm())
   385				p.From.Type = obj.TYPE_REG
   386				p.From.Reg = r1
   387				p.To.Type = obj.TYPE_REG
   388				p.To.Reg = r
   389			case r1:
   390				p := s.Prog(v.Op.Asm())
   391				p.From.Type = obj.TYPE_REG
   392				p.From.Reg = r0
   393				p.To.Type = obj.TYPE_REG
   394				p.To.Reg = r
   395			default:
   396				v.Fatalf("output not in same register as an input %s", v.LongString())
   397			}
   398	
   399		case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
   400			p := s.Prog(v.Op.Asm())
   401			p.From.Type = obj.TYPE_REG
   402			p.From.Reg = v.Args[1].Reg()
   403			p.To.Type = obj.TYPE_REG
   404			p.To.Reg = v.Reg0()
   405	
   406		case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
   407			p := s.Prog(v.Op.Asm())
   408			p.From.Type = obj.TYPE_CONST
   409			p.From.Offset = v.AuxInt
   410			p.To.Type = obj.TYPE_REG
   411			p.To.Reg = v.Reg0()
   412	
   413		case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   414			r := v.Reg()
   415			a := v.Args[0].Reg()
   416			if r == a {
   417				switch v.AuxInt {
   418				case 1:
   419					var asm obj.As
   420					// Software optimization manual recommends add $1,reg.
    421				// But inc/dec is 1 byte smaller. ICC always uses inc;
    422				// Clang/GCC choose depending on flags, but prefer add.
    423				// Experiments show that inc/dec is both a little faster
    424				// and makes binaries a little smaller.
   425					if v.Op == ssa.OpAMD64ADDQconst {
   426						asm = x86.AINCQ
   427					} else {
   428						asm = x86.AINCL
   429					}
   430					p := s.Prog(asm)
   431					p.To.Type = obj.TYPE_REG
   432					p.To.Reg = r
   433					return
   434				case -1:
   435					var asm obj.As
   436					if v.Op == ssa.OpAMD64ADDQconst {
   437						asm = x86.ADECQ
   438					} else {
   439						asm = x86.ADECL
   440					}
   441					p := s.Prog(asm)
   442					p.To.Type = obj.TYPE_REG
   443					p.To.Reg = r
   444					return
   445				case 0x80:
   446					// 'SUBQ $-0x80, r' is shorter to encode than
   447					// and functionally equivalent to 'ADDQ $0x80, r'.
   448					asm := x86.ASUBL
   449					if v.Op == ssa.OpAMD64ADDQconst {
   450						asm = x86.ASUBQ
   451					}
   452					p := s.Prog(asm)
   453					p.From.Type = obj.TYPE_CONST
   454					p.From.Offset = -0x80
   455					p.To.Type = obj.TYPE_REG
   456					p.To.Reg = r
   457					return
   458	
   459				}
   460				p := s.Prog(v.Op.Asm())
   461				p.From.Type = obj.TYPE_CONST
   462				p.From.Offset = v.AuxInt
   463				p.To.Type = obj.TYPE_REG
   464				p.To.Reg = r
   465				return
   466			}
   467			var asm obj.As
   468			if v.Op == ssa.OpAMD64ADDQconst {
   469				asm = x86.ALEAQ
   470			} else {
   471				asm = x86.ALEAL
   472			}
   473			p := s.Prog(asm)
   474			p.From.Type = obj.TYPE_MEM
   475			p.From.Reg = a
   476			p.From.Offset = v.AuxInt
   477			p.To.Type = obj.TYPE_REG
   478			p.To.Reg = r
   479	
   480		case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
   481			ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
   482			ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
   483			ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
   484			ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
   485			ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
   486			ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
   487			ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
   488			ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
   489			ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
   490			ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
   491			ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
   492			r := v.Reg()
   493			if r != v.Args[0].Reg() {
   494				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   495			}
   496			p := s.Prog(v.Op.Asm())
   497			p.From.Type = obj.TYPE_REG
   498			p.From.Reg = v.Args[1].Reg()
   499			p.To.Type = obj.TYPE_REG
   500			p.To.Reg = r
   501	
   502		case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
   503			r := v.Reg()
   504			if r != v.Args[0].Reg() {
   505				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   506			}
   507			// Flag condition: ^ZERO || PARITY
   508			// Generate:
   509			//   CMOV*NE  SRC,DST
   510			//   CMOV*PS  SRC,DST
   511			p := s.Prog(v.Op.Asm())
   512			p.From.Type = obj.TYPE_REG
   513			p.From.Reg = v.Args[1].Reg()
   514			p.To.Type = obj.TYPE_REG
   515			p.To.Reg = r
   516			var q *obj.Prog
   517			if v.Op == ssa.OpAMD64CMOVQNEF {
   518				q = s.Prog(x86.ACMOVQPS)
   519			} else if v.Op == ssa.OpAMD64CMOVLNEF {
   520				q = s.Prog(x86.ACMOVLPS)
   521			} else {
   522				q = s.Prog(x86.ACMOVWPS)
   523			}
   524			q.From.Type = obj.TYPE_REG
   525			q.From.Reg = v.Args[1].Reg()
   526			q.To.Type = obj.TYPE_REG
   527			q.To.Reg = r
   528	
   529		case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
   530			r := v.Reg()
   531			if r != v.Args[0].Reg() {
   532				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   533			}
   534	
   535			// Flag condition: ZERO && !PARITY
   536			// Generate:
   537			//   MOV      SRC,AX
   538			//   CMOV*NE  DST,AX
   539			//   CMOV*PC  AX,DST
   540			//
   541			// TODO(rasky): we could generate:
   542			//   CMOV*NE  DST,SRC
   543			//   CMOV*PC  SRC,DST
   544			// But this requires a way for regalloc to know that SRC might be
   545			// clobbered by this instruction.
   546			if v.Args[1].Reg() != x86.REG_AX {
   547				opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
   548			}
   549			p := s.Prog(v.Op.Asm())
   550			p.From.Type = obj.TYPE_REG
   551			p.From.Reg = r
   552			p.To.Type = obj.TYPE_REG
   553			p.To.Reg = x86.REG_AX
   554			var q *obj.Prog
   555			if v.Op == ssa.OpAMD64CMOVQEQF {
   556				q = s.Prog(x86.ACMOVQPC)
   557			} else if v.Op == ssa.OpAMD64CMOVLEQF {
   558				q = s.Prog(x86.ACMOVLPC)
   559			} else {
   560				q = s.Prog(x86.ACMOVWPC)
   561			}
   562			q.From.Type = obj.TYPE_REG
   563			q.From.Reg = x86.REG_AX
   564			q.To.Type = obj.TYPE_REG
   565			q.To.Reg = r
   566	
   567		case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
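       		// IMUL's three-operand immediate form: dst = src * constant.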
   568			r := v.Reg()
   569			p := s.Prog(v.Op.Asm())
   570			p.From.Type = obj.TYPE_CONST
   571			p.From.Offset = v.AuxInt
   572			p.To.Type = obj.TYPE_REG
   573			p.To.Reg = r
   574			p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
   575	
   576		case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   577			ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   578			ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   579			ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   580			ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   581			ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   582			ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   583			ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   584			r := v.Reg()
   585			if r != v.Args[0].Reg() {
   586				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   587			}
   588			p := s.Prog(v.Op.Asm())
   589			p.From.Type = obj.TYPE_CONST
   590			p.From.Offset = v.AuxInt
   591			p.To.Type = obj.TYPE_REG
   592			p.To.Reg = r
   593		case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
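       		// SBB r, r computes r - r - CF, which is 0 if the carry flag is
       		// clear and -1 (all ones) if it is set.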
   594			r := v.Reg()
   595			p := s.Prog(v.Op.Asm())
   596			p.From.Type = obj.TYPE_REG
   597			p.From.Reg = r
   598			p.To.Type = obj.TYPE_REG
   599			p.To.Reg = r
   600		case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
   601			ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
   602			ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   603			p := s.Prog(v.Op.Asm())
   604			memIdx(&p.From, v)
   605			o := v.Reg()
   606			p.To.Type = obj.TYPE_REG
   607			p.To.Reg = o
   608			if v.AuxInt != 0 && v.Aux == nil {
   609				// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
   610				switch v.Op {
   611				case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   612					p = s.Prog(x86.ALEAQ)
   613				case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
   614					p = s.Prog(x86.ALEAL)
   615				case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   616					p = s.Prog(x86.ALEAW)
   617				}
   618				p.From.Type = obj.TYPE_MEM
   619				p.From.Reg = o
   620				p.To.Type = obj.TYPE_REG
   621				p.To.Reg = o
   622			}
   623			gc.AddAux(&p.From, v)
   624		case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
   625			p := s.Prog(v.Op.Asm())
   626			p.From.Type = obj.TYPE_MEM
   627			p.From.Reg = v.Args[0].Reg()
   628			gc.AddAux(&p.From, v)
   629			p.To.Type = obj.TYPE_REG
   630			p.To.Reg = v.Reg()
   631		case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   632			ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   633			ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   634			opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   635		case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   636			// Go assembler has swapped operands for UCOMISx relative to CMP,
   637			// must account for that right here.
   638			opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   639		case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   640			p := s.Prog(v.Op.Asm())
   641			p.From.Type = obj.TYPE_REG
   642			p.From.Reg = v.Args[0].Reg()
   643			p.To.Type = obj.TYPE_CONST
   644			p.To.Offset = v.AuxInt
   645		case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
   646			ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   647			ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
   648			ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
   649			ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
   650			op := v.Op
   651			if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
   652				// Emit 32-bit version because it's shorter
   653				op = ssa.OpAMD64BTLconst
   654			}
   655			p := s.Prog(op.Asm())
   656			p.From.Type = obj.TYPE_CONST
   657			p.From.Offset = v.AuxInt
   658			p.To.Type = obj.TYPE_REG
   659			p.To.Reg = v.Args[0].Reg()
   660		case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
   661			p := s.Prog(v.Op.Asm())
   662			p.From.Type = obj.TYPE_MEM
   663			p.From.Reg = v.Args[0].Reg()
   664			gc.AddAux(&p.From, v)
   665			p.To.Type = obj.TYPE_REG
   666			p.To.Reg = v.Args[1].Reg()
   667		case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
   668			sc := v.AuxValAndOff()
   669			p := s.Prog(v.Op.Asm())
   670			p.From.Type = obj.TYPE_MEM
   671			p.From.Reg = v.Args[0].Reg()
   672			gc.AddAux2(&p.From, v, sc.Off())
   673			p.To.Type = obj.TYPE_CONST
   674			p.To.Offset = sc.Val()
   675		case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   676			x := v.Reg()
   677	
   678			// If flags aren't live (indicated by v.Aux == nil),
   679			// then we can rewrite MOV $0, AX into XOR AX, AX.
   680			if v.AuxInt == 0 && v.Aux == nil {
   681				p := s.Prog(x86.AXORL)
   682				p.From.Type = obj.TYPE_REG
   683				p.From.Reg = x
   684				p.To.Type = obj.TYPE_REG
   685				p.To.Reg = x
   686				break
   687			}
   688	
   689			asm := v.Op.Asm()
   690			// Use MOVL to move a small constant into a register
   691			// when the constant is positive and fits into 32 bits.
   692			if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   693				// The upper 32bit are zeroed automatically when using MOVL.
   694				asm = x86.AMOVL
   695			}
   696			p := s.Prog(asm)
   697			p.From.Type = obj.TYPE_CONST
   698			p.From.Offset = v.AuxInt
   699			p.To.Type = obj.TYPE_REG
   700			p.To.Reg = x
   701		case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   702			x := v.Reg()
   703			p := s.Prog(v.Op.Asm())
   704			p.From.Type = obj.TYPE_FCONST
   705			p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   706			p.To.Type = obj.TYPE_REG
   707			p.To.Reg = x
   708		case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   709			p := s.Prog(v.Op.Asm())
   710			p.From.Type = obj.TYPE_MEM
   711			p.From.Reg = v.Args[0].Reg()
   712			gc.AddAux(&p.From, v)
   713			p.To.Type = obj.TYPE_REG
   714			p.To.Reg = v.Reg()
   715		case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
   716			ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
   717			p := s.Prog(v.Op.Asm())
   718			memIdx(&p.From, v)
   719			gc.AddAux(&p.From, v)
   720			p.To.Type = obj.TYPE_REG
   721			p.To.Reg = v.Reg()
   722		case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
   723			ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
   724			ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
   725			ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
   726			p := s.Prog(v.Op.Asm())
   727			p.From.Type = obj.TYPE_REG
   728			p.From.Reg = v.Args[1].Reg()
   729			p.To.Type = obj.TYPE_MEM
   730			p.To.Reg = v.Args[0].Reg()
   731			gc.AddAux(&p.To, v)
   732		case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
   733			ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2:
   734			p := s.Prog(v.Op.Asm())
   735			p.From.Type = obj.TYPE_REG
   736			p.From.Reg = v.Args[2].Reg()
   737			memIdx(&p.To, v)
   738			gc.AddAux(&p.To, v)
   739		case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
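       		// As with ADDQconst above, use INC/DEC for +1/-1: the encoding is
       		// one byte shorter than ADD with an immediate.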
   740			sc := v.AuxValAndOff()
   741			off := sc.Off()
   742			val := sc.Val()
   743			if val == 1 || val == -1 {
   744				var asm obj.As
   745				if v.Op == ssa.OpAMD64ADDQconstmodify {
   746					if val == 1 {
   747						asm = x86.AINCQ
   748					} else {
   749						asm = x86.ADECQ
   750					}
   751				} else {
   752					if val == 1 {
   753						asm = x86.AINCL
   754					} else {
   755						asm = x86.ADECL
   756					}
   757				}
   758				p := s.Prog(asm)
   759				p.To.Type = obj.TYPE_MEM
   760				p.To.Reg = v.Args[0].Reg()
   761				gc.AddAux2(&p.To, v, off)
   762				break
   763			}
   764			fallthrough
   765		case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
   766			ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
   767			ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
   768			sc := v.AuxValAndOff()
   769			off := sc.Off()
   770			val := sc.Val()
   771			p := s.Prog(v.Op.Asm())
   772			p.From.Type = obj.TYPE_CONST
   773			p.From.Offset = val
   774			p.To.Type = obj.TYPE_MEM
   775			p.To.Reg = v.Args[0].Reg()
   776			gc.AddAux2(&p.To, v, off)
   777		case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   778			p := s.Prog(v.Op.Asm())
   779			p.From.Type = obj.TYPE_CONST
   780			sc := v.AuxValAndOff()
   781			p.From.Offset = sc.Val()
   782			p.To.Type = obj.TYPE_MEM
   783			p.To.Reg = v.Args[0].Reg()
   784			gc.AddAux2(&p.To, v, sc.Off())
   785		case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
   786			p := s.Prog(v.Op.Asm())
   787			p.From.Type = obj.TYPE_CONST
   788			sc := v.AuxValAndOff()
   789			p.From.Offset = sc.Val()
   790			memIdx(&p.To, v)
   791			gc.AddAux2(&p.To, v, sc.Off())
   792		case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   793			ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   794			ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   795			opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   796		case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   797			r := v.Reg()
   798			// Break false dependency on destination register.
   799			opregreg(s, x86.AXORPS, r, r)
   800			opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   801		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   802			var p *obj.Prog
   803			switch v.Op {
   804			case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
   805				p = s.Prog(x86.AMOVQ)
   806			case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   807				p = s.Prog(x86.AMOVL)
   808			}
   809			p.From.Type = obj.TYPE_REG
   810			p.From.Reg = v.Args[0].Reg()
   811			p.To.Type = obj.TYPE_REG
   812			p.To.Reg = v.Reg()
   813		case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
   814			ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
   815			ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
   816			ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
   817			ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
   818			p := s.Prog(v.Op.Asm())
   819			p.From.Type = obj.TYPE_MEM
   820			p.From.Reg = v.Args[1].Reg()
   821			gc.AddAux(&p.From, v)
   822			p.To.Type = obj.TYPE_REG
   823			p.To.Reg = v.Reg()
   824			if v.Reg() != v.Args[0].Reg() {
   825				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   826			}
   827		case ssa.OpAMD64DUFFZERO:
   828			off := duffStart(v.AuxInt)
   829			adj := duffAdj(v.AuxInt)
   830			var p *obj.Prog
   831			if adj != 0 {
   832				p = s.Prog(x86.ALEAQ)
   833				p.From.Type = obj.TYPE_MEM
   834				p.From.Offset = adj
   835				p.From.Reg = x86.REG_DI
   836				p.To.Type = obj.TYPE_REG
   837				p.To.Reg = x86.REG_DI
   838			}
   839			p = s.Prog(obj.ADUFFZERO)
   840			p.To.Type = obj.TYPE_ADDR
   841			p.To.Sym = gc.Duffzero
   842			p.To.Offset = off
   843		case ssa.OpAMD64MOVOconst:
   844			if v.AuxInt != 0 {
   845				v.Fatalf("MOVOconst can only do constant=0")
   846			}
   847			r := v.Reg()
   848			opregreg(s, x86.AXORPS, r, r)
   849		case ssa.OpAMD64DUFFCOPY:
   850			p := s.Prog(obj.ADUFFCOPY)
   851			p.To.Type = obj.TYPE_ADDR
   852			p.To.Sym = gc.Duffcopy
   853			p.To.Offset = v.AuxInt
   854	
   855		case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   856			if v.Type.IsMemory() {
   857				return
   858			}
   859			x := v.Args[0].Reg()
   860			y := v.Reg()
   861			if x != y {
   862				opregreg(s, moveByType(v.Type), y, x)
   863			}
   864		case ssa.OpLoadReg:
   865			if v.Type.IsFlags() {
   866				v.Fatalf("load flags not implemented: %v", v.LongString())
   867				return
   868			}
   869			p := s.Prog(loadByType(v.Type))
   870			gc.AddrAuto(&p.From, v.Args[0])
   871			p.To.Type = obj.TYPE_REG
   872			p.To.Reg = v.Reg()
   873	
   874		case ssa.OpStoreReg:
   875			if v.Type.IsFlags() {
   876				v.Fatalf("store flags not implemented: %v", v.LongString())
   877				return
   878			}
   879			p := s.Prog(storeByType(v.Type))
   880			p.From.Type = obj.TYPE_REG
   881			p.From.Reg = v.Args[0].Reg()
   882			gc.AddrAuto(&p.To, v)
   883		case ssa.OpAMD64LoweredGetClosurePtr:
   884			// Closure pointer is DX.
   885			gc.CheckLoweredGetClosurePtr(v)
   886		case ssa.OpAMD64LoweredGetG:
   887			r := v.Reg()
   888			// See the comments in cmd/internal/obj/x86/obj6.go
   889			// near CanUse1InsnTLS for a detailed explanation of these instructions.
   890			if x86.CanUse1InsnTLS(gc.Ctxt) {
   891				// MOVQ (TLS), r
   892				p := s.Prog(x86.AMOVQ)
   893				p.From.Type = obj.TYPE_MEM
   894				p.From.Reg = x86.REG_TLS
   895				p.To.Type = obj.TYPE_REG
   896				p.To.Reg = r
   897			} else {
   898				// MOVQ TLS, r
   899				// MOVQ (r)(TLS*1), r
   900				p := s.Prog(x86.AMOVQ)
   901				p.From.Type = obj.TYPE_REG
   902				p.From.Reg = x86.REG_TLS
   903				p.To.Type = obj.TYPE_REG
   904				p.To.Reg = r
   905				q := s.Prog(x86.AMOVQ)
   906				q.From.Type = obj.TYPE_MEM
   907				q.From.Reg = r
   908				q.From.Index = x86.REG_TLS
   909				q.From.Scale = 1
   910				q.To.Type = obj.TYPE_REG
   911				q.To.Reg = r
   912			}
   913		case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
   914			s.Call(v)
   915	
   916		case ssa.OpAMD64LoweredGetCallerPC:
   917			p := s.Prog(x86.AMOVQ)
   918			p.From.Type = obj.TYPE_MEM
   919			p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
   920			p.From.Name = obj.NAME_PARAM
   921			p.To.Type = obj.TYPE_REG
   922			p.To.Reg = v.Reg()
   923	
   924		case ssa.OpAMD64LoweredGetCallerSP:
   925			// caller's SP is the address of the first arg
   926			mov := x86.AMOVQ
   927			if gc.Widthptr == 4 {
   928				mov = x86.AMOVL
   929			}
   930			p := s.Prog(mov)
   931			p.From.Type = obj.TYPE_ADDR
   932			p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
   933			p.From.Name = obj.NAME_PARAM
   934			p.To.Type = obj.TYPE_REG
   935			p.To.Reg = v.Reg()
   936	
   937		case ssa.OpAMD64LoweredWB:
   938			p := s.Prog(obj.ACALL)
   939			p.To.Type = obj.TYPE_MEM
   940			p.To.Name = obj.NAME_EXTERN
   941			p.To.Sym = v.Aux.(*obj.LSym)
   942	
   943		case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
   944			p := s.Prog(obj.ACALL)
   945			p.To.Type = obj.TYPE_MEM
   946			p.To.Name = obj.NAME_EXTERN
   947			p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
   948			s.UseArgs(int64(2 * gc.Widthptr)) // space used in callee args area by assembly stubs
   949	
   950		case ssa.OpAMD64LoweredPanicExtendA, ssa.OpAMD64LoweredPanicExtendB, ssa.OpAMD64LoweredPanicExtendC:
   951			p := s.Prog(obj.ACALL)
   952			p.To.Type = obj.TYPE_MEM
   953			p.To.Name = obj.NAME_EXTERN
   954			p.To.Sym = gc.ExtendCheckFunc[v.AuxInt]
   955			s.UseArgs(int64(3 * gc.Widthptr)) // space used in callee args area by assembly stubs
   956	
   957		case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
   958			ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
   959			ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
   960			r := v.Reg()
   961			if r != v.Args[0].Reg() {
   962				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   963			}
   964			p := s.Prog(v.Op.Asm())
   965			p.To.Type = obj.TYPE_REG
   966			p.To.Reg = r
   967	
   968		case ssa.OpAMD64NEGLflags:
   969			r := v.Reg0()
   970			if r != v.Args[0].Reg() {
   971				v.Fatalf("input[0] and output not in same register %s", v.LongString())
   972			}
   973			p := s.Prog(v.Op.Asm())
   974			p.To.Type = obj.TYPE_REG
   975			p.To.Reg = r
   976	
   977		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
   978			p := s.Prog(v.Op.Asm())
   979			p.From.Type = obj.TYPE_REG
   980			p.From.Reg = v.Args[0].Reg()
   981			p.To.Type = obj.TYPE_REG
   982			switch v.Op {
   983			case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
   984				p.To.Reg = v.Reg0()
   985			case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
   986				p.To.Reg = v.Reg()
   987			}
   988		case ssa.OpAMD64ROUNDSD:
   989			p := s.Prog(v.Op.Asm())
   990			val := v.AuxInt
   991			// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
   992			if val != 0 && val != 1 && val != 2 && val != 3 {
   993				v.Fatalf("Invalid rounding mode")
   994			}
   995			p.From.Offset = val
   996			p.From.Type = obj.TYPE_CONST
   997			p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
   998			p.To.Type = obj.TYPE_REG
   999			p.To.Reg = v.Reg()
  1000		case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
  1001			if v.Args[0].Reg() != v.Reg() {
  1002				// POPCNT on Intel has a false dependency on the destination register.
  1003				// Xor register with itself to break the dependency.
  1004				p := s.Prog(x86.AXORQ)
  1005				p.From.Type = obj.TYPE_REG
  1006				p.From.Reg = v.Reg()
  1007				p.To.Type = obj.TYPE_REG
  1008				p.To.Reg = v.Reg()
  1009			}
  1010			p := s.Prog(v.Op.Asm())
  1011			p.From.Type = obj.TYPE_REG
  1012			p.From.Reg = v.Args[0].Reg()
  1013			p.To.Type = obj.TYPE_REG
  1014			p.To.Reg = v.Reg()
  1015	
  1016		case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
  1017			ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
  1018			ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
  1019			ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
  1020			ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
  1021			ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
  1022			ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
  1023			ssa.OpAMD64SETO:
  1024			p := s.Prog(v.Op.Asm())
  1025			p.To.Type = obj.TYPE_REG
  1026			p.To.Reg = v.Reg()
  1027	
  1028		case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
  1029			ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
  1030			ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
  1031			ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
  1032			ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
  1033			p := s.Prog(v.Op.Asm())
  1034			p.To.Type = obj.TYPE_MEM
  1035			p.To.Reg = v.Args[0].Reg()
  1036			gc.AddAux(&p.To, v)
  1037	
  1038		case ssa.OpAMD64SETNEF:
  1039			p := s.Prog(v.Op.Asm())
  1040			p.To.Type = obj.TYPE_REG
  1041			p.To.Reg = v.Reg()
  1042			q := s.Prog(x86.ASETPS)
  1043			q.To.Type = obj.TYPE_REG
  1044			q.To.Reg = x86.REG_AX
   1045		// ORL avoids a partial register write and is smaller than ORQ, which the old compiler used.
  1046			opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
  1047	
  1048		case ssa.OpAMD64SETEQF:
  1049			p := s.Prog(v.Op.Asm())
  1050			p.To.Type = obj.TYPE_REG
  1051			p.To.Reg = v.Reg()
  1052			q := s.Prog(x86.ASETPC)
  1053			q.To.Type = obj.TYPE_REG
  1054			q.To.Reg = x86.REG_AX
   1055		// ANDL avoids a partial register write and is smaller than ANDQ, which the old compiler used.
  1056			opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
  1057	
  1058		case ssa.OpAMD64InvertFlags:
  1059			v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1060		case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
  1061			v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1062		case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
  1063			v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
  1064		case ssa.OpAMD64REPSTOSQ:
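       		// REP STOSQ stores AX to (DI) CX times, advancing DI; the op's
       		// register constraints pin its inputs to those registers.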
  1065			s.Prog(x86.AREP)
  1066			s.Prog(x86.ASTOSQ)
  1067		case ssa.OpAMD64REPMOVSQ:
  1068			s.Prog(x86.AREP)
  1069			s.Prog(x86.AMOVSQ)
  1070		case ssa.OpAMD64LoweredNilCheck:
  1071			// Issue a load which will fault if the input is nil.
  1072			// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
  1073			// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
  1074			// but it doesn't have false dependency on AX.
  1075			// Or maybe allocate an output register and use MOVL (reg),reg2 ?
  1076			// That trades clobbering flags for clobbering a register.
  1077			p := s.Prog(x86.ATESTB)
  1078			p.From.Type = obj.TYPE_REG
  1079			p.From.Reg = x86.REG_AX
  1080			p.To.Type = obj.TYPE_MEM
  1081			p.To.Reg = v.Args[0].Reg()
  1082			gc.AddAux(&p.To, v)
  1083			if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1084				gc.Warnl(v.Pos, "generated nil check")
  1085			}
  1086		case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
  1087			p := s.Prog(v.Op.Asm())
  1088			p.From.Type = obj.TYPE_MEM
  1089			p.From.Reg = v.Args[0].Reg()
  1090			gc.AddAux(&p.From, v)
  1091			p.To.Type = obj.TYPE_REG
  1092			p.To.Reg = v.Reg0()
  1093		case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
  1094			r := v.Reg0()
  1095			if r != v.Args[0].Reg() {
  1096				v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
  1097			}
  1098			p := s.Prog(v.Op.Asm())
  1099			p.From.Type = obj.TYPE_REG
  1100			p.From.Reg = r
  1101			p.To.Type = obj.TYPE_MEM
  1102			p.To.Reg = v.Args[1].Reg()
  1103			gc.AddAux(&p.To, v)
  1104		case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
  1105			r := v.Reg0()
  1106			if r != v.Args[0].Reg() {
  1107				v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
  1108			}
  1109			s.Prog(x86.ALOCK)
  1110			p := s.Prog(v.Op.Asm())
  1111			p.From.Type = obj.TYPE_REG
  1112			p.From.Reg = r
  1113			p.To.Type = obj.TYPE_MEM
  1114			p.To.Reg = v.Args[1].Reg()
  1115			gc.AddAux(&p.To, v)
  1116		case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
  1117			if v.Args[1].Reg() != x86.REG_AX {
  1118				v.Fatalf("input[1] not in AX %s", v.LongString())
  1119			}
  1120			s.Prog(x86.ALOCK)
  1121			p := s.Prog(v.Op.Asm())
  1122			p.From.Type = obj.TYPE_REG
  1123			p.From.Reg = v.Args[2].Reg()
  1124			p.To.Type = obj.TYPE_MEM
  1125			p.To.Reg = v.Args[0].Reg()
  1126			gc.AddAux(&p.To, v)
  1127			p = s.Prog(x86.ASETEQ)
  1128			p.To.Type = obj.TYPE_REG
  1129			p.To.Reg = v.Reg0()
  1130		case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
  1131			s.Prog(x86.ALOCK)
  1132			p := s.Prog(v.Op.Asm())
  1133			p.From.Type = obj.TYPE_REG
  1134			p.From.Reg = v.Args[1].Reg()
  1135			p.To.Type = obj.TYPE_MEM
  1136			p.To.Reg = v.Args[0].Reg()
  1137			gc.AddAux(&p.To, v)
  1138		case ssa.OpClobber:
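       		// Overwrite the dead stack slot with 0xdeaddead using two 32-bit
       		// stores (offset and offset+4), to make accidental reads of dead
       		// values easier to spot.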
  1139			p := s.Prog(x86.AMOVL)
  1140			p.From.Type = obj.TYPE_CONST
  1141			p.From.Offset = 0xdeaddead
  1142			p.To.Type = obj.TYPE_MEM
  1143			p.To.Reg = x86.REG_SP
  1144			gc.AddAux(&p.To, v)
  1145			p = s.Prog(x86.AMOVL)
  1146			p.From.Type = obj.TYPE_CONST
  1147			p.From.Offset = 0xdeaddead
  1148			p.To.Type = obj.TYPE_MEM
  1149			p.To.Reg = x86.REG_SP
  1150			gc.AddAux(&p.To, v)
  1151			p.To.Offset += 4
  1152		default:
  1153			v.Fatalf("genValue not implemented: %s", v.LongString())
  1154		}
  1155	}
  1156	
  1157	var blockJump = [...]struct {
  1158		asm, invasm obj.As
  1159	}{
  1160		ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
  1161		ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
  1162		ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
  1163		ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
  1164		ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
  1165		ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
  1166		ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
  1167		ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
  1168		ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
  1169		ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
  1170		ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
  1171		ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
  1172		ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
  1173		ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
  1174	}
  1175	
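       	// eqfJumps and nefJumps are the two-jump sequences used by FPJump for
       	// floating-point equality branches, which need an extra jump on the
       	// parity flag to handle the unordered (NaN) case. The outer index
       	// selects the variant for which successor is the fallthrough block.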
  1176	var eqfJumps = [2][2]gc.FloatingEQNEJump{
  1177		{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
  1178		{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
  1179	}
  1180	var nefJumps = [2][2]gc.FloatingEQNEJump{
  1181		{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
  1182		{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
  1183	}
  1184	
  1185	func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
  1186		switch b.Kind {
  1187		case ssa.BlockPlain:
  1188			if b.Succs[0].Block() != next {
  1189				p := s.Prog(obj.AJMP)
  1190				p.To.Type = obj.TYPE_BRANCH
  1191				s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1192			}
  1193		case ssa.BlockDefer:
  1194			// defer returns in rax:
  1195			// 0 if we should continue executing
  1196			// 1 if we should jump to deferreturn call
  1197			p := s.Prog(x86.ATESTL)
  1198			p.From.Type = obj.TYPE_REG
  1199			p.From.Reg = x86.REG_AX
  1200			p.To.Type = obj.TYPE_REG
  1201			p.To.Reg = x86.REG_AX
  1202			p = s.Prog(x86.AJNE)
  1203			p.To.Type = obj.TYPE_BRANCH
  1204			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1205			if b.Succs[0].Block() != next {
  1206				p := s.Prog(obj.AJMP)
  1207				p.To.Type = obj.TYPE_BRANCH
  1208				s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1209			}
  1210		case ssa.BlockExit:
  1211		case ssa.BlockRet:
  1212			s.Prog(obj.ARET)
  1213		case ssa.BlockRetJmp:
  1214			p := s.Prog(obj.ARET)
  1215			p.To.Type = obj.TYPE_MEM
  1216			p.To.Name = obj.NAME_EXTERN
  1217			p.To.Sym = b.Aux.(*obj.LSym)
  1218	
  1219		case ssa.BlockAMD64EQF:
  1220			s.FPJump(b, next, &eqfJumps)
  1221	
  1222		case ssa.BlockAMD64NEF:
  1223			s.FPJump(b, next, &nefJumps)
  1224	
  1225		case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
  1226			ssa.BlockAMD64LT, ssa.BlockAMD64GE,
  1227			ssa.BlockAMD64LE, ssa.BlockAMD64GT,
  1228			ssa.BlockAMD64OS, ssa.BlockAMD64OC,
  1229			ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
  1230			ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
  1231			jmp := blockJump[b.Kind]
  1232			switch next {
  1233			case b.Succs[0].Block():
  1234				s.Br(jmp.invasm, b.Succs[1].Block())
  1235			case b.Succs[1].Block():
  1236				s.Br(jmp.asm, b.Succs[0].Block())
  1237			default:
  1238				if b.Likely != ssa.BranchUnlikely {
  1239					s.Br(jmp.asm, b.Succs[0].Block())
  1240					s.Br(obj.AJMP, b.Succs[1].Block())
  1241				} else {
  1242					s.Br(jmp.invasm, b.Succs[1].Block())
  1243					s.Br(obj.AJMP, b.Succs[0].Block())
  1244				}
  1245			}
  1246	
  1247		default:
  1248			b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
  1249		}
  1250	}
  1251	
