...

Source file src/cmd/compile/internal/x86/ssa.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVLconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}
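
// For example, if MOVL $0, AX is emitted between a CMPL and the JEQ that
// consumes its flags, rewriting it to the shorter XORL AX, AX (see the
// Op386MOVLconst case in ssaGenValue below) would clobber those flags.
// Marking the value here makes ssaGenValue keep the literal MOVL.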

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		}
		return x86.AMOVWLZX
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}
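
// A zero-extending load such as MOVBLZX writes the full 32-bit register,
// so later reads of the wide register do not depend on its stale upper
// bits; a plain MOVB would merge into the low byte and can cause a
// partial-register stall.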

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1, 2, 4:
			// Move the full 32-bit register regardless of width;
			// this avoids a partial register write.
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//     dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = src
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	return p
}
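
// For example, opregreg(s, x86.AADDL, x86.REG_AX, x86.REG_BX) assembles to
// ADDL BX, AX, i.e. AX += BX: the src argument becomes the From operand and
// dest becomes the To operand.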

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
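		// The LEAL in the default case above acts as a three-operand add:
		// LEAL (r1)(r2*1), r computes r1+r2 into r without clobbering
		// either input and without touching the flags.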

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow flag.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is the low 32 bits, output 1 is the carry/borrow flag.
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] (the dividend) is already in AX, the only input register
		// we allow. DIV leaves the quotient in AX; MOD leaves the
		// remainder in DX.
		x := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			if ssa.NeedsFixUp(v) {
				var c *obj.Prog
				switch v.Op {
				case ssa.Op386DIVL, ssa.Op386MODL:
					c = s.Prog(x86.ACMPL)
					j = s.Prog(x86.AJEQ)

				case ssa.Op386DIVW, ssa.Op386MODW:
					c = s.Prog(x86.ACMPW)
					j = s.Prog(x86.AJEQ)
				}
				c.From.Type = obj.TYPE_REG
				c.From.Reg = x
				c.To.Type = obj.TYPE_CONST
				c.To.Offset = -1

				j.To.Type = obj.TYPE_BRANCH
			}
			// sign extend the dividend
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				s.Prog(x86.ACDQ)
			case ssa.Op386DIVW, ssa.Op386MODW:
				s.Prog(x86.ACWD)
			}
		}

		// For unsigned ops we zero extend the dividend by setting DX = 0;
		// signed dividends were sign extended above.
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for the -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n / -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.Val = n
			j2.To.Val = s.Pc()
		}
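		// For a signed DIVL that needs the fixup, the emitted sequence is:
		//	CMPL  x, $-1
		//	JEQ   fix
		//	CDQ
		//	IDIVL x
		//	JMP   done
		// fix:
		//	NEGL  AX        (for MOD: XORL DX, DX instead)
		// done: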

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit
		// integers into HMUL by a constant

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLU:
		// Arg[0] is already in AX as it's the only register we allow.
		// The low 32 bits of the result are left in AX; the high bits
		// land in DX, which this op clobbers.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
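		// For example, with x = y = 0xFFFFFFFF: ADDL leaves 0xFFFFFFFE with
		// the carry flag set, and RCRL $1 rotates the carry back in as the
		// high bit, giving 0xFFFFFFFF, which is exactly (x+y)/2.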

	case ssa.Op386ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
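		// When the destination differs from the input, LEAL off(a), r adds
		// the constant and moves the result in a single instruction, again
		// without disturbing the flags.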

	case ssa.Op386MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
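		// SetFrom3 supplies a third operand, producing the immediate form
		// of IMUL with distinct source and destination registers, so this
		// op does not require the input and output to share a register.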

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
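		// SBBL r, r computes r - r - carry, leaving 0 if the carry was
		// clear and -1 (all ones) if it was set: the carry flag turned
		// into a full-width mask without a branch.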
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
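			// SP cannot be encoded as the index register of an x86 SIB
			// byte; with scale 1 the base and index are interchangeable,
			// so swap them and use SP as the base instead.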
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = gc.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = gc.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
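		// The const1/const2 ops work as a pair: const1 materializes the
		// address of a constant-pool symbol with LEAL, and const2 then
		// loads the float value through that address.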

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.Op386MOVSDloadidx8:
			p.From.Scale = 8
		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.Op386MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		p.From.Index = v.Args[2].Reg()
		p.From.Scale = 4
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
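	// Adding ±1 to a memory operand is special-cased below: INCL and DECL
	// encode one byte shorter than the equivalent ADDL with an immediate.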
	case ssa.Op386ADDLconstmodify:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, off)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.Op386MOVSDstoreidx8:
			p.To.Scale = 8
		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
			p.To.Scale = 4
		case ssa.Op386MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386ADDLconstmodifyidx4:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Scale = 4
			p.To.Index = v.Args[1].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4,
			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt
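		// Duff's device: duffzero and duffcopy are long runs of store and
		// copy instructions, and AuxInt selects how far into the routine
		// to jump, which determines how many bytes are processed.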

	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = v.Aux.(*obj.LSym)

	case ssa.Op386LoweredPanicBoundsA, ssa.Op386LoweredPanicBoundsB, ssa.Op386LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(8) // space used in callee args area by assembly stubs
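		// The bounds-check stubs take two 32-bit arguments (the failing
		// index and the length or capacity), hence 8 bytes.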

	case ssa.Op386LoweredPanicExtendA, ssa.Op386LoweredPanicExtendB, ssa.Op386LoweredPanicExtendC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.ExtendCheckFunc[v.AuxInt]
		s.UseArgs(12) // space used in callee args area by assembly stubs
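		// The extend stubs take a 64-bit index as two 32-bit halves plus
		// a 32-bit length or capacity, hence 12 bytes.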

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE,
		ssa.Op386SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
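		// The two float cases above need a second SET instruction because
		// UCOMIS reports an unordered result (a NaN operand) in PF:
		// x != y holds when ZF==0 or PF==1, so the results are ORed;
		// x == y requires ZF==1 and PF==0, so they are ANDed.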

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
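		// The REP-prefixed string ops use fixed registers: REP STOSL
		// stores AX to (DI) CX times, and REP MOVSL copies CX longs
		// from (SI) to (DI).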
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.Op386FCHS:
		v.Fatalf("FCHS in non-387 mode")
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
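		// This writes the 0xdeaddead sentinel over a dead stack slot when
		// the compiler's clobberdead debugging mode is enabled, so stale
		// values are noticed instead of silently reused.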
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}
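
// Each blockJump entry pairs a conditional jump with its inversion so that
// ssaGenBlock can branch on whichever successor does not fall through. The
// unsigned conditions use the carry-flag names: JCS/JCC are the assembler's
// spellings of JB/JAE, and JHI/JLS of JA/JBE.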

var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}
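
// A single conditional jump cannot decide float equality because unordered
// operands set PF. For Block386EQF with next == b.Succs[0], for example, the
// table emits JNE and JPS, both targeting Succs[1] (Index 1); execution only
// falls through to the "equal" successor when ZF==1 and PF==0.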

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.Block386EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386OS, ssa.Block386OC,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}
