
Source file src/cmd/compile/internal/ppc64/ssa.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package ppc64
     6	
     7	import (
     8		"cmd/compile/internal/gc"
     9		"cmd/compile/internal/ssa"
    10		"cmd/compile/internal/types"
    11		"cmd/internal/obj"
    12		"cmd/internal/obj/ppc64"
    13		"cmd/internal/objabi"
    14		"math"
    15		"strings"
    16	)
    17	
    18	// iselOp encodes mapping of comparison operations onto ISEL operands
    19	type iselOp struct {
    20		cond        int64
    21		valueIfCond int // if cond is true, the value to return (0 or 1)
    22	}
    23	
    24	// Input registers to ISEL used for comparison. Index 0 is the zero register; index 1 is REGTMP, which is loaded with 1 before use.
    25	var iselRegs = [2]int16{ppc64.REG_R0, ppc64.REGTMP}
    26	
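// iselOps maps each comparison op to the CR0 bit tested by ISEL and the
// value to select when that bit is set. For example, OpPPC64LessThan
// (with CR0 already set by a CMP) is materialized below by loading 1 into
// REGTMP and emitting ISEL on C_COND_LT: 1 if LT is set, else 0 (R0).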
    27	var iselOps = map[ssa.Op]iselOp{
    28		ssa.OpPPC64Equal:         {cond: ppc64.C_COND_EQ, valueIfCond: 1},
    29		ssa.OpPPC64NotEqual:      {cond: ppc64.C_COND_EQ, valueIfCond: 0},
    30		ssa.OpPPC64LessThan:      {cond: ppc64.C_COND_LT, valueIfCond: 1},
    31		ssa.OpPPC64GreaterEqual:  {cond: ppc64.C_COND_LT, valueIfCond: 0},
    32		ssa.OpPPC64GreaterThan:   {cond: ppc64.C_COND_GT, valueIfCond: 1},
    33		ssa.OpPPC64LessEqual:     {cond: ppc64.C_COND_GT, valueIfCond: 0},
    34		ssa.OpPPC64FLessThan:     {cond: ppc64.C_COND_LT, valueIfCond: 1},
    35		ssa.OpPPC64FGreaterThan:  {cond: ppc64.C_COND_GT, valueIfCond: 1},
    36		ssa.OpPPC64FLessEqual:    {cond: ppc64.C_COND_LT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
    37		ssa.OpPPC64FGreaterEqual: {cond: ppc64.C_COND_GT, valueIfCond: 1}, // 2 comparisons, 2nd is EQ
    38	}
    39	
    40	// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
    41	func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    42		//	flive := b.FlagsLiveAtEnd
    43		//	if b.Control != nil && b.Control.Type.IsFlags() {
    44		//		flive = true
    45		//	}
    46		//	for i := len(b.Values) - 1; i >= 0; i-- {
    47		//		v := b.Values[i]
    48		//		if flive && (v.Op == ssa.OpPPC64MOVDconst) {
    49		//			// The "mark" is any non-nil Aux value.
    50		//			v.Aux = v
    51		//		}
    52		//		if v.Type.IsFlags() {
    53		//			flive = false
    54		//		}
    55		//		for _, a := range v.Args {
    56		//			if a.Type.IsFlags() {
    57		//				flive = true
    58		//			}
    59		//		}
    60		//	}
    61	}
    62	
    63	// loadByType returns the load instruction of the given type.
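// Unsigned integer types get the zero-extending variants (MOVBZ, MOVHZ,
// MOVWZ); signed types get the sign-extending MOVB, MOVH, MOVW.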
    64	func loadByType(t *types.Type) obj.As {
    65		if t.IsFloat() {
    66			switch t.Size() {
    67			case 4:
    68				return ppc64.AFMOVS
    69			case 8:
    70				return ppc64.AFMOVD
    71			}
    72		} else {
    73			switch t.Size() {
    74			case 1:
    75				if t.IsSigned() {
    76					return ppc64.AMOVB
    77				} else {
    78					return ppc64.AMOVBZ
    79				}
    80			case 2:
    81				if t.IsSigned() {
    82					return ppc64.AMOVH
    83				} else {
    84					return ppc64.AMOVHZ
    85				}
    86			case 4:
    87				if t.IsSigned() {
    88					return ppc64.AMOVW
    89				} else {
    90					return ppc64.AMOVWZ
    91				}
    92			case 8:
    93				return ppc64.AMOVD
    94			}
    95		}
    96		panic("bad load type")
    97	}
    98	
    99	// storeByType returns the store instruction of the given type.
   100	func storeByType(t *types.Type) obj.As {
   101		if t.IsFloat() {
   102			switch t.Size() {
   103			case 4:
   104				return ppc64.AFMOVS
   105			case 8:
   106				return ppc64.AFMOVD
   107			}
   108		} else {
   109			switch t.Size() {
   110			case 1:
   111				return ppc64.AMOVB
   112			case 2:
   113				return ppc64.AMOVH
   114			case 4:
   115				return ppc64.AMOVW
   116			case 8:
   117				return ppc64.AMOVD
   118			}
   119		}
   120		panic("bad store type")
   121	}
   122	
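// ssaGenISEL emits an ISEL that sets v's register to r1 when condition
// bit cr of CR0 is set, and to r2 otherwise.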
   123	func ssaGenISEL(s *gc.SSAGenState, v *ssa.Value, cr int64, r1, r2 int16) {
   124		r := v.Reg()
   125		p := s.Prog(ppc64.AISEL)
   126		p.To.Type = obj.TYPE_REG
   127		p.To.Reg = r
   128		p.Reg = r1
   129		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
   130		p.From.Type = obj.TYPE_CONST
   131		p.From.Offset = cr
   132	}
   133	
   134	func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   135		switch v.Op {
   136		case ssa.OpCopy:
   137			t := v.Type
   138			if t.IsMemory() {
   139				return
   140			}
   141			x := v.Args[0].Reg()
   142			y := v.Reg()
   143			if x != y {
   144				rt := obj.TYPE_REG
   145				op := ppc64.AMOVD
   146	
   147				if t.IsFloat() {
   148					op = ppc64.AFMOVD
   149				}
   150				p := s.Prog(op)
   151				p.From.Type = rt
   152				p.From.Reg = x
   153				p.To.Type = rt
   154				p.To.Reg = y
   155			}
   156	
   157		case ssa.OpPPC64LoweredMuluhilo:
   158			// MULHDU	Rarg1, Rarg0, Reg0
   159			// MULLD	Rarg1, Rarg0, Reg1
   160			r0 := v.Args[0].Reg()
   161			r1 := v.Args[1].Reg()
   162			p := s.Prog(ppc64.AMULHDU)
   163			p.From.Type = obj.TYPE_REG
   164			p.From.Reg = r1
   165			p.Reg = r0
   166			p.To.Type = obj.TYPE_REG
   167			p.To.Reg = v.Reg0()
   168			p1 := s.Prog(ppc64.AMULLD)
   169			p1.From.Type = obj.TYPE_REG
   170			p1.From.Reg = r1
   171			p1.Reg = r0
   172			p1.To.Type = obj.TYPE_REG
   173			p1.To.Reg = v.Reg1()
   174	
   175		case ssa.OpPPC64LoweredAdd64Carry:
   176			// ADDC		Rarg2, -1, Rtmp
   177			// ADDE		Rarg1, Rarg0, Reg0
   178			// ADDZE	Rzero, Reg1
   179			r0 := v.Args[0].Reg()
   180			r1 := v.Args[1].Reg()
   181			r2 := v.Args[2].Reg()
   182			p := s.Prog(ppc64.AADDC)
   183			p.From.Type = obj.TYPE_CONST
   184			p.From.Offset = -1
   185			p.Reg = r2
   186			p.To.Type = obj.TYPE_REG
   187			p.To.Reg = ppc64.REGTMP
   188			p1 := s.Prog(ppc64.AADDE)
   189			p1.From.Type = obj.TYPE_REG
   190			p1.From.Reg = r1
   191			p1.Reg = r0
   192			p1.To.Type = obj.TYPE_REG
   193			p1.To.Reg = v.Reg0()
   194			p2 := s.Prog(ppc64.AADDZE)
   195			p2.From.Type = obj.TYPE_REG
   196			p2.From.Reg = ppc64.REGZERO
   197			p2.To.Type = obj.TYPE_REG
   198			p2.To.Reg = v.Reg1()
   199	
   200		case ssa.OpPPC64LoweredAtomicAnd8,
   201			ssa.OpPPC64LoweredAtomicOr8:
   202			// LWSYNC
   203			// LBAR		(Rarg0), Rtmp
   204			// AND/OR	Rarg1, Rtmp
   205			// STBCCC	Rtmp, (Rarg0)
   206			// BNE		-3(PC)
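		// The BNE loops back to the LBAR whenever the STBCCC fails, that is,
		// whenever the reservation taken by the load has been lost.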
   207			r0 := v.Args[0].Reg()
   208			r1 := v.Args[1].Reg()
   209			// LWSYNC - Assuming shared data not write-through-required nor
   210			// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   211			plwsync := s.Prog(ppc64.ALWSYNC)
   212			plwsync.To.Type = obj.TYPE_NONE
   213			p := s.Prog(ppc64.ALBAR)
   214			p.From.Type = obj.TYPE_MEM
   215			p.From.Reg = r0
   216			p.To.Type = obj.TYPE_REG
   217			p.To.Reg = ppc64.REGTMP
   218			p1 := s.Prog(v.Op.Asm())
   219			p1.From.Type = obj.TYPE_REG
   220			p1.From.Reg = r1
   221			p1.To.Type = obj.TYPE_REG
   222			p1.To.Reg = ppc64.REGTMP
   223			p2 := s.Prog(ppc64.ASTBCCC)
   224			p2.From.Type = obj.TYPE_REG
   225			p2.From.Reg = ppc64.REGTMP
   226			p2.To.Type = obj.TYPE_MEM
   227			p2.To.Reg = r0
   228		p2.RegTo2 = 1
   229			p3 := s.Prog(ppc64.ABNE)
   230			p3.To.Type = obj.TYPE_BRANCH
   231			gc.Patch(p3, p)
   232	
   233		case ssa.OpPPC64LoweredAtomicAdd32,
   234			ssa.OpPPC64LoweredAtomicAdd64:
   235			// LWSYNC
   236			// LDAR/LWAR    (Rarg0), Rout
   237			// ADD		Rarg1, Rout
   238			// STDCCC/STWCCC Rout, (Rarg0)
   239			// BNE         -3(PC)
   240		// MOVWZ	Rout,Rout (if Add32)
   241			ld := ppc64.ALDAR
   242			st := ppc64.ASTDCCC
   243			if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   244				ld = ppc64.ALWAR
   245				st = ppc64.ASTWCCC
   246			}
   247			r0 := v.Args[0].Reg()
   248			r1 := v.Args[1].Reg()
   249			out := v.Reg0()
   250			// LWSYNC - Assuming shared data not write-through-required nor
   251			// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   252			plwsync := s.Prog(ppc64.ALWSYNC)
   253			plwsync.To.Type = obj.TYPE_NONE
   254			// LDAR or LWAR
   255			p := s.Prog(ld)
   256			p.From.Type = obj.TYPE_MEM
   257			p.From.Reg = r0
   258			p.To.Type = obj.TYPE_REG
   259			p.To.Reg = out
   260			// ADD reg1,out
   261			p1 := s.Prog(ppc64.AADD)
   262			p1.From.Type = obj.TYPE_REG
   263			p1.From.Reg = r1
   264			p1.To.Reg = out
   265			p1.To.Type = obj.TYPE_REG
   266			// STDCCC or STWCCC
   267			p3 := s.Prog(st)
   268			p3.From.Type = obj.TYPE_REG
   269			p3.From.Reg = out
   270			p3.To.Type = obj.TYPE_MEM
   271			p3.To.Reg = r0
   272			// BNE retry
   273			p4 := s.Prog(ppc64.ABNE)
   274			p4.To.Type = obj.TYPE_BRANCH
   275			gc.Patch(p4, p)
   276	
   277			// Ensure a 32 bit result
   278			if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
   279				p5 := s.Prog(ppc64.AMOVWZ)
   280				p5.To.Type = obj.TYPE_REG
   281				p5.To.Reg = out
   282				p5.From.Type = obj.TYPE_REG
   283				p5.From.Reg = out
   284			}
   285	
   286		case ssa.OpPPC64LoweredAtomicExchange32,
   287			ssa.OpPPC64LoweredAtomicExchange64:
   288			// LWSYNC
   289			// LDAR/LWAR    (Rarg0), Rout
   290			// STDCCC/STWCCC Rout, (Rarg0)
   291			// BNE         -2(PC)
   292			// ISYNC
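		// Together with the preceding BNE, the trailing ISYNC orders the
		// loaded value before later accesses (acquire semantics).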
   293			ld := ppc64.ALDAR
   294			st := ppc64.ASTDCCC
   295			if v.Op == ssa.OpPPC64LoweredAtomicExchange32 {
   296				ld = ppc64.ALWAR
   297				st = ppc64.ASTWCCC
   298			}
   299			r0 := v.Args[0].Reg()
   300			r1 := v.Args[1].Reg()
   301			out := v.Reg0()
   302			// LWSYNC - Assuming shared data not write-through-required nor
   303			// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   304			plwsync := s.Prog(ppc64.ALWSYNC)
   305			plwsync.To.Type = obj.TYPE_NONE
   306			// LDAR or LWAR
   307			p := s.Prog(ld)
   308			p.From.Type = obj.TYPE_MEM
   309			p.From.Reg = r0
   310			p.To.Type = obj.TYPE_REG
   311			p.To.Reg = out
   312			// STDCCC or STWCCC
   313			p1 := s.Prog(st)
   314			p1.From.Type = obj.TYPE_REG
   315			p1.From.Reg = r1
   316			p1.To.Type = obj.TYPE_MEM
   317			p1.To.Reg = r0
   318			// BNE retry
   319			p2 := s.Prog(ppc64.ABNE)
   320			p2.To.Type = obj.TYPE_BRANCH
   321			gc.Patch(p2, p)
   322			// ISYNC
   323			pisync := s.Prog(ppc64.AISYNC)
   324			pisync.To.Type = obj.TYPE_NONE
   325	
   326		case ssa.OpPPC64LoweredAtomicLoad8,
   327			ssa.OpPPC64LoweredAtomicLoad32,
   328			ssa.OpPPC64LoweredAtomicLoad64,
   329			ssa.OpPPC64LoweredAtomicLoadPtr:
   330			// SYNC
   331			// MOVB/MOVD/MOVW (Rarg0), Rout
   332			// CMP Rout,Rout
   333			// BNE 1(PC)
   334			// ISYNC
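		// The CMP/BNE/ISYNC tail is the standard Power load-acquire idiom:
		// the never-taken branch depends on the loaded value, so the ISYNC
		// orders the load before everything that follows.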
   335			ld := ppc64.AMOVD
   336			cmp := ppc64.ACMP
   337			switch v.Op {
   338			case ssa.OpPPC64LoweredAtomicLoad8:
   339				ld = ppc64.AMOVBZ
   340			case ssa.OpPPC64LoweredAtomicLoad32:
   341				ld = ppc64.AMOVWZ
   342				cmp = ppc64.ACMPW
   343			}
   344			arg0 := v.Args[0].Reg()
   345			out := v.Reg0()
   346			// SYNC when AuxInt == 1; otherwise, load-acquire
   347			if v.AuxInt == 1 {
   348				psync := s.Prog(ppc64.ASYNC)
   349				psync.To.Type = obj.TYPE_NONE
   350			}
   351			// Load
   352			p := s.Prog(ld)
   353			p.From.Type = obj.TYPE_MEM
   354			p.From.Reg = arg0
   355			p.To.Type = obj.TYPE_REG
   356			p.To.Reg = out
   357			// CMP
   358			p1 := s.Prog(cmp)
   359			p1.From.Type = obj.TYPE_REG
   360			p1.From.Reg = out
   361			p1.To.Type = obj.TYPE_REG
   362			p1.To.Reg = out
   363			// BNE
   364			p2 := s.Prog(ppc64.ABNE)
   365			p2.To.Type = obj.TYPE_BRANCH
   366			// ISYNC
   367			pisync := s.Prog(ppc64.AISYNC)
   368			pisync.To.Type = obj.TYPE_NONE
   369			gc.Patch(p2, pisync)
   370	
   371		case ssa.OpPPC64LoweredAtomicStore32,
   372			ssa.OpPPC64LoweredAtomicStore64:
   373			// SYNC or LWSYNC
   374			// MOVD/MOVW arg1,(arg0)
   375			st := ppc64.AMOVD
   376			if v.Op == ssa.OpPPC64LoweredAtomicStore32 {
   377				st = ppc64.AMOVW
   378			}
   379			arg0 := v.Args[0].Reg()
   380			arg1 := v.Args[1].Reg()
   381			// If AuxInt == 0, LWSYNC (Store-Release), else SYNC
   382			// SYNC
   383			syncOp := ppc64.ASYNC
   384			if v.AuxInt == 0 {
   385				syncOp = ppc64.ALWSYNC
   386			}
   387			psync := s.Prog(syncOp)
   388			psync.To.Type = obj.TYPE_NONE
   389			// Store
   390			p := s.Prog(st)
   391			p.To.Type = obj.TYPE_MEM
   392			p.To.Reg = arg0
   393			p.From.Type = obj.TYPE_REG
   394			p.From.Reg = arg1
   395	
   396		case ssa.OpPPC64LoweredAtomicCas64,
   397			ssa.OpPPC64LoweredAtomicCas32:
   398			// LWSYNC
   399			// loop:
   400			// LDAR        (Rarg0), MutexHint, Rtmp
   401			// CMP         Rarg1, Rtmp
   402			// BNE         fail
   403			// STDCCC      Rarg2, (Rarg0)
   404			// BNE         loop
   405			// LWSYNC      // Only for sequential consistency; not required in CasRel.
   406			// MOVD        $1, Rout
   407			// BR          end
   408			// fail:
   409			// MOVD        $0, Rout
   410			// end:
   411			ld := ppc64.ALDAR
   412			st := ppc64.ASTDCCC
   413			cmp := ppc64.ACMP
   414			if v.Op == ssa.OpPPC64LoweredAtomicCas32 {
   415				ld = ppc64.ALWAR
   416				st = ppc64.ASTWCCC
   417				cmp = ppc64.ACMPW
   418			}
   419			r0 := v.Args[0].Reg()
   420			r1 := v.Args[1].Reg()
   421			r2 := v.Args[2].Reg()
   422			out := v.Reg0()
   423			// LWSYNC - Assuming shared data not write-through-required nor
   424			// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
   425			plwsync1 := s.Prog(ppc64.ALWSYNC)
   426			plwsync1.To.Type = obj.TYPE_NONE
   427			// LDAR or LWAR
   428			p := s.Prog(ld)
   429			p.From.Type = obj.TYPE_MEM
   430			p.From.Reg = r0
   431			p.To.Type = obj.TYPE_REG
   432			p.To.Reg = ppc64.REGTMP
   433			// If it is a Compare-and-Swap-Release operation, set the EH field with
   434			// the release hint.
   435			if v.AuxInt == 0 {
   436				p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0})
   437			}
   438			// CMP reg1,reg2
   439			p1 := s.Prog(cmp)
   440			p1.From.Type = obj.TYPE_REG
   441			p1.From.Reg = r1
   442			p1.To.Reg = ppc64.REGTMP
   443			p1.To.Type = obj.TYPE_REG
   444			// BNE cas_fail
   445			p2 := s.Prog(ppc64.ABNE)
   446			p2.To.Type = obj.TYPE_BRANCH
   447			// STDCCC or STWCCC
   448			p3 := s.Prog(st)
   449			p3.From.Type = obj.TYPE_REG
   450			p3.From.Reg = r2
   451			p3.To.Type = obj.TYPE_MEM
   452			p3.To.Reg = r0
   453			// BNE retry
   454			p4 := s.Prog(ppc64.ABNE)
   455			p4.To.Type = obj.TYPE_BRANCH
   456			gc.Patch(p4, p)
   457			// LWSYNC - Assuming shared data not write-through-required nor
   458			// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
   459			// If the operation is a CAS-Release, then synchronization is not necessary.
   460			if v.AuxInt != 0 {
   461				plwsync2 := s.Prog(ppc64.ALWSYNC)
   462				plwsync2.To.Type = obj.TYPE_NONE
   463			}
   464			// return true
   465			p5 := s.Prog(ppc64.AMOVD)
   466			p5.From.Type = obj.TYPE_CONST
   467			p5.From.Offset = 1
   468			p5.To.Type = obj.TYPE_REG
   469			p5.To.Reg = out
   470			// BR done
   471			p6 := s.Prog(obj.AJMP)
   472			p6.To.Type = obj.TYPE_BRANCH
   473			// return false
   474			p7 := s.Prog(ppc64.AMOVD)
   475			p7.From.Type = obj.TYPE_CONST
   476			p7.From.Offset = 0
   477			p7.To.Type = obj.TYPE_REG
   478			p7.To.Reg = out
   479			gc.Patch(p2, p7)
   480			// done (label)
   481			p8 := s.Prog(obj.ANOP)
   482			gc.Patch(p6, p8)
   483	
   484		case ssa.OpPPC64LoweredGetClosurePtr:
   485			// Closure pointer is R11 (already)
   486			gc.CheckLoweredGetClosurePtr(v)
   487	
   488		case ssa.OpPPC64LoweredGetCallerSP:
   489			// caller's SP is FixedFrameSize below the address of the first arg
   490			p := s.Prog(ppc64.AMOVD)
   491			p.From.Type = obj.TYPE_ADDR
   492			p.From.Offset = -gc.Ctxt.FixedFrameSize()
   493			p.From.Name = obj.NAME_PARAM
   494			p.To.Type = obj.TYPE_REG
   495			p.To.Reg = v.Reg()
   496	
   497		case ssa.OpPPC64LoweredGetCallerPC:
   498			p := s.Prog(obj.AGETCALLERPC)
   499			p.To.Type = obj.TYPE_REG
   500			p.To.Reg = v.Reg()
   501	
   502		case ssa.OpPPC64LoweredRound32F, ssa.OpPPC64LoweredRound64F:
   503			// input is already rounded
   504	
   505		case ssa.OpLoadReg:
   506			loadOp := loadByType(v.Type)
   507			p := s.Prog(loadOp)
   508			gc.AddrAuto(&p.From, v.Args[0])
   509			p.To.Type = obj.TYPE_REG
   510			p.To.Reg = v.Reg()
   511	
   512		case ssa.OpStoreReg:
   513			storeOp := storeByType(v.Type)
   514			p := s.Prog(storeOp)
   515			p.From.Type = obj.TYPE_REG
   516			p.From.Reg = v.Args[0].Reg()
   517			gc.AddrAuto(&p.To, v)
   518	
   519		case ssa.OpPPC64DIVD:
   520			// For now,
   521			//
   522			// cmp arg1, -1
   523			// be  ahead
   524			// v = arg0 / arg1
   525			// b over
   526			// ahead: v = - arg0
   527			// over: nop
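		// arg1 == -1 is special-cased because the hardware divide leaves an
		// undefined result for MinInt64 / -1; computing x / -1 as -x is
		// correct for every x, including MinInt64.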
   528			r := v.Reg()
   529			r0 := v.Args[0].Reg()
   530			r1 := v.Args[1].Reg()
   531	
   532			p := s.Prog(ppc64.ACMP)
   533			p.From.Type = obj.TYPE_REG
   534			p.From.Reg = r1
   535			p.To.Type = obj.TYPE_CONST
   536			p.To.Offset = -1
   537	
   538			pbahead := s.Prog(ppc64.ABEQ)
   539			pbahead.To.Type = obj.TYPE_BRANCH
   540	
   541			p = s.Prog(v.Op.Asm())
   542			p.From.Type = obj.TYPE_REG
   543			p.From.Reg = r1
   544			p.Reg = r0
   545			p.To.Type = obj.TYPE_REG
   546			p.To.Reg = r
   547	
   548			pbover := s.Prog(obj.AJMP)
   549			pbover.To.Type = obj.TYPE_BRANCH
   550	
   551			p = s.Prog(ppc64.ANEG)
   552			p.To.Type = obj.TYPE_REG
   553			p.To.Reg = r
   554			p.From.Type = obj.TYPE_REG
   555			p.From.Reg = r0
   556			gc.Patch(pbahead, p)
   557	
   558			p = s.Prog(obj.ANOP)
   559			gc.Patch(pbover, p)
   560	
   561		case ssa.OpPPC64DIVW:
   562			// word-width version of above
   563			r := v.Reg()
   564			r0 := v.Args[0].Reg()
   565			r1 := v.Args[1].Reg()
   566	
   567			p := s.Prog(ppc64.ACMPW)
   568			p.From.Type = obj.TYPE_REG
   569			p.From.Reg = r1
   570			p.To.Type = obj.TYPE_CONST
   571			p.To.Offset = -1
   572	
   573			pbahead := s.Prog(ppc64.ABEQ)
   574			pbahead.To.Type = obj.TYPE_BRANCH
   575	
   576			p = s.Prog(v.Op.Asm())
   577			p.From.Type = obj.TYPE_REG
   578			p.From.Reg = r1
   579			p.Reg = r0
   580			p.To.Type = obj.TYPE_REG
   581			p.To.Reg = r
   582	
   583			pbover := s.Prog(obj.AJMP)
   584			pbover.To.Type = obj.TYPE_BRANCH
   585	
   586			p = s.Prog(ppc64.ANEG)
   587			p.To.Type = obj.TYPE_REG
   588			p.To.Reg = r
   589			p.From.Type = obj.TYPE_REG
   590			p.From.Reg = r0
   591			gc.Patch(pbahead, p)
   592	
   593			p = s.Prog(obj.ANOP)
   594			gc.Patch(pbover, p)
   595	
   596		case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
   597			ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
   598			ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
   599			ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
   600			ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
   601			ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
   602			ssa.OpPPC64AND, ssa.OpPPC64ANDCC, ssa.OpPPC64OR, ssa.OpPPC64ORCC, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64XORCC, ssa.OpPPC64EQV:
   603			r := v.Reg()
   604			r1 := v.Args[0].Reg()
   605			r2 := v.Args[1].Reg()
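		// The assembled form is OP From, Reg, To with To = Reg op From
		// (e.g. SUB From, Reg, To computes Reg - From), so arg1 is placed
		// in From and arg0 in Reg.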
   606			p := s.Prog(v.Op.Asm())
   607			p.From.Type = obj.TYPE_REG
   608			p.From.Reg = r2
   609			p.Reg = r1
   610			p.To.Type = obj.TYPE_REG
   611			p.To.Reg = r
   612	
   613		case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
   614			p := s.Prog(v.Op.Asm())
   615			p.From.Type = obj.TYPE_CONST
   616			p.From.Offset = v.AuxInt
   617			p.Reg = v.Args[0].Reg()
   618			p.To.Type = obj.TYPE_REG
   619			p.To.Reg = v.Reg()
   620	
   621		case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
   622			r := v.Reg()
   623			r1 := v.Args[0].Reg()
   624			r2 := v.Args[1].Reg()
   625			r3 := v.Args[2].Reg()
   626			// r = r1*r2 ± r3
   627			p := s.Prog(v.Op.Asm())
   628			p.From.Type = obj.TYPE_REG
   629			p.From.Reg = r1
   630			p.Reg = r3
   631			p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r2})
   632			p.To.Type = obj.TYPE_REG
   633			p.To.Reg = r
   634	
   635		case ssa.OpPPC64MaskIfNotCarry:
   636			r := v.Reg()
   637			p := s.Prog(v.Op.Asm())
   638			p.From.Type = obj.TYPE_REG
   639			p.From.Reg = ppc64.REGZERO
   640			p.To.Type = obj.TYPE_REG
   641			p.To.Reg = r
   642	
   643		case ssa.OpPPC64ADDconstForCarry:
   644			r1 := v.Args[0].Reg()
   645			p := s.Prog(v.Op.Asm())
   646			p.Reg = r1
   647			p.From.Type = obj.TYPE_CONST
   648			p.From.Offset = v.AuxInt
   649			p.To.Type = obj.TYPE_REG
   650			p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
   651	
   652		case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL,
   653			ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW,
   654			ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS,
   655			ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
   656			r := v.Reg()
   657			p := s.Prog(v.Op.Asm())
   658			p.To.Type = obj.TYPE_REG
   659			p.To.Reg = r
   660			p.From.Type = obj.TYPE_REG
   661			p.From.Reg = v.Args[0].Reg()
   662	
   663		case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst,
   664			ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst:
   665			p := s.Prog(v.Op.Asm())
   666			p.Reg = v.Args[0].Reg()
   667			p.From.Type = obj.TYPE_CONST
   668			p.From.Offset = v.AuxInt
   669			p.To.Type = obj.TYPE_REG
   670			p.To.Reg = v.Reg()
   671	
   672		case ssa.OpPPC64ANDCCconst:
   673			p := s.Prog(v.Op.Asm())
   674			p.Reg = v.Args[0].Reg()
   675	
   676			if v.Aux != nil {
   677				p.From.Type = obj.TYPE_CONST
   678				p.From.Offset = gc.AuxOffset(v)
   679			} else {
   680				p.From.Type = obj.TYPE_CONST
   681				p.From.Offset = v.AuxInt
   682			}
   683	
   684			p.To.Type = obj.TYPE_REG
   685			p.To.Reg = ppc64.REGTMP // discard result
   686	
   687		case ssa.OpPPC64MOVDaddr:
   688			switch v.Aux.(type) {
   689			default:
   690				v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
   691			case nil:
   692				// If aux offset and aux int are both 0, and the same
   693				// input and output regs are used, no instruction
   694				// needs to be generated, since it would just be
   695				// addi rx, rx, 0.
   696				if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
   697					p := s.Prog(ppc64.AMOVD)
   698					p.From.Type = obj.TYPE_ADDR
   699					p.From.Reg = v.Args[0].Reg()
   700					p.From.Offset = v.AuxInt
   701					p.To.Type = obj.TYPE_REG
   702					p.To.Reg = v.Reg()
   703				}
   704	
   705			case *obj.LSym, *gc.Node:
   706				p := s.Prog(ppc64.AMOVD)
   707				p.From.Type = obj.TYPE_ADDR
   708				p.From.Reg = v.Args[0].Reg()
   709				p.To.Type = obj.TYPE_REG
   710				p.To.Reg = v.Reg()
   711				gc.AddAux(&p.From, v)
   712	
   713			}
   714	
   715		case ssa.OpPPC64MOVDconst:
   716			p := s.Prog(v.Op.Asm())
   717			p.From.Type = obj.TYPE_CONST
   718			p.From.Offset = v.AuxInt
   719			p.To.Type = obj.TYPE_REG
   720			p.To.Reg = v.Reg()
   721	
   722		case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst:
   723			p := s.Prog(v.Op.Asm())
   724			p.From.Type = obj.TYPE_FCONST
   725			p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   726			p.To.Type = obj.TYPE_REG
   727			p.To.Reg = v.Reg()
   728	
   729		case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU:
   730			p := s.Prog(v.Op.Asm())
   731			p.From.Type = obj.TYPE_REG
   732			p.From.Reg = v.Args[0].Reg()
   733			p.To.Type = obj.TYPE_REG
   734			p.To.Reg = v.Args[1].Reg()
   735	
   736		case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst:
   737			p := s.Prog(v.Op.Asm())
   738			p.From.Type = obj.TYPE_REG
   739			p.From.Reg = v.Args[0].Reg()
   740			p.To.Type = obj.TYPE_CONST
   741			p.To.Offset = v.AuxInt
   742	
   743		case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg:
   744			// Shift in register to required size
   745			p := s.Prog(v.Op.Asm())
   746			p.From.Type = obj.TYPE_REG
   747			p.From.Reg = v.Args[0].Reg()
   748			p.To.Reg = v.Reg()
   749			p.To.Type = obj.TYPE_REG
   750	
   751		case ssa.OpPPC64MOVDload:
   752	
   753			// MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
   754			// For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
   755			// the offset is not known until link time. If the load of a go.string uses relocation for the
   756			// offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
   757			// To avoid this problem, the full address of the go.string is computed and loaded into the base register,
   758			// and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
   759			// go.string types because other types will have proper alignment.
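		// That is, instead of a single MOVD sym+off(Rx), Rt (whose DS-form
		// offset would be relocated), the sequence below is
		// MOVD $sym+off(Rx), Rt; MOVD (Rt), Rt.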
   760	
   761			gostring := false
   762			switch n := v.Aux.(type) {
   763			case *obj.LSym:
   764				gostring = strings.HasPrefix(n.Name, "go.string.")
   765			}
   766			if gostring {
   767				// Generate full addr of the go.string const
   768				// including AuxInt
   769				p := s.Prog(ppc64.AMOVD)
   770				p.From.Type = obj.TYPE_ADDR
   771				p.From.Reg = v.Args[0].Reg()
   772				gc.AddAux(&p.From, v)
   773				p.To.Type = obj.TYPE_REG
   774				p.To.Reg = v.Reg()
   775				// Load go.string using 0 offset
   776				p = s.Prog(v.Op.Asm())
   777				p.From.Type = obj.TYPE_MEM
   778				p.From.Reg = v.Reg()
   779				p.To.Type = obj.TYPE_REG
   780				p.To.Reg = v.Reg()
   781				break
   782			}
   783			// Not a go.string, generate a normal load
   784			fallthrough
   785	
   786		case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
   787			p := s.Prog(v.Op.Asm())
   788			p.From.Type = obj.TYPE_MEM
   789			p.From.Reg = v.Args[0].Reg()
   790			gc.AddAux(&p.From, v)
   791			p.To.Type = obj.TYPE_REG
   792			p.To.Reg = v.Reg()
   793	
   794		case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
   795			p := s.Prog(v.Op.Asm())
   796			p.From.Type = obj.TYPE_MEM
   797			p.From.Reg = v.Args[0].Reg()
   798			p.To.Type = obj.TYPE_REG
   799			p.To.Reg = v.Reg()
   800	
   801		case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
   802			p := s.Prog(v.Op.Asm())
   803			p.To.Type = obj.TYPE_MEM
   804			p.To.Reg = v.Args[0].Reg()
   805			p.From.Type = obj.TYPE_REG
   806			p.From.Reg = v.Args[1].Reg()
   807	
   808		case ssa.OpPPC64MOVDloadidx, ssa.OpPPC64MOVWloadidx, ssa.OpPPC64MOVHloadidx, ssa.OpPPC64MOVWZloadidx,
   809			ssa.OpPPC64MOVBZloadidx, ssa.OpPPC64MOVHZloadidx, ssa.OpPPC64FMOVDloadidx, ssa.OpPPC64FMOVSloadidx,
   810			ssa.OpPPC64MOVDBRloadidx, ssa.OpPPC64MOVWBRloadidx, ssa.OpPPC64MOVHBRloadidx:
   811			p := s.Prog(v.Op.Asm())
   812			p.From.Type = obj.TYPE_MEM
   813			p.From.Reg = v.Args[0].Reg()
   814			p.From.Index = v.Args[1].Reg()
   815			gc.AddAux(&p.From, v)
   816			p.To.Type = obj.TYPE_REG
   817			p.To.Reg = v.Reg()
   818	
   819		case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
   820			p := s.Prog(v.Op.Asm())
   821			p.From.Type = obj.TYPE_REG
   822			p.From.Reg = ppc64.REGZERO
   823			p.To.Type = obj.TYPE_MEM
   824			p.To.Reg = v.Args[0].Reg()
   825			gc.AddAux(&p.To, v)
   826	
   827		case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
   828			p := s.Prog(v.Op.Asm())
   829			p.From.Type = obj.TYPE_REG
   830			p.From.Reg = v.Args[1].Reg()
   831			p.To.Type = obj.TYPE_MEM
   832			p.To.Reg = v.Args[0].Reg()
   833			gc.AddAux(&p.To, v)
   834	
   835		case ssa.OpPPC64MOVDstoreidx, ssa.OpPPC64MOVWstoreidx, ssa.OpPPC64MOVHstoreidx, ssa.OpPPC64MOVBstoreidx,
   836			ssa.OpPPC64FMOVDstoreidx, ssa.OpPPC64FMOVSstoreidx, ssa.OpPPC64MOVDBRstoreidx, ssa.OpPPC64MOVWBRstoreidx,
   837			ssa.OpPPC64MOVHBRstoreidx:
   838			p := s.Prog(v.Op.Asm())
   839			p.From.Type = obj.TYPE_REG
   840			p.From.Reg = v.Args[2].Reg()
   841			p.To.Index = v.Args[1].Reg()
   842			p.To.Type = obj.TYPE_MEM
   843			p.To.Reg = v.Args[0].Reg()
   844			gc.AddAux(&p.To, v)
   845	
   846		case ssa.OpPPC64Equal,
   847			ssa.OpPPC64NotEqual,
   848			ssa.OpPPC64LessThan,
   849			ssa.OpPPC64FLessThan,
   850			ssa.OpPPC64LessEqual,
   851			ssa.OpPPC64GreaterThan,
   852			ssa.OpPPC64FGreaterThan,
   853			ssa.OpPPC64GreaterEqual:
   854	
   855			// On Power7 or later, can use isel instruction:
   856			// for a < b, a > b, a = b:
   857			//   rtmp := 1
   858			//   isel rt,rtmp,r0,cond // rt is target in ppc asm
   859	
   860			// for  a >= b, a <= b, a != b:
   861			//   rtmp := 1
   862			//   isel rt,0,rtmp,!cond // rt is target in ppc asm
   863	
   864			p := s.Prog(ppc64.AMOVD)
   865			p.From.Type = obj.TYPE_CONST
   866			p.From.Offset = 1
   867			p.To.Type = obj.TYPE_REG
   868			p.To.Reg = iselRegs[1]
   869			iop := iselOps[v.Op]
   870			ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
   871	
   872		case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
   873			ssa.OpPPC64FGreaterEqual:
   874	
   875			p := s.Prog(ppc64.AMOVD)
   876			p.From.Type = obj.TYPE_CONST
   877			p.From.Offset = 1
   878			p.To.Type = obj.TYPE_REG
   879			p.To.Reg = iselRegs[1]
   880			iop := iselOps[v.Op]
   881			ssaGenISEL(s, v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond])
   882			ssaGenISEL(s, v, ppc64.C_COND_EQ, iselRegs[1], v.Reg())
   883	
   884		case ssa.OpPPC64LoweredZero:
   885	
   886			// unaligned data doesn't hurt performance
   887			// for these instructions on power8 or later
   888	
   889			// for sizes >= 64 generate a loop as follows:
   890	
   891			// set up loop counter in CTR, used by BC
   892			//	 MOVD len/32,REG_TMP
   893			//	 MOVD REG_TMP,CTR
   894			//	 loop:
   895			//	 MOVD R0,(R3)
   896			//	 MOVD R0,8(R3)
   897			//	 MOVD R0,16(R3)
   898			//	 MOVD R0,24(R3)
   899			//	 ADD  $32,R3
   900			//	 BC   16, 0, loop
   901			//
   902			// any remainder is done as described below
   903	
   904			// for sizes < 64 bytes, first clear as many doublewords as possible,
   905			// then handle the remainder
   906			//	MOVD R0,(R3)
   907			//	MOVD R0,8(R3)
   908			// .... etc.
   909			//
   910			// the remainder bytes are cleared using one or more
   911			// of the following instructions with the appropriate
   912			// offsets depending which instructions are needed
   913			//
   914			//	MOVW R0,n1(R3)	4 bytes
   915			//	MOVH R0,n2(R3)	2 bytes
   916			//	MOVB R0,n3(R3)	1 byte
   917			//
   918			// 7 bytes: MOVW, MOVH, MOVB
   919			// 6 bytes: MOVW, MOVH
   920			// 5 bytes: MOVW, MOVB
   921			// 3 bytes: MOVH, MOVB
   922	
   923			// each loop iteration does 32 bytes
   924			ctr := v.AuxInt / 32
   925	
   926			// remainder bytes
   927			rem := v.AuxInt % 32
   928	
   929			// only generate a loop if there is more
   930			// than 1 iteration.
   931			if ctr > 1 {
   932				// Set up CTR loop counter
   933				p := s.Prog(ppc64.AMOVD)
   934				p.From.Type = obj.TYPE_CONST
   935				p.From.Offset = ctr
   936				p.To.Type = obj.TYPE_REG
   937				p.To.Reg = ppc64.REGTMP
   938	
   939				p = s.Prog(ppc64.AMOVD)
   940				p.From.Type = obj.TYPE_REG
   941				p.From.Reg = ppc64.REGTMP
   942				p.To.Type = obj.TYPE_REG
   943				p.To.Reg = ppc64.REG_CTR
   944	
   945				// generate 4 MOVDs
   946				// when this is a loop then the top must be saved
   947				var top *obj.Prog
   948				for offset := int64(0); offset < 32; offset += 8 {
   949					// This is the top of loop
   950					p := s.Prog(ppc64.AMOVD)
   951					p.From.Type = obj.TYPE_REG
   952					p.From.Reg = ppc64.REG_R0
   953					p.To.Type = obj.TYPE_MEM
   954					p.To.Reg = v.Args[0].Reg()
   955					p.To.Offset = offset
   956					// Save the top of loop
   957					if top == nil {
   958						top = p
   959					}
   960				}
   961	
   962				// Increment address for the
   963				// 4 doublewords just zeroed.
   964				p = s.Prog(ppc64.AADD)
   965				p.Reg = v.Args[0].Reg()
   966				p.From.Type = obj.TYPE_CONST
   967				p.From.Offset = 32
   968				p.To.Type = obj.TYPE_REG
   969				p.To.Reg = v.Args[0].Reg()
   970	
   971				// Branch back to top of loop
   972				// based on CTR
   973				// BC with BO_BCTR generates bdnz
   974				p = s.Prog(ppc64.ABC)
   975				p.From.Type = obj.TYPE_CONST
   976				p.From.Offset = ppc64.BO_BCTR
   977				p.Reg = ppc64.REG_R0
   978				p.To.Type = obj.TYPE_BRANCH
   979				gc.Patch(p, top)
   980			}
   981	
   982			// when ctr == 1 the loop was not generated but
   983			// there are at least 32 bytes to clear, so add
   984			// that to the remainder to generate the code
   985			// to clear those doublewords
   986			if ctr == 1 {
   987				rem += 32
   988			}
   989	
   990			// clear the remainder starting at offset zero
   991			offset := int64(0)
   992	
   993			// first clear as many doublewords as possible
   994			// then clear remaining sizes as available
   995			for rem > 0 {
   996				op, size := ppc64.AMOVB, int64(1)
   997				switch {
   998				case rem >= 8:
   999					op, size = ppc64.AMOVD, 8
  1000				case rem >= 4:
  1001					op, size = ppc64.AMOVW, 4
  1002				case rem >= 2:
  1003					op, size = ppc64.AMOVH, 2
  1004				}
  1005				p := s.Prog(op)
  1006				p.From.Type = obj.TYPE_REG
  1007				p.From.Reg = ppc64.REG_R0
  1008				p.To.Type = obj.TYPE_MEM
  1009				p.To.Reg = v.Args[0].Reg()
  1010				p.To.Offset = offset
  1011				rem -= size
  1012				offset += size
  1013			}
  1014	
  1015		case ssa.OpPPC64LoweredMove:
  1016	
  1017			// This will be used when moving more
  1018			// than 8 bytes.  Moves start with
  1019			// as many 8 byte moves as possible, then
  1020			// 4, 2, or 1 byte(s) as remaining.  This will
  1021			// work and be efficient for power8 or later.
  1022			// If there are 64 or more bytes, then a
  1023			// loop is generated to move 32 bytes and
  1024			// update the src and dst addresses on each
  1025			// iteration. When < 64 bytes, the appropriate
  1026		// number of moves is generated based on the
  1027			// size.
  1028			// When moving >= 64 bytes a loop is used
  1029			//	MOVD len/32,REG_TMP
  1030			//	MOVD REG_TMP,CTR
  1031			// top:
  1032			//	MOVD (R4),R7
  1033			//	MOVD 8(R4),R8
  1034			//	MOVD 16(R4),R9
  1035			//	MOVD 24(R4),R10
  1036		//	ADD  $32,R4
  1037			//	MOVD R7,(R3)
  1038			//	MOVD R8,8(R3)
  1039			//	MOVD R9,16(R3)
  1040			//	MOVD R10,24(R3)
  1041		//	ADD  $32,R3
  1042			//	BC 16,0,top
  1043			// Bytes not moved by this loop are moved
  1044			// with a combination of the following instructions,
  1045			// starting with the largest sizes and generating as
  1046			// many as needed, using the appropriate offset value.
  1047			//	MOVD  n(R4),R7
  1048			//	MOVD  R7,n(R3)
  1049			//	MOVW  n1(R4),R7
  1050			//	MOVW  R7,n1(R3)
  1051			//	MOVH  n2(R4),R7
  1052			//	MOVH  R7,n2(R3)
  1053			//	MOVB  n3(R4),R7
  1054			//	MOVB  R7,n3(R3)
  1055	
  1056			// Each loop iteration moves 32 bytes
  1057			ctr := v.AuxInt / 32
  1058	
  1059			// Remainder after the loop
  1060			rem := v.AuxInt % 32
  1061	
  1062			dst_reg := v.Args[0].Reg()
  1063			src_reg := v.Args[1].Reg()
  1064	
  1065		// The set of registers used here must match the clobbered reg list
  1066			// in PPC64Ops.go.
  1067			useregs := []int16{ppc64.REG_R7, ppc64.REG_R8, ppc64.REG_R9, ppc64.REG_R10}
  1068			offset := int64(0)
  1069	
  1070			// top of the loop
  1071			var top *obj.Prog
  1072		// Only generate looping code when the loop counter is > 1 (i.e., 64 or more bytes to move)
  1073			if ctr > 1 {
  1074				// Set up the CTR
  1075				p := s.Prog(ppc64.AMOVD)
  1076				p.From.Type = obj.TYPE_CONST
  1077				p.From.Offset = ctr
  1078				p.To.Type = obj.TYPE_REG
  1079				p.To.Reg = ppc64.REGTMP
  1080	
  1081				p = s.Prog(ppc64.AMOVD)
  1082				p.From.Type = obj.TYPE_REG
  1083				p.From.Reg = ppc64.REGTMP
  1084				p.To.Type = obj.TYPE_REG
  1085				p.To.Reg = ppc64.REG_CTR
  1086	
  1087				// Generate all the MOVDs for loads
  1088				// based off the same register, increasing
  1089				// the offset by 8 for each instruction
  1090				for _, rg := range useregs {
  1091					p := s.Prog(ppc64.AMOVD)
  1092					p.From.Type = obj.TYPE_MEM
  1093					p.From.Reg = src_reg
  1094					p.From.Offset = offset
  1095					p.To.Type = obj.TYPE_REG
  1096					p.To.Reg = rg
  1097					if top == nil {
  1098						top = p
  1099					}
  1100					offset += 8
  1101				}
  1102				// increment the src_reg for next iteration
  1103				p = s.Prog(ppc64.AADD)
  1104				p.Reg = src_reg
  1105				p.From.Type = obj.TYPE_CONST
  1106				p.From.Offset = 32
  1107				p.To.Type = obj.TYPE_REG
  1108				p.To.Reg = src_reg
  1109	
  1110				// generate the MOVDs for stores, based
  1111				// off the same register, using the same
  1112				// offsets as in the loads.
  1113				offset = int64(0)
  1114				for _, rg := range useregs {
  1115					p := s.Prog(ppc64.AMOVD)
  1116					p.From.Type = obj.TYPE_REG
  1117					p.From.Reg = rg
  1118					p.To.Type = obj.TYPE_MEM
  1119					p.To.Reg = dst_reg
  1120					p.To.Offset = offset
  1121					offset += 8
  1122				}
  1123				// increment the dst_reg for next iteration
  1124				p = s.Prog(ppc64.AADD)
  1125				p.Reg = dst_reg
  1126				p.From.Type = obj.TYPE_CONST
  1127				p.From.Offset = 32
  1128				p.To.Type = obj.TYPE_REG
  1129				p.To.Reg = dst_reg
  1130	
  1131				// BC with BO_BCTR generates bdnz to branch on nonzero CTR
  1132				// to loop top.
  1133				p = s.Prog(ppc64.ABC)
  1134				p.From.Type = obj.TYPE_CONST
  1135				p.From.Offset = ppc64.BO_BCTR
  1136				p.Reg = ppc64.REG_R0
  1137				p.To.Type = obj.TYPE_BRANCH
  1138				gc.Patch(p, top)
  1139	
  1140				// src_reg and dst_reg were incremented in the loop, so
  1141				// later instructions start with offset 0.
  1142				offset = int64(0)
  1143			}
  1144	
  1145			// No loop was generated for one iteration, so
  1146			// add 32 bytes to the remainder to move those bytes.
  1147			if ctr == 1 {
  1148				rem += 32
  1149			}
  1150	
  1151			// Generate all the remaining load and store pairs, starting with
  1152			// as many 8 byte moves as possible, then 4, 2, 1.
  1153			for rem > 0 {
  1154				op, size := ppc64.AMOVB, int64(1)
  1155				switch {
  1156				case rem >= 8:
  1157					op, size = ppc64.AMOVD, 8
  1158				case rem >= 4:
  1159					op, size = ppc64.AMOVW, 4
  1160				case rem >= 2:
  1161					op, size = ppc64.AMOVH, 2
  1162				}
  1163				// Load
  1164				p := s.Prog(op)
  1165				p.To.Type = obj.TYPE_REG
  1166				p.To.Reg = ppc64.REG_R7
  1167				p.From.Type = obj.TYPE_MEM
  1168				p.From.Reg = src_reg
  1169				p.From.Offset = offset
  1170	
  1171				// Store
  1172				p = s.Prog(op)
  1173				p.From.Type = obj.TYPE_REG
  1174				p.From.Reg = ppc64.REG_R7
  1175				p.To.Type = obj.TYPE_MEM
  1176				p.To.Reg = dst_reg
  1177				p.To.Offset = offset
  1178				rem -= size
  1179				offset += size
  1180			}
  1181	
  1182		case ssa.OpPPC64CALLstatic:
  1183			s.Call(v)
  1184	
  1185		case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter:
  1186			p := s.Prog(ppc64.AMOVD)
  1187			p.From.Type = obj.TYPE_REG
  1188			p.From.Reg = v.Args[0].Reg()
  1189			p.To.Type = obj.TYPE_REG
  1190			p.To.Reg = ppc64.REG_CTR
  1191	
  1192			if v.Args[0].Reg() != ppc64.REG_R12 {
  1193				v.Fatalf("Function address for %v should be in R12 %d but is in %d", v.LongString(), ppc64.REG_R12, p.From.Reg)
  1194			}
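		// The ELFv2 ABI expects the address of the called function in R12,
		// which the callee uses to establish its TOC pointer; the call
		// itself branches through CTR.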
  1195	
  1196			pp := s.Call(v)
  1197			pp.To.Reg = ppc64.REG_CTR
  1198	
  1199			if gc.Ctxt.Flag_shared {
  1200				// When compiling Go into PIC, the function we just
  1201				// called via pointer might have been implemented in
  1202				// a separate module and so overwritten the TOC
  1203				// pointer in R2; reload it.
  1204				q := s.Prog(ppc64.AMOVD)
  1205				q.From.Type = obj.TYPE_MEM
  1206				q.From.Offset = 24
  1207				q.From.Reg = ppc64.REGSP
  1208				q.To.Type = obj.TYPE_REG
  1209				q.To.Reg = ppc64.REG_R2
  1210			}
  1211	
  1212		case ssa.OpPPC64LoweredWB:
  1213			p := s.Prog(obj.ACALL)
  1214			p.To.Type = obj.TYPE_MEM
  1215			p.To.Name = obj.NAME_EXTERN
  1216			p.To.Sym = v.Aux.(*obj.LSym)
  1217	
  1218		case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
  1219			p := s.Prog(obj.ACALL)
  1220			p.To.Type = obj.TYPE_MEM
  1221			p.To.Name = obj.NAME_EXTERN
  1222			p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
  1223			s.UseArgs(16) // space used in callee args area by assembly stubs
  1224	
  1225		case ssa.OpPPC64LoweredNilCheck:
  1226			if objabi.GOOS == "aix" {
  1227				// CMP Rarg0, R0
  1228				// BNE 2(PC)
  1229				// STW R0, 0(R0)
  1230				// NOP (so the BNE has somewhere to land)
  1231	
  1232				// CMP Rarg0, R0
  1233				p := s.Prog(ppc64.ACMP)
  1234				p.From.Type = obj.TYPE_REG
  1235				p.From.Reg = v.Args[0].Reg()
  1236				p.To.Type = obj.TYPE_REG
  1237				p.To.Reg = ppc64.REG_R0
  1238	
  1239				// BNE 2(PC)
  1240				p2 := s.Prog(ppc64.ABNE)
  1241				p2.To.Type = obj.TYPE_BRANCH
  1242	
  1243				// STW R0, 0(R0)
  1244				// Write at 0 is forbidden and will trigger a SIGSEGV
  1245				p = s.Prog(ppc64.AMOVW)
  1246				p.From.Type = obj.TYPE_REG
  1247				p.From.Reg = ppc64.REG_R0
  1248				p.To.Type = obj.TYPE_MEM
  1249				p.To.Reg = ppc64.REG_R0
  1250	
  1251				// NOP (so the BNE has somewhere to land)
  1252				nop := s.Prog(obj.ANOP)
  1253				gc.Patch(p2, nop)
  1254	
  1255			} else {
  1256				// Issue a load which will fault if arg is nil.
  1257				p := s.Prog(ppc64.AMOVBZ)
  1258				p.From.Type = obj.TYPE_MEM
  1259				p.From.Reg = v.Args[0].Reg()
  1260				gc.AddAux(&p.From, v)
  1261				p.To.Type = obj.TYPE_REG
  1262				p.To.Reg = ppc64.REGTMP
  1263			}
  1264			if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1265				gc.Warnl(v.Pos, "generated nil check")
  1266			}
  1267	
  1268		case ssa.OpPPC64InvertFlags:
  1269			v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1270		case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT:
  1271			v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1272		case ssa.OpClobber:
  1273			// TODO: implement for clobberdead experiment. Nop is ok for now.
  1274		default:
  1275			v.Fatalf("genValue not implemented: %s", v.LongString())
  1276		}
  1277	}
  1278	
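// blockJump gives, for each conditional block kind, the branch to emit when
// the likely successor is the fallthrough (asm) and the inverted branch
// (invasm). asmeq and invasmun request an extra BEQ or BVS respectively,
// because the FP "or equal" conditions cannot be tested as a single CR bit
// once unordered (NaN) results are possible.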
  1279	var blockJump = [...]struct {
  1280		asm, invasm     obj.As
  1281		asmeq, invasmun bool
  1282	}{
  1283		ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
  1284		ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
  1285	
  1286		ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1287		ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
  1288		ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
  1289		ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1290	
  1291		// TODO: need to work FP comparisons into block jumps
  1292		ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGE, false, false},
  1293		ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, true}, // GE = GT or EQ; !GE = LT or UN
  1294		ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, true}, // LE = LT or EQ; !LE = GT or UN
  1295		ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLE, false, false},
  1296	}
  1297	
  1298	func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
  1299		switch b.Kind {
  1300		case ssa.BlockDefer:
  1301			// defer returns in R3:
  1302			// 0 if we should continue executing
  1303			// 1 if we should jump to deferreturn call
  1304			p := s.Prog(ppc64.ACMP)
  1305			p.From.Type = obj.TYPE_REG
  1306			p.From.Reg = ppc64.REG_R3
  1307			p.To.Type = obj.TYPE_REG
  1308			p.To.Reg = ppc64.REG_R0
  1309	
  1310			p = s.Prog(ppc64.ABNE)
  1311			p.To.Type = obj.TYPE_BRANCH
  1312			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1313			if b.Succs[0].Block() != next {
  1314				p := s.Prog(obj.AJMP)
  1315				p.To.Type = obj.TYPE_BRANCH
  1316				s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1317			}
  1318	
  1319		case ssa.BlockPlain:
  1320			if b.Succs[0].Block() != next {
  1321				p := s.Prog(obj.AJMP)
  1322				p.To.Type = obj.TYPE_BRANCH
  1323				s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1324			}
  1325		case ssa.BlockExit:
  1326		case ssa.BlockRet:
  1327			s.Prog(obj.ARET)
  1328		case ssa.BlockRetJmp:
  1329			p := s.Prog(obj.AJMP)
  1330			p.To.Type = obj.TYPE_MEM
  1331			p.To.Name = obj.NAME_EXTERN
  1332			p.To.Sym = b.Aux.(*obj.LSym)
  1333	
  1334		case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
  1335			ssa.BlockPPC64LT, ssa.BlockPPC64GE,
  1336			ssa.BlockPPC64LE, ssa.BlockPPC64GT,
  1337			ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
  1338			ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
  1339			jmp := blockJump[b.Kind]
  1340			switch next {
  1341			case b.Succs[0].Block():
  1342				s.Br(jmp.invasm, b.Succs[1].Block())
  1343				if jmp.invasmun {
  1344					// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  1345					s.Br(ppc64.ABVS, b.Succs[1].Block())
  1346				}
  1347			case b.Succs[1].Block():
  1348				s.Br(jmp.asm, b.Succs[0].Block())
  1349				if jmp.asmeq {
  1350					s.Br(ppc64.ABEQ, b.Succs[0].Block())
  1351				}
  1352			default:
  1353				if b.Likely != ssa.BranchUnlikely {
  1354					s.Br(jmp.asm, b.Succs[0].Block())
  1355					if jmp.asmeq {
  1356						s.Br(ppc64.ABEQ, b.Succs[0].Block())
  1357					}
  1358					s.Br(obj.AJMP, b.Succs[1].Block())
  1359				} else {
  1360					s.Br(jmp.invasm, b.Succs[1].Block())
  1361					if jmp.invasmun {
  1362						// TODO: The second branch is probably predict-not-taken since it is for FP unordered
  1363						s.Br(ppc64.ABVS, b.Succs[1].Block())
  1364					}
  1365					s.Br(obj.AJMP, b.Succs[0].Block())
  1366				}
  1367			}
  1368		default:
  1369			b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
  1370		}
  1371	}
  1372	
