...

Source file src/pkg/cmd/compile/internal/ssa/branchelim.go

     1	// Copyright 2017 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package ssa
     6	
     7	// branchelim tries to eliminate branches by
     8	// generating CondSelect instructions.
     9	//
    10	// Search for basic blocks that look like
    11	//
    12	// bb0            bb0
    13	//  | \          /   \
    14	//  | bb1  or  bb1   bb2    <- trivial if/else blocks
    15	//  | /          \   /
    16	// bb2            bb3
    17	//
    18	// where the intermediate blocks are mostly empty (with no side-effects);
    19	// rewrite Phis in the postdominator as CondSelects.
    20	func branchelim(f *Func) {
    21		// FIXME: add support for lowering CondSelects on more architectures
    22		switch f.Config.arch {
    23		case "arm64", "amd64":
    24			// implemented
    25		default:
    26			return
    27		}
    28	
    29		// Find all the values used in computing the address of any load.
    30		// Typically these values have operations like AddPtr, Lsh64x64, etc.
    31		loadAddr := f.newSparseSet(f.NumValues())
    32		defer f.retSparseSet(loadAddr)
    33		for _, b := range f.Blocks {
    34			for _, v := range b.Values {
    35				switch v.Op {
    36				case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32:
    37					loadAddr.add(v.Args[0].ID)
    38				case OpMove:
    39					loadAddr.add(v.Args[1].ID)
    40				}
    41			}
    42		}
    43		po := f.postorder()
    44		for {
    45			n := loadAddr.size()
    46			for _, b := range po {
    47				for i := len(b.Values) - 1; i >= 0; i-- {
    48					v := b.Values[i]
    49					if !loadAddr.contains(v.ID) {
    50						continue
    51					}
    52					for _, a := range v.Args {
    53						if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
    54							loadAddr.add(a.ID)
    55						}
    56					}
    57				}
    58			}
    59			if loadAddr.size() == n {
    60				break
    61			}
    62		}
    63	
    64		change := true
    65		for change {
    66			change = false
    67			for _, b := range f.Blocks {
    68				change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
    69			}
    70		}
    71	}
    72	
    73	func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
    74		if loadAddr.contains(v.ID) {
    75			// The result of the soon-to-be conditional move is used to compute a load address.
    76			// We want to avoid generating a conditional move in this case
    77			// because the load address would now be data-dependent on the condition.
    78			// Previously it would only be control-dependent on the condition, which is faster
    79			// if the branch predicts well (or possibly even if it doesn't, if the load will
    80			// be an expensive cache miss).
    81			// See issue #26306.
    82			return false
    83		}
    84		// For now, stick to simple scalars that fit in registers
    85		switch {
    86		case v.Type.Size() > v.Block.Func.Config.RegSize:
    87			return false
    88		case v.Type.IsPtrShaped():
    89			return true
    90		case v.Type.IsInteger():
    91			if arch == "amd64" && v.Type.Size() < 2 {
    92				// amd64 doesn't support CMOV with byte registers
    93				return false
    94			}
    95			return true
    96		default:
    97			return false
    98		}
    99	}
   100	
   101	// elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
   102	// loadAddr is a set of values which are used to compute the address of a load.
   103	// Those values are exempt from CMOV generation.
   104	func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
   105		// See if dom is an If with one arm that
   106		// is trivial and succeeded by the other
   107		// successor of dom.
   108		if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
   109			return false
   110		}
   111		var simple, post *Block
   112		for i := range dom.Succs {
   113			bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
   114			if isLeafPlain(bb) && bb.Succs[0].Block() == other {
   115				simple = bb
   116				post = other
   117				break
   118			}
   119		}
   120		if simple == nil || len(post.Preds) != 2 || post == dom {
   121			return false
   122		}
   123	
   124		// We've found our diamond CFG of blocks.
   125		// Now decide if fusing 'simple' into dom+post
   126		// looks profitable.
   127	
   128		// Check that there are Phis, and that all of them
   129		// can be safely rewritten to CondSelect.
   130		hasphis := false
   131		for _, v := range post.Values {
   132			if v.Op == OpPhi {
   133				hasphis = true
   134				if !canCondSelect(v, f.Config.arch, loadAddr) {
   135					return false
   136				}
   137			}
   138		}
   139		if !hasphis {
   140			return false
   141		}
   142	
   143		// Pick some upper bound for the number of instructions
   144		// we'd be willing to execute just to generate a dead
   145		// argument to CondSelect. In the worst case, this is
   146		// the number of useless instructions executed.
   147		const maxfuseinsts = 2
   148	
   149		if len(simple.Values) > maxfuseinsts || !allTrivial(simple) {
   150			return false
   151		}
   152	
   153		// Replace Phi instructions in b with CondSelect instructions
   154		swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
   155		for _, v := range post.Values {
   156			if v.Op != OpPhi {
   157				continue
   158			}
   159			v.Op = OpCondSelect
   160			if swap {
   161				v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   162			}
   163			v.AddArg(dom.Control)
   164		}
   165	
   166		// Put all of the instructions into 'dom'
   167		// and update the CFG appropriately.
   168		dom.Kind = post.Kind
   169		dom.SetControl(post.Control)
   170		dom.Aux = post.Aux
   171		dom.Succs = append(dom.Succs[:0], post.Succs...)
   172		for i := range dom.Succs {
   173			e := dom.Succs[i]
   174			e.b.Preds[e.i].b = dom
   175		}
   176	
   177		for i := range simple.Values {
   178			simple.Values[i].Block = dom
   179		}
   180		for i := range post.Values {
   181			post.Values[i].Block = dom
   182		}
   183		dom.Values = append(dom.Values, simple.Values...)
   184		dom.Values = append(dom.Values, post.Values...)
   185	
   186		// Trash 'post' and 'simple'
   187		clobberBlock(post)
   188		clobberBlock(simple)
   189	
   190		f.invalidateCFG()
   191		return true
   192	}
   193	
   194	// is this a BlockPlain with one predecessor?
   195	func isLeafPlain(b *Block) bool {
   196		return b.Kind == BlockPlain && len(b.Preds) == 1
   197	}
   198	
   199	func clobberBlock(b *Block) {
   200		b.Values = nil
   201		b.Preds = nil
   202		b.Succs = nil
   203		b.Aux = nil
   204		b.SetControl(nil)
   205		b.Likely = BranchUnknown
   206		b.Kind = BlockInvalid
   207	}
   208	
   209	// elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
   210	// loadAddr is a set of values which are used to compute the address of a load.
   211	// Those values are exempt from CMOV generation.
   212	func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
   213		// See if 'b' ends in an if/else: it should
   214		// have two successors, both of which are BlockPlain
   215		// and succeeded by the same block.
   216		if b.Kind != BlockIf || b.Likely != BranchUnknown {
   217			return false
   218		}
   219		yes, no := b.Succs[0].Block(), b.Succs[1].Block()
   220		if !isLeafPlain(yes) || len(yes.Values) > 1 || !allTrivial(yes) {
   221			return false
   222		}
   223		if !isLeafPlain(no) || len(no.Values) > 1 || !allTrivial(no) {
   224			return false
   225		}
   226		if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
   227			return false
   228		}
   229		// block that postdominates the if/else
   230		post := b.Succs[0].Block().Succs[0].Block()
   231		if len(post.Preds) != 2 || post == b {
   232			return false
   233		}
   234		hasphis := false
   235		for _, v := range post.Values {
   236			if v.Op == OpPhi {
   237				hasphis = true
   238				if !canCondSelect(v, f.Config.arch, loadAddr) {
   239					return false
   240				}
   241			}
   242		}
   243		if !hasphis {
   244			return false
   245		}
   246	
   247		// Don't generate CondSelects if branch is cheaper.
   248		if !shouldElimIfElse(no, yes, post, f.Config.arch) {
   249			return false
   250		}
   251	
   252		// now we're committed: rewrite each Phi as a CondSelect
   253		swap := post.Preds[0].Block() != b.Succs[0].Block()
   254		for _, v := range post.Values {
   255			if v.Op != OpPhi {
   256				continue
   257			}
   258			v.Op = OpCondSelect
   259			if swap {
   260				v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   261			}
   262			v.AddArg(b.Control)
   263		}
   264	
   265		// Move the contents of all of these
   266		// blocks into 'b' and update CFG edges accordingly
   267		b.Kind = post.Kind
   268		b.SetControl(post.Control)
   269		b.Aux = post.Aux
   270		b.Succs = append(b.Succs[:0], post.Succs...)
   271		for i := range b.Succs {
   272			e := b.Succs[i]
   273			e.b.Preds[e.i].b = b
   274		}
   275		for i := range post.Values {
   276			post.Values[i].Block = b
   277		}
   278		for i := range yes.Values {
   279			yes.Values[i].Block = b
   280		}
   281		for i := range no.Values {
   282			no.Values[i].Block = b
   283		}
   284		b.Values = append(b.Values, yes.Values...)
   285		b.Values = append(b.Values, no.Values...)
   286		b.Values = append(b.Values, post.Values...)
   287	
   288		// trash post, yes, and no
   289		clobberBlock(yes)
   290		clobberBlock(no)
   291		clobberBlock(post)
   292	
   293		f.invalidateCFG()
   294		return true
   295	}
   296	
   297	// shouldElimIfElse reports whether estimated cost of eliminating branch
   298	// is lower than threshold.
   299	func shouldElimIfElse(no, yes, post *Block, arch string) bool {
   300		switch arch {
   301		default:
   302			return true
   303		case "amd64":
   304			const maxcost = 2
   305			phi := 0
   306			other := 0
   307			for _, v := range post.Values {
   308				if v.Op == OpPhi {
   309					// Each phi results in CondSelect, which lowers into CMOV,
   310					// CMOV has latency >1 on most CPUs.
   311					phi++
   312				}
   313				for _, x := range v.Args {
   314					if x.Block == no || x.Block == yes {
   315						other++
   316					}
   317				}
   318			}
   319			cost := phi * 1
   320			if phi > 1 {
   321				// If we have more than 1 phi and some values in post have args
   322				// in yes or no blocks, we may have to recalucalte condition, because
   323				// those args may clobber flags. For now assume that all operations clobber flags.
   324				cost += other * 1
   325			}
   326			return cost < maxcost
   327		}
   328	}
   329	
   330	func allTrivial(b *Block) bool {
   331		// don't fuse memory ops, Phi ops, divides (can panic),
   332		// or anything else with side-effects
   333		for _, v := range b.Values {
   334			if v.Op == OpPhi || isDivMod(v.Op) || v.Type.IsMemory() ||
   335				v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
   336				return false
   337			}
   338		}
   339		return true
   340	}
   341	
   342	func isDivMod(op Op) bool {
   343		switch op {
   344		case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
   345			OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
   346			OpDiv32F, OpDiv64F,
   347			OpMod8, OpMod8u, OpMod16, OpMod16u,
   348			OpMod32, OpMod32u, OpMod64, OpMod64u:
   349			return true
   350		default:
   351			return false
   352		}
   353	}
   354	

View as plain text