...

Source file src/cmd/compile/internal/ssa/rewrite.go

     1	// Copyright 2015 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package ssa
     6	
     7	import (
     8		"cmd/compile/internal/types"
     9		"cmd/internal/obj"
    10		"cmd/internal/objabi"
    11		"cmd/internal/src"
    12		"encoding/binary"
    13		"fmt"
    14		"io"
    15		"math"
    16		"math/bits"
    17		"os"
    18		"path/filepath"
    19	)
    20	
    21	func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
    22		// repeat rewrites until we find no more rewrites
    23		pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    24		pendingLines.clear()
    25		for {
    26			change := false
    27			for _, b := range f.Blocks {
    28				if b.Control != nil && b.Control.Op == OpCopy {
    29					for b.Control.Op == OpCopy {
    30						b.SetControl(b.Control.Args[0])
    31					}
    32				}
    33				if rb(b) {
    34					change = true
    35				}
    36				for j, v := range b.Values {
    37					change = phielimValue(v) || change
    38	
    39					// Eliminate copy inputs.
    40					// If any copy input becomes unused, mark it
    41					// as invalid and discard its argument. Repeat
    42					// recursively on the discarded argument.
    43					// This phase helps remove phantom "dead copy" uses
    44					// of a value so that a x.Uses==1 rule condition
    45					// fires reliably.
    46					for i, a := range v.Args {
    47						if a.Op != OpCopy {
    48							continue
    49						}
    50						aa := copySource(a)
    51						v.SetArg(i, aa)
    52						// If a, a copy, has a line boundary indicator, attempt to find a new value
    53						// to hold it.  The first candidate is the value that will replace a (aa),
    54						// if it shares the same block and line and is eligible.
    55						// The second option is v, which has a as an input.  Because aa is earlier in
    56						// the data flow, it is the better choice.
    57						if a.Pos.IsStmt() == src.PosIsStmt {
    58							if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
    59								aa.Pos = aa.Pos.WithIsStmt()
    60							} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
    61								v.Pos = v.Pos.WithIsStmt()
    62							} else {
    63								// Record the lost line and look for a new home after all rewrites are complete.
    64								// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
    65								// line to appear in more than one block, but only one block is stored, so if both end
    66								// up here, then one will be lost.
    67								pendingLines.set(a.Pos, int32(a.Block.ID))
    68							}
    69							a.Pos = a.Pos.WithNotStmt()
    70						}
    71						change = true
    72						for a.Uses == 0 {
    73							b := a.Args[0]
    74							a.reset(OpInvalid)
    75							a = b
    76						}
    77					}
    78	
    79					// apply rewrite function
    80					if rv(v) {
    81						change = true
    82						// If value changed to a poor choice for a statement boundary, move the boundary
    83						if v.Pos.IsStmt() == src.PosIsStmt {
    84							if k := nextGoodStatementIndex(v, j, b); k != j {
    85								v.Pos = v.Pos.WithNotStmt()
    86								b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
    87							}
    88						}
    89					}
    90				}
    91			}
    92			if !change {
    93				break
    94			}
    95		}
    96		// remove clobbered values
    97		for _, b := range f.Blocks {
    98			j := 0
    99			for i, v := range b.Values {
   100				vl := v.Pos
   101				if v.Op == OpInvalid {
   102					if v.Pos.IsStmt() == src.PosIsStmt {
   103						pendingLines.set(vl, int32(b.ID))
   104					}
   105					f.freeValue(v)
   106					continue
   107				}
   108				if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
   109					pendingLines.remove(vl)
   110					v.Pos = v.Pos.WithIsStmt()
   111				}
   112				if i != j {
   113					b.Values[j] = v
   114				}
   115				j++
   116			}
   117			if pendingLines.get(b.Pos) == int32(b.ID) {
   118				b.Pos = b.Pos.WithIsStmt()
   119				pendingLines.remove(b.Pos)
   120			}
   121			if j != len(b.Values) {
   122				tail := b.Values[j:]
   123				for j := range tail {
   124					tail[j] = nil
   125				}
   126				b.Values = b.Values[:j]
   127			}
   128		}
   129	}
   130	
   131	// Common functions called from rewriting rules
   132	
   133	func is64BitFloat(t *types.Type) bool {
   134		return t.Size() == 8 && t.IsFloat()
   135	}
   136	
   137	func is32BitFloat(t *types.Type) bool {
   138		return t.Size() == 4 && t.IsFloat()
   139	}
   140	
   141	func is64BitInt(t *types.Type) bool {
   142		return t.Size() == 8 && t.IsInteger()
   143	}
   144	
   145	func is32BitInt(t *types.Type) bool {
   146		return t.Size() == 4 && t.IsInteger()
   147	}
   148	
   149	func is16BitInt(t *types.Type) bool {
   150		return t.Size() == 2 && t.IsInteger()
   151	}
   152	
   153	func is8BitInt(t *types.Type) bool {
   154		return t.Size() == 1 && t.IsInteger()
   155	}
   156	
   157	func isPtr(t *types.Type) bool {
   158		return t.IsPtrShaped()
   159	}
   160	
   161	func isSigned(t *types.Type) bool {
   162		return t.IsSigned()
   163	}
   164	
   165	// mergeSym merges two symbolic offsets. There is no real merging of
   166	// offsets, we just pick the non-nil one.
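      	// For example, mergeSym(nil, s) == s and mergeSym(s, nil) == s; merging two non-nil syms panics.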
   167	func mergeSym(x, y interface{}) interface{} {
   168		if x == nil {
   169			return y
   170		}
   171		if y == nil {
   172			return x
   173		}
   174		panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
   175	}
   176	func canMergeSym(x, y interface{}) bool {
   177		return x == nil || y == nil
   178	}
   179	
   180	// canMergeLoadClobber reports whether the load can be merged into target without
   181	// invalidating the schedule.
   182	// It also checks that the other non-load argument x is something we
   183	// are ok with clobbering.
   184	func canMergeLoadClobber(target, load, x *Value) bool {
   185		// The register containing x is going to get clobbered.
   186		// Don't merge if we still need the value of x.
   187		// We don't have liveness information here, but we can
   188		// approximate x dying with:
   189		//  1) target is x's only use.
   190		//  2) target is not in a deeper loop than x.
   191		if x.Uses != 1 {
   192			return false
   193		}
   194		loopnest := x.Block.Func.loopnest()
   195		loopnest.calculateDepths()
   196		if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   197			return false
   198		}
   199		return canMergeLoad(target, load)
   200	}
   201	
   202	// canMergeLoad reports whether the load can be merged into target without
   203	// invalidating the schedule.
   204	func canMergeLoad(target, load *Value) bool {
   205		if target.Block.ID != load.Block.ID {
   206			// If the load is in a different block do not merge it.
   207			return false
   208		}
   209	
   210		// We can't merge the load into the target if the load
   211		// has more than one use.
   212		if load.Uses != 1 {
   213			return false
   214		}
   215	
   216		mem := load.MemoryArg()
   217	
   218		// We need the load's memory arg to still be alive at target. That
   219		// can't be the case if one of target's args depends on a memory
   220		// state that is a successor of load's memory arg.
   221		//
   222		// For example, it would be invalid to merge load into target in
   223		// the following situation because newmem has killed oldmem
   224		// before target is reached:
   225		//     load = read ... oldmem
   226		//   newmem = write ... oldmem
   227		//     arg0 = read ... newmem
   228		//   target = add arg0 load
   229		//
   230		// If the argument comes from a different block then we can exclude
   231		// it immediately because it must dominate load (which is in the
   232		// same block as target).
   233		var args []*Value
   234		for _, a := range target.Args {
   235			if a != load && a.Block.ID == target.Block.ID {
   236				args = append(args, a)
   237			}
   238		}
   239	
   240		// memPreds contains memory states known to be predecessors of load's
   241		// memory state. It is lazily initialized.
   242		var memPreds map[*Value]bool
   243		for i := 0; len(args) > 0; i++ {
   244			const limit = 100
   245			if i >= limit {
   246				// Give up if we have done a lot of iterations.
   247				return false
   248			}
   249			v := args[len(args)-1]
   250			args = args[:len(args)-1]
   251			if target.Block.ID != v.Block.ID {
   252				// Since target and load are in the same block
   253				// we can stop searching when we leave the block.
   254				continue
   255			}
   256			if v.Op == OpPhi {
   257				// A Phi implies we have reached the top of the block.
   258				// The memory phi, if it exists, is always
   259				// the first logical store in the block.
   260				continue
   261			}
   262			if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   263				// We could handle this situation however it is likely
   264				// to be very rare.
   265				return false
   266			}
   267			if v.Op.SymEffect()&SymAddr != 0 {
   268				// This case prevents an operation that calculates the
   269				// address of a local variable from being forced to schedule
   270				// before its corresponding VarDef.
   271				// See issue 28445.
   272				//   v1 = LOAD ...
   273				//   v2 = VARDEF
   274				//   v3 = LEAQ
   275				//   v4 = CMPQ v1 v3
   276				// We don't want to combine the CMPQ with the load, because
   277				// that would force the CMPQ to schedule before the VARDEF, which
   278				// in turn requires the LEAQ to schedule before the VARDEF.
   279				return false
   280			}
   281			if v.Type.IsMemory() {
   282				if memPreds == nil {
   283					// Initialise a map containing memory states
   284					// known to be predecessors of load's memory
   285					// state.
   286					memPreds = make(map[*Value]bool)
   287					m := mem
   288					const limit = 50
   289					for i := 0; i < limit; i++ {
   290						if m.Op == OpPhi {
   291							// The memory phi, if it exists, is always
   292							// the first logical store in the block.
   293							break
   294						}
   295						if m.Block.ID != target.Block.ID {
   296							break
   297						}
   298						if !m.Type.IsMemory() {
   299							break
   300						}
   301						memPreds[m] = true
   302						if len(m.Args) == 0 {
   303							break
   304						}
   305						m = m.MemoryArg()
   306					}
   307				}
   308	
   309				// We can merge if v is a predecessor of mem.
   310				//
   311				// For example, we can merge load into target in the
   312				// following scenario:
   313				//      x = read ... v
   314				//    mem = write ... v
   315				//   load = read ... mem
   316				// target = add x load
   317				if memPreds[v] {
   318					continue
   319				}
   320				return false
   321			}
   322			if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   323				// If v takes mem as an input then we know mem
   324				// is valid at this point.
   325				continue
   326			}
   327			for _, a := range v.Args {
   328				if target.Block.ID == a.Block.ID {
   329					args = append(args, a)
   330				}
   331			}
   332		}
   333	
   334		return true
   335	}
   336	
   337	// isSameSym reports whether sym is the same as the given named symbol
   338	func isSameSym(sym interface{}, name string) bool {
   339		s, ok := sym.(fmt.Stringer)
   340		return ok && s.String() == name
   341	}
   342	
   343	// nlz returns the number of leading zeros.
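      	// For example, nlz(1) == 63 and nlz(-1) == 0.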
   344	func nlz(x int64) int64 {
   345		return int64(bits.LeadingZeros64(uint64(x)))
   346	}
   347	
   348	// ntz returns the number of trailing zeros.
   349	func ntz(x int64) int64 {
   350		return int64(bits.TrailingZeros64(uint64(x)))
   351	}
   352	
   353	func oneBit(x int64) bool {
   354		return bits.OnesCount64(uint64(x)) == 1
   355	}
   356	
   357	// nlo returns the number of leading ones.
   358	func nlo(x int64) int64 {
   359		return nlz(^x)
   360	}
   361	
   362	// nto returns the number of trailing ones.
   363	func nto(x int64) int64 {
   364		return ntz(^x)
   365	}
   366	
   367	// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
   368	// Rounds down.
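      	// For example, log2(1) == 0, log2(8) == 3, log2(9) == 3, and log2(0) == -1.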
   369	func log2(n int64) int64 {
   370		return int64(bits.Len64(uint64(n))) - 1
   371	}
   372	
   373	// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   374	// Rounds down.
   375	func log2uint32(n int64) int64 {
   376		return int64(bits.Len32(uint32(n))) - 1
   377	}
   378	
   379	// isPowerOfTwo reports whether n is a power of 2.
   380	func isPowerOfTwo(n int64) bool {
   381		return n > 0 && n&(n-1) == 0
   382	}
   383	
   384	// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   385	func isUint64PowerOfTwo(in int64) bool {
   386		n := uint64(in)
   387		return n > 0 && n&(n-1) == 0
   388	}
   389	
   390	// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   391	func isUint32PowerOfTwo(in int64) bool {
   392		n := uint64(uint32(in))
   393		return n > 0 && n&(n-1) == 0
   394	}
   395	
   396	// is32Bit reports whether n can be represented as a signed 32 bit integer.
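      	// For example, is32Bit((1<<31)-1) is true, while is32Bit(1<<31) is false.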
   397	func is32Bit(n int64) bool {
   398		return n == int64(int32(n))
   399	}
   400	
   401	// is16Bit reports whether n can be represented as a signed 16 bit integer.
   402	func is16Bit(n int64) bool {
   403		return n == int64(int16(n))
   404	}
   405	
   406	// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   407	func isU12Bit(n int64) bool {
   408		return 0 <= n && n < (1<<12)
   409	}
   410	
   411	// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   412	func isU16Bit(n int64) bool {
   413		return n == int64(uint16(n))
   414	}
   415	
   416	// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   417	func isU32Bit(n int64) bool {
   418		return n == int64(uint32(n))
   419	}
   420	
   421	// is20Bit reports whether n can be represented as a signed 20 bit integer.
   422	func is20Bit(n int64) bool {
   423		return -(1<<19) <= n && n < (1<<19)
   424	}
   425	
   426	// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   427	func b2i(b bool) int64 {
   428		if b {
   429			return 1
   430		}
   431		return 0
   432	}
   433	
   434	// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   435	// A shift is bounded if it is shifting by less than the width of the shifted value.
   436	func shiftIsBounded(v *Value) bool {
   437		return v.AuxInt != 0
   438	}
   439	
   440	// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   441	// of the mantissa. It will panic if the truncation results in lost information.
   442	func truncate64Fto32F(f float64) float32 {
   443		if !isExactFloat32(f) {
   444			panic("truncate64Fto32F: truncation is not exact")
   445		}
   446		if !math.IsNaN(f) {
   447			return float32(f)
   448		}
   449		// NaN bit patterns aren't necessarily preserved across conversion
   450		// instructions so we need to do the conversion manually.
   451		b := math.Float64bits(f)
   452		m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   453		//          | sign                  | exponent   | mantissa       |
   454		r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   455		return math.Float32frombits(r)
   456	}
   457	
   458	// extend32Fto64F converts a float32 value to a float64 value preserving the bit
   459	// pattern of the mantissa.
   460	func extend32Fto64F(f float32) float64 {
   461		if !math.IsNaN(float64(f)) {
   462			return float64(f)
   463		}
   464		// NaN bit patterns aren't necessarily preserved across conversion
   465		// instructions so we need to do the conversion manually.
   466		b := uint64(math.Float32bits(f))
   467		//   | sign                  | exponent      | mantissa                    |
   468		r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   469		return math.Float64frombits(r)
   470	}
   471	
   472	// NeedsFixUp reports whether the division needs fix-up code.
   473	func NeedsFixUp(v *Value) bool {
   474		return v.AuxInt == 0
   475	}
   476	
   477	// i2f is used in rules for converting from an AuxInt to a float.
   478	func i2f(i int64) float64 {
   479		return math.Float64frombits(uint64(i))
   480	}
   481	
   482	// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   483	func auxFrom64F(f float64) int64 {
   484		return int64(math.Float64bits(f))
   485	}
   486	
   487	// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   488	func auxFrom32F(f float32) int64 {
   489		return int64(math.Float64bits(extend32Fto64F(f)))
   490	}
   491	
   492	// auxTo32F decodes a float32 from the AuxInt value provided.
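      	// The float32 is stored in the AuxInt as its exact float64 extension; see auxFrom32F.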
   493	func auxTo32F(i int64) float32 {
   494		return truncate64Fto32F(math.Float64frombits(uint64(i)))
   495	}
   496	
   497	// auxTo64F decodes a float64 from the AuxInt value provided.
   498	func auxTo64F(i int64) float64 {
   499		return math.Float64frombits(uint64(i))
   500	}
   501	
   502	// uaddOvf reports whether unsigned a+b would overflow.
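      	// For example, uaddOvf(-1, 1) is true because the unsigned sum wraps around to 0.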
   503	func uaddOvf(a, b int64) bool {
   504		return uint64(a)+uint64(b) < uint64(a)
   505	}
   506	
   507	// de-virtualize an InterCall
   508	// 'sym' is the symbol for the itab
   509	func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
   510		f := v.Block.Func
   511		n, ok := sym.(*obj.LSym)
   512		if !ok {
   513			return nil
   514		}
   515		lsym := f.fe.DerefItab(n, offset)
   516		if f.pass.debug > 0 {
   517			if lsym != nil {
   518				f.Warnl(v.Pos, "de-virtualizing call")
   519			} else {
   520				f.Warnl(v.Pos, "couldn't de-virtualize call")
   521			}
   522		}
   523		return lsym
   524	}
   525	
   526	// isSamePtr reports whether p1 and p2 point to the same address.
   527	func isSamePtr(p1, p2 *Value) bool {
   528		if p1 == p2 {
   529			return true
   530		}
   531		if p1.Op != p2.Op {
   532			return false
   533		}
   534		switch p1.Op {
   535		case OpOffPtr:
   536			return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   537		case OpAddr, OpLocalAddr:
   538			// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
   539			// Checking for value equality only works after [z]cse has run.
   540			return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
   541		case OpAddPtr:
   542			return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   543		}
   544		return false
   545	}
   546	
   547	func isStackPtr(v *Value) bool {
   548		for v.Op == OpOffPtr || v.Op == OpAddPtr {
   549			v = v.Args[0]
   550		}
   551		return v.Op == OpSP || v.Op == OpLocalAddr
   552	}
   553	
   554	// disjoint reports whether the memory region specified by [p1:p1+n1)
   555	// does not overlap with [p2:p2+n2).
   556	// A return value of false does not imply the regions overlap.
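      	// For example, the 8-byte region at p is disjoint from the 8-byte region at an OffPtr of p with offset 8.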
   557	func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   558		if n1 == 0 || n2 == 0 {
   559			return true
   560		}
   561		if p1 == p2 {
   562			return false
   563		}
   564		baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   565			base, offset = ptr, 0
   566			for base.Op == OpOffPtr {
   567				offset += base.AuxInt
   568				base = base.Args[0]
   569			}
   570			return base, offset
   571		}
   572		p1, off1 := baseAndOffset(p1)
   573		p2, off2 := baseAndOffset(p2)
   574		if isSamePtr(p1, p2) {
   575			return !overlap(off1, n1, off2, n2)
   576		}
   577		// p1 and p2 are not the same, so if they are both OpAddrs then
   578		// they point to different variables.
   579		// If one pointer is on the stack and the other is an argument
   580		// then they can't overlap.
   581		switch p1.Op {
   582		case OpAddr, OpLocalAddr:
   583			if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   584				return true
   585			}
   586			return p2.Op == OpArg && p1.Args[0].Op == OpSP
   587		case OpArg:
   588			if p2.Op == OpSP || p2.Op == OpLocalAddr {
   589				return true
   590			}
   591		case OpSP:
   592			return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
   593		}
   594		return false
   595	}
   596	
   597	// moveSize returns the number of bytes an aligned MOV instruction moves
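      	// For example, with 8-byte pointers, moveSize(8, c) == 8, moveSize(4, c) == 4, and moveSize(3, c) == 1.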
   598	func moveSize(align int64, c *Config) int64 {
   599		switch {
   600		case align%8 == 0 && c.PtrSize == 8:
   601			return 8
   602		case align%4 == 0:
   603			return 4
   604		case align%2 == 0:
   605			return 2
   606		}
   607		return 1
   608	}
   609	
   610	// mergePoint finds a block among a's blocks which dominates b and is itself
   611	// dominated by all of a's blocks. Returns nil if it can't find one.
   612	// Might return nil even if one does exist.
   613	func mergePoint(b *Block, a ...*Value) *Block {
   614		// Walk backward from b looking for one of the a's blocks.
   615	
   616		// Max distance
   617		d := 100
   618	
   619		for d > 0 {
   620			for _, x := range a {
   621				if b == x.Block {
   622					goto found
   623				}
   624			}
   625			if len(b.Preds) > 1 {
   626				// Don't know which way to go back. Abort.
   627				return nil
   628			}
   629			b = b.Preds[0].b
   630			d--
   631		}
   632		return nil // too far away
   633	found:
   634		// At this point, b is the block of the first value in a that we found by walking backwards.
   635		// If we return anything, it will be this block, saved in r below.
   636		r := b
   637	
   638		// Keep going, counting the other a's that we find. They must all dominate r.
   639		na := 0
   640		for d > 0 {
   641			for _, x := range a {
   642				if b == x.Block {
   643					na++
   644				}
   645			}
   646			if na == len(a) {
   647				// Found all of a in a backwards walk. We can return r.
   648				return r
   649			}
   650			if len(b.Preds) > 1 {
   651				return nil
   652			}
   653			b = b.Preds[0].b
   654			d--
   655	
   656		}
   657		return nil // too far away
   658	}
   659	
   660	// clobber invalidates v.  Returns true.
   661	// clobber is used by rewrite rules to:
   662	//   A) make sure v is really dead and never used again.
   663	//   B) decrement use counts of v's args.
   664	func clobber(v *Value) bool {
   665		v.reset(OpInvalid)
   666		// Note: leave v.Block intact.  The Block field is used after clobber.
   667		return true
   668	}
   669	
   670	// clobberIfDead resets v when use count is 1. Returns true.
   671	// clobberIfDead is used by rewrite rules to decrement
   672	// use counts of v's args when v is dead and never used.
   673	func clobberIfDead(v *Value) bool {
   674		if v.Uses == 1 {
   675			v.reset(OpInvalid)
   676		}
   677		// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   678		return true
   679	}
   680	
   681	// noteRule is an easy way to track if a rule is matched when writing
   682	// new ones.  Make the rule of interest also conditional on
   683	//     noteRule("note to self: rule of interest matched")
   684	// and that message will print when the rule matches.
   685	func noteRule(s string) bool {
   686		fmt.Println(s)
   687		return true
   688	}
   689	
   690	// countRule increments Func.ruleMatches[key].
   691	// If Func.ruleMatches is non-nil at the end
   692	// of compilation, it will be printed to stdout.
   693	// This is intended to make it easier to find which functions
   694	// contain lots of rule matches when developing new rules.
   695	func countRule(v *Value, key string) bool {
   696		f := v.Block.Func
   697		if f.ruleMatches == nil {
   698			f.ruleMatches = make(map[string]int)
   699		}
   700		f.ruleMatches[key]++
   701		return true
   702	}
   703	
   704	// warnRule generates compiler debug output with string s when
   705	// cond is true, v is not in autogenerated code, and the rule has fired.
   706	func warnRule(cond bool, v *Value, s string) bool {
   707		if pos := v.Pos; pos.Line() > 1 && cond {
   708			v.Block.Func.Warnl(pos, s)
   709		}
   710		return true
   711	}
   712	
   713	// for a pseudo-op like (LessThan x), extract x
   714	func flagArg(v *Value) *Value {
   715		if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
   716			return nil
   717		}
   718		return v.Args[0]
   719	}
   720	
   721	// arm64Negate finds the complement to an ARM64 condition code,
   722	// for example Equal -> NotEqual or LessThan -> GreaterEqual
   723	//
   724	// TODO: add floating-point conditions
   725	func arm64Negate(op Op) Op {
   726		switch op {
   727		case OpARM64LessThan:
   728			return OpARM64GreaterEqual
   729		case OpARM64LessThanU:
   730			return OpARM64GreaterEqualU
   731		case OpARM64GreaterThan:
   732			return OpARM64LessEqual
   733		case OpARM64GreaterThanU:
   734			return OpARM64LessEqualU
   735		case OpARM64LessEqual:
   736			return OpARM64GreaterThan
   737		case OpARM64LessEqualU:
   738			return OpARM64GreaterThanU
   739		case OpARM64GreaterEqual:
   740			return OpARM64LessThan
   741		case OpARM64GreaterEqualU:
   742			return OpARM64LessThanU
   743		case OpARM64Equal:
   744			return OpARM64NotEqual
   745		case OpARM64NotEqual:
   746			return OpARM64Equal
   747		case OpARM64LessThanF:
   748			return OpARM64GreaterEqualF
   749		case OpARM64GreaterThanF:
   750			return OpARM64LessEqualF
   751		case OpARM64LessEqualF:
   752			return OpARM64GreaterThanF
   753		case OpARM64GreaterEqualF:
   754			return OpARM64LessThanF
   755		default:
   756			panic("unreachable")
   757		}
   758	}
   759	
   760	// arm64Invert evaluates (InvertFlags op), which
   761	// is the same as altering the condition codes such
   762	// that the same result would be produced if the arguments
   763	// to the flag-generating instruction were reversed, e.g.
   764	// (InvertFlags (CMP x y)) -> (CMP y x)
   765	//
   766	// TODO: add floating-point conditions
   767	func arm64Invert(op Op) Op {
   768		switch op {
   769		case OpARM64LessThan:
   770			return OpARM64GreaterThan
   771		case OpARM64LessThanU:
   772			return OpARM64GreaterThanU
   773		case OpARM64GreaterThan:
   774			return OpARM64LessThan
   775		case OpARM64GreaterThanU:
   776			return OpARM64LessThanU
   777		case OpARM64LessEqual:
   778			return OpARM64GreaterEqual
   779		case OpARM64LessEqualU:
   780			return OpARM64GreaterEqualU
   781		case OpARM64GreaterEqual:
   782			return OpARM64LessEqual
   783		case OpARM64GreaterEqualU:
   784			return OpARM64LessEqualU
   785		case OpARM64Equal, OpARM64NotEqual:
   786			return op
   787		case OpARM64LessThanF:
   788			return OpARM64GreaterThanF
   789		case OpARM64GreaterThanF:
   790			return OpARM64LessThanF
   791		case OpARM64LessEqualF:
   792			return OpARM64GreaterEqualF
   793		case OpARM64GreaterEqualF:
   794			return OpARM64LessEqualF
   795		default:
   796			panic("unreachable")
   797		}
   798	}
   799	
   800	// evaluate an ARM64 op against a flags value
   801	// that is potentially constant; return 1 for true,
   802	// -1 for false, and 0 for not constant.
   803	func ccARM64Eval(cc interface{}, flags *Value) int {
   804		op := cc.(Op)
   805		fop := flags.Op
   806		switch fop {
   807		case OpARM64InvertFlags:
   808			return -ccARM64Eval(op, flags.Args[0])
   809		case OpARM64FlagEQ:
   810			switch op {
   811			case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
   812				OpARM64GreaterEqualU, OpARM64LessEqualU:
   813				return 1
   814			default:
   815				return -1
   816			}
   817		case OpARM64FlagLT_ULT:
   818			switch op {
   819			case OpARM64LessThan, OpARM64LessThanU,
   820				OpARM64LessEqual, OpARM64LessEqualU:
   821				return 1
   822			default:
   823				return -1
   824			}
   825		case OpARM64FlagLT_UGT:
   826			switch op {
   827			case OpARM64LessThan, OpARM64GreaterThanU,
   828				OpARM64LessEqual, OpARM64GreaterEqualU:
   829				return 1
   830			default:
   831				return -1
   832			}
   833		case OpARM64FlagGT_ULT:
   834			switch op {
   835			case OpARM64GreaterThan, OpARM64LessThanU,
   836				OpARM64GreaterEqual, OpARM64LessEqualU:
   837				return 1
   838			default:
   839				return -1
   840			}
   841		case OpARM64FlagGT_UGT:
   842			switch op {
   843			case OpARM64GreaterThan, OpARM64GreaterThanU,
   844				OpARM64GreaterEqual, OpARM64GreaterEqualU:
   845				return 1
   846			default:
   847				return -1
   848			}
   849		default:
   850			return 0
   851		}
   852	}
   853	
   854	// logRule logs the use of the rule s. This will only be enabled if
   855	// rewrite rules were generated with the -log option, see gen/rulegen.go.
   856	func logRule(s string) {
   857		if ruleFile == nil {
   858			// Open a log file to write log to. We open in append
   859			// mode because all.bash runs the compiler lots of times,
   860			// and we want the concatenation of all of those logs.
   861			// This means, of course, that users need to rm the old log
   862			// to get fresh data.
   863			// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
   864			w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
   865				os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
   866			if err != nil {
   867				panic(err)
   868			}
   869			ruleFile = w
   870		}
   871		_, err := fmt.Fprintln(ruleFile, s)
   872		if err != nil {
   873			panic(err)
   874		}
   875	}
   876	
   877	var ruleFile io.Writer
   878	
   879	func min(x, y int64) int64 {
   880		if x < y {
   881			return x
   882		}
   883		return y
   884	}
   885	
   886	func isConstZero(v *Value) bool {
   887		switch v.Op {
   888		case OpConstNil:
   889			return true
   890		case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
   891			return v.AuxInt == 0
   892		}
   893		return false
   894	}
   895	
   896	// reciprocalExact64 reports whether 1/c is exactly representable.
   897	func reciprocalExact64(c float64) bool {
   898		b := math.Float64bits(c)
   899		man := b & (1<<52 - 1)
   900		if man != 0 {
   901			return false // not a power of 2, denormal, or NaN
   902		}
   903		exp := b >> 52 & (1<<11 - 1)
   904		// exponent bias is 0x3ff.  So taking the reciprocal of a number
   905		// changes the exponent to 0x7fe-exp.
   906		switch exp {
   907		case 0:
   908			return false // ±0
   909		case 0x7ff:
   910			return false // ±inf
   911		case 0x7fe:
   912			return false // exponent is not representable
   913		default:
   914			return true
   915		}
   916	}
   917	
   918	// reciprocalExact32 reports whether 1/c is exactly representable.
   919	func reciprocalExact32(c float32) bool {
   920		b := math.Float32bits(c)
   921		man := b & (1<<23 - 1)
   922		if man != 0 {
   923			return false // not a power of 2, denormal, or NaN
   924		}
   925		exp := b >> 23 & (1<<8 - 1)
   926		// exponent bias is 0x7f.  So taking the reciprocal of a number
   927		// changes the exponent to 0xfe-exp.
   928		switch exp {
   929		case 0:
   930			return false // ±0
   931		case 0xff:
   932			return false // ±inf
   933		case 0xfe:
   934			return false // exponent is not representable
   935		default:
   936			return true
   937		}
   938	}
   939	
   940	// check if an immediate can be directly encoded into an ARM instruction.
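      	// For example, 0xff000000 is encodable (0xff rotated right by 8), but 0x101 is not:
      	// its two set bits are 8 apart, so no rotation can place both within the low 8 bits.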
   941	func isARMImmRot(v uint32) bool {
   942		for i := 0; i < 16; i++ {
   943			if v&^0xff == 0 {
   944				return true
   945			}
   946			v = v<<2 | v>>30
   947		}
   948	
   949		return false
   950	}
   951	
   952	// overlap reports whether the ranges specified by the given offset and
   953	// size pairs overlap.
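      	// For example, overlap(0, 8, 4, 4) is true, while overlap(0, 4, 4, 4) is false (the ranges are merely adjacent).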
   954	func overlap(offset1, size1, offset2, size2 int64) bool {
   955		if offset1 >= offset2 && offset2+size2 > offset1 {
   956			return true
   957		}
   958		if offset2 >= offset1 && offset1+size1 > offset2 {
   959			return true
   960		}
   961		return false
   962	}
   963	
   964	func areAdjacentOffsets(off1, off2, size int64) bool {
   965		return off1+size == off2 || off1 == off2+size
   966	}
   967	
   968	// zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
   969	// depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
   970	// because it catches the same number of cases as 4.
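      	// On amd64, instructions that write a 32-bit register zero the upper 32 bits of the
      	// corresponding 64-bit register, which is why the 32-bit ops below qualify.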
   971	func zeroUpper32Bits(x *Value, depth int) bool {
   972		switch x.Op {
   973		case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
   974			OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
   975			OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
   976			OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
   977			OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
   978			OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
   979			OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
   980			return true
   981		case OpArg:
   982			return x.Type.Width == 4
   983		case OpPhi, OpSelect0, OpSelect1:
   984			// Phis can use each other as arguments. Instead of tracking visited values,
   985			// just limit the recursion depth.
   986			if depth <= 0 {
   987				return false
   988			}
   989			for i := range x.Args {
   990				if !zeroUpper32Bits(x.Args[i], depth-1) {
   991					return false
   992				}
   993			}
   994			return true
   995	
   996		}
   997		return false
   998	}
   999	
  1000	// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
  1001	func zeroUpper48Bits(x *Value, depth int) bool {
  1002		switch x.Op {
  1003		case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1004			return true
  1005		case OpArg:
  1006			return x.Type.Width == 2
  1007		case OpPhi, OpSelect0, OpSelect1:
  1008			// Phis can use each other as arguments. Instead of tracking visited values,
  1009			// just limit the recursion depth.
  1010			if depth <= 0 {
  1011				return false
  1012			}
  1013			for i := range x.Args {
  1014				if !zeroUpper48Bits(x.Args[i], depth-1) {
  1015					return false
  1016				}
  1017			}
  1018			return true
  1019	
  1020		}
  1021		return false
  1022	}
  1023	
  1024	// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
  1025	func zeroUpper56Bits(x *Value, depth int) bool {
  1026		switch x.Op {
  1027		case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1028			return true
  1029		case OpArg:
  1030			return x.Type.Width == 1
  1031		case OpPhi, OpSelect0, OpSelect1:
  1032			// Phis can use each other as arguments. Instead of tracking visited values,
  1033			// just limit the recursion depth.
  1034			if depth <= 0 {
  1035				return false
  1036			}
  1037			for i := range x.Args {
  1038				if !zeroUpper56Bits(x.Args[i], depth-1) {
  1039					return false
  1040				}
  1041			}
  1042			return true
  1043	
  1044		}
  1045		return false
  1046	}
  1047	
  1048	// isInlinableMemmove reports whether the given arch performs a Move of the given size
  1049	// faster than memmove. It will only return true if replacing the memmove with a Move is
  1050	// safe, either because Move is small or because the arguments are disjoint.
  1051	// This is used as a check for replacing memmove with Move ops.
  1052	func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1053		// It is always safe to convert memmove into Move when its arguments are disjoint.
  1054		// Move ops may or may not be faster for large sizes depending on how the platform
  1055		// lowers them, so we only perform this optimization on platforms that we know to
  1056		// have fast Move ops.
  1057		switch c.arch {
  1058		case "amd64", "amd64p32":
  1059			return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1060		case "386", "ppc64", "ppc64le", "arm64":
  1061			return sz <= 8
  1062		case "s390x":
  1063			return sz <= 8 || disjoint(dst, sz, src, sz)
  1064		case "arm", "mips", "mips64", "mipsle", "mips64le":
  1065			return sz <= 4
  1066		}
  1067		return false
  1068	}
  1069	
  1070	// hasSmallRotate reports whether the architecture has rotate instructions
  1071	// for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1072	func hasSmallRotate(c *Config) bool {
  1073		switch c.arch {
  1074		case "amd64", "amd64p32", "386":
  1075			return true
  1076		default:
  1077			return false
  1078		}
  1079	}
  1080	
  1081	// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
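      	// For example, armBFAuxInt(8, 16) == 0x810: the width sits in the low byte and the lsb in the next byte,
      	// so getARM64BFlsb(0x810) == 8 and getARM64BFwidth(0x810) == 16.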
  1082	func armBFAuxInt(lsb, width int64) int64 {
  1083		if lsb < 0 || lsb > 63 {
  1084			panic("ARM(64) bit field lsb constant out of range")
  1085		}
  1086		if width < 1 || width > 64 {
  1087			panic("ARM(64) bit field width constant out of range")
  1088		}
  1089		return width | lsb<<8
  1090	}
  1091	
  1092	// returns the lsb part of the auxInt field of arm64 bitfield ops.
  1093	func getARM64BFlsb(bfc int64) int64 {
  1094		return int64(uint64(bfc) >> 8)
  1095	}
  1096	
  1097	// returns the width part of the auxInt field of arm64 bitfield ops.
  1098	func getARM64BFwidth(bfc int64) int64 {
  1099		return bfc & 0xff
  1100	}
  1101	
  1102	// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1103	func isARM64BFMask(lsb, mask, rshift int64) bool {
  1104		shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1105		return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1106	}
  1107	
  1108	// returns the bitfield width of mask >> rshift for arm64 bitfield ops
  1109	func arm64BFWidth(mask, rshift int64) int64 {
  1110		shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1111		if shiftedMask == 0 {
  1112			panic("ARM64 BF mask is zero")
  1113		}
  1114		return nto(shiftedMask)
  1115	}
  1116	
  1117	// sizeof returns the size of t in bytes.
  1118	// It will panic if t is not a *types.Type.
  1119	func sizeof(t interface{}) int64 {
  1120		return t.(*types.Type).Size()
  1121	}
  1122	
  1123	// alignof returns the alignment of t in bytes.
  1124	// It will panic if t is not a *types.Type.
  1125	func alignof(t interface{}) int64 {
  1126		return t.(*types.Type).Alignment()
  1127	}
  1128	
  1129	// registerizable reports whether t is a primitive type that fits in
  1130	// a register. It assumes float64 values will always fit into registers
  1131	// even if that isn't strictly true.
  1132	// It will panic if t is not a *types.Type.
  1133	func registerizable(b *Block, t interface{}) bool {
  1134		typ := t.(*types.Type)
  1135		if typ.IsPtrShaped() || typ.IsFloat() {
  1136			return true
  1137		}
  1138		if typ.IsInteger() {
  1139			return typ.Size() <= b.Func.Config.RegSize
  1140		}
  1141		return false
  1142	}
  1143	
  1144	// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1145	func needRaceCleanup(sym interface{}, v *Value) bool {
  1146		f := v.Block.Func
  1147		if !f.Config.Race {
  1148			return false
  1149		}
  1150		if !isSameSym(sym, "runtime.racefuncenter") && !isSameSym(sym, "runtime.racefuncexit") {
  1151			return false
  1152		}
  1153		for _, b := range f.Blocks {
  1154			for _, v := range b.Values {
  1155				switch v.Op {
  1156				case OpStaticCall:
  1157					// A check for racefuncenter will also encounter racefuncexit and vice versa; those are expected.
  1158					// Also allow calls to panic*.
  1159					s := v.Aux.(fmt.Stringer).String()
  1160					switch s {
  1161					case "runtime.racefuncenter", "runtime.racefuncexit",
  1162						"runtime.panicdivide", "runtime.panicwrap",
  1163						"runtime.panicshift":
  1164						continue
  1165					}
  1166					// If we encountered any call, we need to keep racefunc*,
  1167					// for accurate stacktraces.
  1168					return false
  1169				case OpPanicBounds, OpPanicExtend:
  1170					// Note: these are panic generators that are ok (like the static calls above).
  1171				case OpClosureCall, OpInterCall:
  1172					// We must keep the race functions if there are any other call types.
  1173					return false
  1174				}
  1175			}
  1176		}
  1177		return true
  1178	}
  1179	
  1180	// symIsRO reports whether sym is a read-only global.
  1181	func symIsRO(sym interface{}) bool {
  1182		lsym := sym.(*obj.LSym)
  1183		return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1184	}
  1185	
  1186	// read8 reads one byte from the read-only global sym at offset off.
  1187	func read8(sym interface{}, off int64) uint8 {
  1188		lsym := sym.(*obj.LSym)
  1189		if off >= int64(len(lsym.P)) || off < 0 {
  1190			// Invalid index into the global sym.
  1191			// This can happen in dead code, so we don't want to panic.
  1192			// Just return any value, it will eventually get ignored.
  1193			// See issue 29215.
  1194			return 0
  1195		}
  1196		return lsym.P[off]
  1197	}
  1198	
  1199	// read16 reads two bytes from the read-only global sym at offset off.
  1200	func read16(sym interface{}, off int64, bigEndian bool) uint16 {
  1201		lsym := sym.(*obj.LSym)
  1202		if off >= int64(len(lsym.P))-1 || off < 0 {
  1203			return 0
  1204		}
  1205		if bigEndian {
  1206			return binary.BigEndian.Uint16(lsym.P[off:])
  1207		} else {
  1208			return binary.LittleEndian.Uint16(lsym.P[off:])
  1209		}
  1210	}
  1211	
  1212	// read32 reads four bytes from the read-only global sym at offset off.
  1213	func read32(sym interface{}, off int64, bigEndian bool) uint32 {
  1214		lsym := sym.(*obj.LSym)
  1215		if off >= int64(len(lsym.P))-3 || off < 0 {
  1216			return 0
  1217		}
  1218		if bigEndian {
  1219			return binary.BigEndian.Uint32(lsym.P[off:])
  1220		} else {
  1221			return binary.LittleEndian.Uint32(lsym.P[off:])
  1222		}
  1223	}
  1224	
  1225	// read64 reads eight bytes from the read-only global sym at offset off.
  1226	func read64(sym interface{}, off int64, bigEndian bool) uint64 {
  1227		lsym := sym.(*obj.LSym)
  1228		if off >= int64(len(lsym.P))-7 || off < 0 {
  1229			return 0
  1230		}
  1231		if bigEndian {
  1232			return binary.BigEndian.Uint64(lsym.P[off:])
  1233		} else {
  1234			return binary.LittleEndian.Uint64(lsym.P[off:])
  1235		}
  1236	}
  1237	
