...

Source file src/cmd/compile/internal/amd64/ggen.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
	"cmd/internal/objabi"
)

// no floating point in note handlers on Plan 9
var isPlan9 = objabi.GOOS == "plan9"

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
// see runtime/mkduff.go.
const (
	dzBlocks    = 16 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 19 // size of instructions in a single block
	dzMovSize   = 4  // size of single MOV instruction w/ offset
	dzLeaqSize  = 4  // size of single LEAQ instruction
	dzClearStep = 16 // number of bytes cleared by each MOV instruction

	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
	dzSize     = dzBlocks * dzBlockSize
)
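// For these values, dzClearLen = 16*4 = 64 bytes cleared per block and
// dzSize = 16*19 = 304 bytes of DUFFZERO body in total.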

// dzOff returns the offset for a jump into DUFFZERO.
// b is the number of bytes to zero.
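// For example, dzOff(128) = 304 - 2*19 = 266, leaving exactly two full
// blocks before DUFFZERO's closing RET; dzOff(96) = 304 - 19 - (4 + 4*2)
// = 273 enters mid-block, running the last two MOVUPS and the LEAQ of one
// block and then one full block.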
func dzOff(b int64) int64 {
	off := int64(dzSize)
	off -= b / dzClearLen * dzBlockSize
	tailLen := b % dzClearLen
	if tailLen >= dzClearStep {
		off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep)
	}
	return off
}

// dzDI returns the pre-adjustment to DI for a call to DUFFZERO.
// b is the number of bytes to zero.
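// For example, dzDI(96) = -16*(4-2) = -32: the mid-block entry chosen by
// dzOff(96) executes MOVUPS stores at displacements 32 and 48, so DI must
// start 32 bytes below the target for those stores to land on bytes 0-31.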
func dzDI(b int64) int64 {
	tailLen := b % dzClearLen
	if tailLen < dzClearStep {
		return 0
	}
	tailSteps := tailLen / dzClearStep
	return -dzClearStep * (dzBlockLen - tailSteps)
}

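// zerorange zeroes cnt bytes of stack starting at off relative to SP,
// appending the generated instructions after p. *state records whether a
// zeroed AX or X0 has already been materialized, so repeated calls within
// one function set each up at most once.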
func zerorange(pp *gc.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
	const (
		ax = 1 << iota
		x0
	)

	if cnt == 0 {
		return p
	}

	if cnt%int64(gc.Widthreg) != 0 {
		// should only happen with nacl
		if cnt%int64(gc.Widthptr) != 0 {
			gc.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
		}
		if *state&ax == 0 {
			p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*state |= ax
		}
		p = pp.Appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off)
		off += int64(gc.Widthptr)
		cnt -= int64(gc.Widthptr)
	}

	if cnt == 8 {
		if *state&ax == 0 {
			p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*state |= ax
		}
		p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off)
	} else if !isPlan9 && cnt <= int64(8*gc.Widthreg) {
		if *state&x0 == 0 {
			p = pp.Appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
			*state |= x0
		}

		for i := int64(0); i < cnt/16; i++ {
			p = pp.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
		}

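		// For example, cnt=24 clears bytes 0-15 in the loop above and
		// bytes 8-23 with the overlapping tail store below.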
		if cnt%16 != 0 {
			p = pp.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
		}
	} else if !gc.Nacl && !isPlan9 && (cnt <= int64(128*gc.Widthreg)) {
		if *state&x0 == 0 {
			p = pp.Appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
			*state |= x0
		}
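		// leaptr is the pointer-width LEA opcode (ALEAQ here, ALEAL on
		// amd64p32), selected in this package's galign.go. For cnt=96
		// this emits LEAQ off-32(SP), DI and enters DUFFZERO at offset
		// 273, per dzDI and dzOff above.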
		p = pp.Appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
		p.To.Sym = gc.Duffzero

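		// DUFFZERO clears a multiple of 16 bytes and leaves DI just past
		// them, so a cnt%16 == 8 remainder is finished with one more
		// overlapping 16-byte store.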
		if cnt%16 != 0 {
			p = pp.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
		}
	} else {
		if *state&ax == 0 {
			p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
			*state |= ax
		}

		p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = pp.Appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
		p = pp.Appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = pp.Appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

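// zeroAuto zeroes the stack auto n using constant-to-memory stores, one
// pointer-sized word at a time.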
func zeroAuto(pp *gc.Progs, n *gc.Node) {
	// Note: this code must not clobber any registers.
	op := x86.AMOVQ
	if gc.Widthptr == 4 {
		op = x86.AMOVL
	}
	sym := n.Sym.Linksym()
	size := n.Type.Size()
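	// For example, a 16-byte auto on amd64 (Widthptr = 8) gets two
	// MOVQ $0 stores, at n.Xoffset(SP) and n.Xoffset+8(SP).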
	for i := int64(0); i < size; i += int64(gc.Widthptr) {
		p := pp.Prog(op)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_AUTO
		p.To.Reg = x86.REG_SP
		p.To.Offset = n.Xoffset + i
		p.To.Sym = sym
	}
}

func ginsnop(pp *gc.Progs) *obj.Prog {
	// This is a hardware nop (1-byte 0x90) instruction,
	// even though we describe it as an explicit XCHGL here.
	// In particular, this does not zero the high 32 bits
	// like typical *L opcodes.
	// (gas assembles "xchg %eax,%eax" to 0x87 0xc0, which
	// does zero the high 32 bits.)
	p := pp.Prog(x86.AXCHGL)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_AX
	p.To.Type = obj.TYPE_REG
	p.To.Reg = x86.REG_AX
	return p
}
