Source file src/cmd/compile/internal/ssa/gen/PPC64Ops.go

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ignore
     6	
     7	package main
     8	
     9	import "strings"
    10	
    11	// Notes:
    12	//  - Less-than-64-bit integer types live in the low portion of registers.
    13	//    For now, the upper portion is junk; sign/zero-extension might be optimized in the future, but not yet.
    14	//  - Boolean types are zero or 1; stored in a byte, but loaded with AMOVBZ so the upper bytes of a register are zero.
    15	//  - *const instructions may use a constant larger than the instruction can encode.
    16	//    In this case the assembler expands to multiple instructions and uses tmp
    17	//    register (R31).
    18	
    19	var regNamesPPC64 = []string{ // position i in this list becomes bit i of a regMask (see num and buildReg in init)
    20		"R0", // REGZERO, not used (absent from the gp mask in init), but simplifies counting in regalloc
    21		"SP", // REGSP (position 1)
    22		"SB", // REGSB (position 2)
    23		"R3",
    24		"R4",
    25		"R5",
    26		"R6",
    27		"R7",
    28		"R8",
    29		"R9",
    30		"R10",
    31		"R11", // REGCTXT for closures (the ctxt mask in init)
    32		"R12", // code pointer for closure/interface calls (the callptr mask in init)
    33		"R13", // REGTLS (absent from the gp mask in init)
    34		"R14",
    35		"R15",
    36		"R16",
    37		"R17",
    38		"R18",
    39		"R19",
    40		"R20",
    41		"R21",
    42		"R22",
    43		"R23",
    44		"R24",
    45		"R25",
    46		"R26",
    47		"R27",
    48		"R28",
    49		"R29",
    50		"g",   // REGG (position 30).  Using name "g" and setting Config.hasGReg makes it "just happen".
    51		"R31", // REGTMP (the tmp mask in init; some ops list it as a clobber)
    52	
    53		"F0",
    54		"F1",
    55		"F2",
    56		"F3",
    57		"F4",
    58		"F5",
    59		"F6",
    60		"F7",
    61		"F8",
    62		"F9",
    63		"F10",
    64		"F11",
    65		"F12",
    66		"F13",
    67		"F14",
    68		"F15",
    69		"F16",
    70		"F17",
    71		"F18",
    72		"F19",
    73		"F20",
    74		"F21",
    75		"F22",
    76		"F23",
    77		"F24",
    78		"F25",
    79		"F26",
    80		"F27",
    81		"F28",
    82		"F29",
    83		"F30",
    84		"F31",
    85		// NOTE: the 32 integer and 32 floating-point names above already total 64, the most init accepts; enabling any name below would trip its "too many registers" panic.
    86		// "CR0",
    87		// "CR1",
    88		// "CR2",
    89		// "CR3",
    90		// "CR4",
    91		// "CR5",
    92		// "CR6",
    93		// "CR7",
    94	
    95		// "CR",
    96		// "XER",
    97		// "LR",
    98		// "CTR",
    99	}
   100	
   101	func init() {
   102		// Make map from reg names to reg integers.
   103		if len(regNamesPPC64) > 64 {
   104			panic("too many registers")
   105		}
   106		num := map[string]int{}
   107		for i, name := range regNamesPPC64 {
   108			num[name] = i
   109		}
   110		buildReg := func(s string) regMask {
   111			m := regMask(0)
   112			for _, r := range strings.Split(s, " ") {
   113				if n, ok := num[r]; ok {
   114					m |= regMask(1) << uint(n)
   115					continue
   116				}
   117				panic("register " + r + " not found")
   118			}
   119			return m
   120		}
   121	
   122		var (
   123			gp = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   124			fp = buildReg("F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26")
   125			sp = buildReg("SP")
   126			sb = buildReg("SB")
   127			gr = buildReg("g")
   128			// cr  = buildReg("CR")
   129			// ctr = buildReg("CTR")
   130			// lr  = buildReg("LR")
   131			tmp     = buildReg("R31")
   132			ctxt    = buildReg("R11")
   133			callptr = buildReg("R12")
   134			// tls = buildReg("R13")
   135			gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   136			gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   137			gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   138			gp22        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
   139			gp32        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
   140			gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   141			gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   142			crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   143			gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   144			gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   145			gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   146			gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   147			gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   148			gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   149			gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   150			fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   151			fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   152			fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   153			gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   154			fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   155			fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   156			fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   157			fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   158			fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   159			fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   160			fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   161			callerSave  = regMask(gp | fp | gr)
   162			r3          = buildReg("R3")
   163			r4          = buildReg("R4")
   164			r5          = buildReg("R5")
   165			r6          = buildReg("R6")
   166		)
   167		ops := []opData{
   168			{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},     // arg0 + arg1
   169			{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},     // arg0 + auxInt
   170			{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},   // arg0+arg1
   171			{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
   172			{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0-arg1
   173			{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                      // arg0-arg1
   174			{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0-arg1
   175	
   176			{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   177			{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   178	
   179			{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
   180			{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
   181			{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
   182			{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
   183			{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true},   // arg0 * arg1, returns (hi, lo)
   184	
   185			{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   186			{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   187	
   188			{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   189			{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   190			{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   191			{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   192	
   193			{name: "SRAD", argLength: 2, reg: gp21, asm: "SRAD"}, // arg0 >>a arg1, 64 bits (all sign if arg1 & 64 != 0)
   194			{name: "SRAW", argLength: 2, reg: gp21, asm: "SRAW"}, // arg0 >>a arg1, 32 bits (all sign if arg1 & 32 != 0)
   195			{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},   // arg0 >> arg1, 64 bits  (0 if arg1 & 64 != 0)
   196			{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},   // arg0 >> arg1, 32 bits  (0 if arg1 & 32 != 0)
   197			{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},   // arg0 << arg1, 64 bits  (0 if arg1 & 64 != 0)
   198			{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},   // arg0 << arg1, 32 bits  (0 if arg1 & 32 != 0)
   199	
   200			{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   201			{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   202	
   203			{name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true},                                                                     // arg0 + arg1 + carry, returns (sum, carry)
   204			{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
   205			{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"},                                                                   // carry - 1 (if carry then 0 else -1)
   206	
   207			{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // arg0 >>a aux, 64 bits
   208			{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // arg0 >>a aux, 32 bits
   209			{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},   // arg0 >> aux, 64 bits
   210			{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},   // arg0 >> aux, 32 bits
   211			{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},   // arg0 << aux, 64 bits
   212			{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},   // arg0 << aux, 32 bits
   213	
   214			{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   215			{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   216	
   217			{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
   218			{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
   219	
   220			{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
   221			{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
   222	
   223			{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   224			{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   225			{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
   226	
   227			{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   228			{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   229	
   230			{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   231			{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   232			{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   233			{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   234	
   235			// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   236	
   237			// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   238			{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   239			{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   240			{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   241			{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   242			{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   243	
   244			// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   245			// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
   246			// data and use FMOVDload and FMOVDstore instead (this will also dodge endianness issues).
   247			// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   248			// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   249	
   250			{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   251			{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   252	
   253			{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                   // arg0&arg1
   254			{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                    // arg0&^arg1
   255			{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "Flags"}, // arg0&arg1 sets CC
   256			{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                     // arg0|arg1
   257			{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                      // arg0|^arg1
   258			{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "Flags"},   // arg0|arg1 sets CC
   259			{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                   // ^(arg0|arg1)
   260			{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},     // arg0^arg1
   261			{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "Flags"}, // arg0^arg1 sets CC
   262			{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},     // arg0^^arg1
   263			{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                      // -arg0 (integer)
   264			{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                    // -arg0 (floating point)
   265			{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                  // sqrt(arg0) (floating point)
   266			{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                // sqrt(arg0) (floating point, single precision)
   267			{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                  // floor(arg0), float64
   268			{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                   // ceil(arg0), float64
   269			{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                  // trunc(arg0), float64
   270			{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                  // round(arg0), float64
   271			{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                    // abs(arg0), float64
   272			{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                  // -abs(arg0), float64
   273			{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                // copysign arg0 -> arg1, float64
   274	
   275			{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                     // arg0|aux
   276			{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                   // arg0^aux
   277			{name: "ANDconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", clobberFlags: true}, // arg0&aux // and-immediate sets CC on PPC, always.
   278			{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}}, asm: "ANDCC", aux: "Int64", typ: "Flags"},                             // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   279	
   280			{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   281			{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   282			{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   283			{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   284			{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   285			{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   286	
   287			// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   288			{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   289			{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   290			{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   291			{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   292			{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   293			{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   294	
   295			// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   296			// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   297			// In these cases the index register field is set to 0 and the full address is in the base register.
   298			{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes reverse order
   299			{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend reverse order
   300			{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend reverse order
   301	
   302			// In these cases an index register is used in addition to a base register
   303			{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // zero extend uint8 to uint64
   304			{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // sign extend int16 to int64
   305			{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // zero extend uint16 to uint64
   306			{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // sign extend int32 to int64
   307			{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // zero extend uint32 to uint64
   308			{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},
   309			{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend reverse order, using index reg
   310			{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend reverse order, using index reg
   311			{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},
   312			{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"},
   313			{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"},
   314	
   315			// Store bytes in the reverse endian order of the arch into arg0.
   316			// These are indexed stores with no offset field in the instruction so the aux fields are not used.
   317			{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes reverse order
   318			{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes reverse order
   319			{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes reverse order
   320	
   321			// Floating point loads from arg0+aux+auxint
   322			{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   323			{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   324	
   325			// Store bytes in the endian order of the arch into arg0+aux+auxint
   326			{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   327			{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   328			{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   329			{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   330	
   331			// Store floating point value into arg0+aux+auxint
   332			{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double float
   333			{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   334	
   335			// Stores using index and base registers
   336			{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store byte
   337			{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store half word
   338			{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store word
   339			{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},     // store double word
   340			{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store double float
   341			{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store single float
   342			{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store half word reversed byte using index reg
   343			{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store word reversed byte using index reg
   344			{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double word reversed byte using index reg
   345	
   346			// The following ops store 0 into arg0+aux+auxint arg1=mem
   347			{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   348			{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   349			{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   350			{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   351	
   352			{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   353	
   354			{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   355			{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   356			{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   357			{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   358	
   359			{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   360			{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   361			{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   362			{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   363			{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   364			{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   365			{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   366			{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   367	
   368			// pseudo-ops
   369			{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   370			{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   371			{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   372			{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   373			{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   374			{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   375			{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   376			{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   377			{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
   378			{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise.; PPC >= === !< which is wrong for NaN
   379	
   380			// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   381			// and sorts it to the very beginning of the block to prevent other
   382			// use of the closure pointer.
   383			{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   384	
   385			// LoweredGetCallerSP returns the SP of the caller of the current function.
   386			{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
   387			// I.e., if f calls g and g "calls" getcallerpc,
   388			// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   389			// I.e., if f calls g "calls" getcallerpc,
   390			// the result should be the PC within f that g will return to.
   391			// See runtime/stubs.go for a more detailed discussion.
   392			{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   393	
   394			// arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
   395			{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   396			// Round ops to block fused-multiply-add extraction.
   397			{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   398			{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   399	
   400			{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true, call: true, symEffect: "None"},                   // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   401			{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "Int64", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   402			{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "Int64", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   403	
   404			// large or unaligned zeroing
   405			// arg0 = address of memory to zero (in R3, changed as side effect)
   406			// returns mem
   407			//
   408			// a loop is generated when there is more than one iteration
   409			// needed to clear 4 doublewords
   410			//
   411			// 	MOVD	$len/32,R31
   412			//	MOVD	R31,CTR
   413			//	loop:
   414			//	MOVD	R0,(R3)
   415			//	MOVD	R0,8(R3)
   416			//	MOVD	R0,16(R3)
   417			//	MOVD	R0,24(R3)
   418			//	ADD	R3,32
   419			//	BC	loop
   420	
   421			// remaining doubleword clears generated as needed
   422			//	MOVD	R0,(R3)
   423			//	MOVD	R0,8(R3)
   424			//	MOVD	R0,16(R3)
   425			//	MOVD	R0,24(R3)
   426	
   427			// one or more of these to clear remainder < 8 bytes
   428			//	MOVW	R0,n1(R3)
   429			//	MOVH	R0,n2(R3)
   430			//	MOVB	R0,n3(R3)
   431			{
   432				name:      "LoweredZero",
   433				aux:       "Int64",
   434				argLength: 2,
   435				reg: regInfo{
   436					inputs:   []regMask{buildReg("R3")},
   437					clobbers: buildReg("R3"),
   438				},
   439				clobberFlags:   true,
   440				typ:            "Mem",
   441				faultOnNilArg0: true,
   442			},
   443			// Loop code:
   444			//	MOVD len/32,REG_TMP  only for loop
   445			//	MOVD REG_TMP,CTR     only for loop
   446			// loop:
   447			//	MOVD (R4),R7
   448			//	MOVD 8(R4),R8
   449			//	MOVD 16(R4),R9
   450			//	MOVD 24(R4),R10
   451			//	ADD  R4,$32          only with loop
   452			//	MOVD R7,(R3)
   453			//	MOVD R8,8(R3)
   454			//	MOVD R9,16(R3)
   455			//	MOVD R10,24(R3)
   456			//	ADD  R3,$32          only with loop
   457			//	BC 16,0,loop         only with loop
   458			// Bytes not moved by this loop are moved
   459			// with a combination of the following instructions,
   460			// starting with the largest sizes and generating as
   461			// many as needed, using the appropriate offset value.
   462			//	MOVD  n(R4),R7
   463			//	MOVD  R7,n(R3)
   464			//	MOVW  n1(R4),R7
   465			//	MOVW  R7,n1(R3)
   466			//	MOVH  n2(R4),R7
   467			//	MOVH  R7,n2(R3)
   468			//	MOVB  n3(R4),R7
   469			//	MOVB  R7,n3(R3)
   470	
   471			{
   472				name:      "LoweredMove",
   473				aux:       "Int64",
   474				argLength: 3,
   475				reg: regInfo{
   476					inputs:   []regMask{buildReg("R3"), buildReg("R4")},
   477					clobbers: buildReg("R3 R4 R7 R8 R9 R10"),
   478				},
   479				clobberFlags:   true,
   480				typ:            "Mem",
   481				faultOnNilArg0: true,
   482				faultOnNilArg1: true,
   483			},
   484	
   485			{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   486			{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   487	
   488			{name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   489			{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   490			{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   491			{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   492	
   493			// atomic add32, 64
   494			// SYNC
   495			// LDAR         (Rarg0), Rout
   496			// ADD		Rarg1, Rout
   497			// STDCCC       Rout, (Rarg0)
   498			// BNE          -3(PC)
   499			// ISYNC
   500			// return new sum
   501	
   502			{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   503			{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   504	
   505			// atomic exchange32, 64
   506			// SYNC
   507			// LDAR         (Rarg0), Rout
   508			// STDCCC       Rarg1, (Rarg0)
   509			// BNE          -2(PC)
   510			// ISYNC
   511			// return old val
   512	
   513			{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   514			{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   515	
   516			// atomic compare and swap.
   517			// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   518			// if *arg0 == arg1 {
   519			//   *arg0 = arg2
   520			//   return (true, memory)
   521			// } else {
   522			//   return (false, memory)
   523			// }
   524			// SYNC
   525			// LDAR		(Rarg0), Rtmp
   526			// CMP		Rarg1, Rtmp
   527			// BNE		3(PC)
   528			// STDCCC	Rarg2, (Rarg0)
   529			// BNE		-4(PC)
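   530			// CBNZ         Rtmp, -4(PC)	// NOTE(review): CBNZ and CSET do not look like PPC64 mnemonics;
   531			// CSET         EQ, Rout	// these two lines may be left over from another port - confirm against ppc64/ssa.go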
   532			{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   533			{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   534	
   535			// atomic 8 and/or.
   536			// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
   537			// LBAR		(Rarg0), Rtmp
   538			// AND/OR	Rarg1, Rtmp
   539			// STBCCC	Rtmp, (Rarg0), Rtmp
   540			// BNE		Rtmp, -3(PC)
   541	
   542			{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   543			{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   544	
   545			// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
   546			// It preserves R0 through R15, g, and its arguments R20 and R21,
   547			// but may clobber anything else, including R31 (REGTMP).
   548			{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
   549	
   550			// There are three of these functions so that they can have three different register inputs.
   551			// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
   552			// default registers to match so we don't need to copy registers around unnecessarily.
   553			{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   554			{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   555			{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   556	
   557			// (InvertFlags (CMP a b)) == (CMP b a)
   558			// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   559			// then we do (LessThan (InvertFlags (CMP b a))) instead.
   560			// Rewrites will convert this to (GreaterThan (CMP b a)).
   561			// InvertFlags is a pseudo-op which can't appear in assembly output.
   562			{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   563	
   564			// Constant flag values. For any comparison, there are 3 possible
   565			// outcomes: either the three from the signed total order (<,==,>)
   566			// or the three from the unsigned total order, depending on which
   567			// comparison operation was used (CMP or CMPU -- PPC is different from
   568			// the other architectures, which have a single comparison producing
   569			// both signed and unsigned comparison results.)
   570	
   571			// These ops are for temporary use by rewrite rules. They
   572			// cannot appear in the generated assembly.
   573			{name: "FlagEQ"}, // equal
   574			{name: "FlagLT"}, // signed < or unsigned <
   575			{name: "FlagGT"}, // signed > or unsigned >
   576	
   577		}
   578	
   579		blocks := []blockData{
   580			{name: "EQ"},
   581			{name: "NE"},
   582			{name: "LT"},
   583			{name: "LE"},
   584			{name: "GT"},
   585			{name: "GE"},
   586			{name: "FLT"},
   587			{name: "FLE"},
   588			{name: "FGT"},
   589			{name: "FGE"},
   590		}
   591	
   592		archs = append(archs, arch{
   593			name:            "PPC64",
   594			pkg:             "cmd/internal/obj/ppc64",
   595			genfile:         "../../ppc64/ssa.go",
   596			ops:             ops,
   597			blocks:          blocks,
   598			regnames:        regNamesPPC64,
   599			gpregmask:       gp,
   600			fpregmask:       fp,
   601			framepointerreg: int8(num["SP"]),
   602			linkreg:         -1, // not used
   603		})
   604	}
   605
View as plain text