...

Source file src/pkg/cmd/vendor/golang.org/x/arch/x86/x86asm/decode.go

     1	// Copyright 2014 The Go Authors.  All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Table-driven decoding of x86 instructions.
     6	
     7	package x86asm
     8	
     9	import (
    10		"encoding/binary"
    11		"errors"
    12		"fmt"
    13		"runtime"
    14	)
    15	
    16	// Set trace to true to cause the decoder to print the PC sequence
    17	// of the executed instruction codes. This is typically only useful
    18	// when you are running a test of a single input case.
    19	const trace = false
    20	
    21	// A decodeOp is a single instruction in the decoder bytecode program.
    22	//
    23	// The decodeOps correspond to consuming and conditionally branching
    24	// on input bytes, consuming additional fields, and then interpreting
    25	// consumed data as instruction arguments. The names of the xRead and xArg
    26	// operations are taken from the Intel manual conventions, for example
    27	// Volume 2, Section 3.1.1, page 487 of
    28	// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
    29	//
    30	// The actual decoding program is generated by ../x86map.
    31	//
    32	// TODO(rsc): We may be able to merge various of the memory operands
    33	// since we don't care about, say, the distinction between m80dec and m80bcd.
    34	// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
    35	
// decodeOp is the opcode type of the decoder bytecode. decode1 converts
// each fetched decoder word to a decodeOp and dispatches on it; the words
// that follow an opcode in the program (counts, byte values, jump targets)
// are read as plain integers.
type decodeOp uint16
    37	
// Decoder bytecode operations. The numeric values are assigned by iota
// in declaration order, starting with xFail at zero.
const (
	// Termination and unconditional control flow.
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	// Conditional branches.
	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	// Input: consume additional instruction fields.
	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	// Arguments: interpret consumed data as an instruction argument.
	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)
   171	
   172	// instPrefix returns an Inst describing just one prefix byte.
   173	// It is only used if there is a prefix followed by an unintelligible
   174	// or invalid instruction byte sequence.
   175	func instPrefix(b byte, mode int) (Inst, error) {
   176		// When tracing it is useful to see what called instPrefix to report an error.
   177		if trace {
   178			_, file, line, _ := runtime.Caller(1)
   179			fmt.Printf("%s:%d\n", file, line)
   180		}
   181		p := Prefix(b)
   182		switch p {
   183		case PrefixDataSize:
   184			if mode == 16 {
   185				p = PrefixData32
   186			} else {
   187				p = PrefixData16
   188			}
   189		case PrefixAddrSize:
   190			if mode == 32 {
   191				p = PrefixAddr16
   192			} else {
   193				p = PrefixAddr32
   194			}
   195		}
   196		// Note: using composite literal with Prefix key confuses 'bundle' tool.
   197		inst := Inst{Len: 1}
   198		inst.Prefix = Prefixes{p}
   199		return inst, nil
   200	}
   201	
   202	// truncated reports a truncated instruction.
   203	// For now we use instPrefix but perhaps later we will return
   204	// a specific error here.
   205	func truncated(src []byte, mode int) (Inst, error) {
   206		if len(src) == 0 {
   207			return Inst{}, ErrTruncated
   208		}
   209		return instPrefix(src[0], mode) // too long
   210	}
   211	
   212	// These are the errors returned by Decode.
   213	var (
   214		ErrInvalidMode  = errors.New("invalid x86 mode in Decode")
   215		ErrTruncated    = errors.New("truncated instruction")
   216		ErrUnrecognized = errors.New("unrecognized instruction")
   217	)
   218	
// decoderCover records coverage information for which parts
// of the byte code have been executed. It is indexed by decoder
// program counter: when it is non-nil, decode1 sets decoderCover[pc]
// for every decoder word it executes; while it is nil nothing is recorded.
var decoderCover []bool
   222	
   223	// Decode decodes the leading bytes in src as a single instruction.
   224	// The mode arguments specifies the assumed processor mode:
   225	// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
   226	func Decode(src []byte, mode int) (inst Inst, err error) {
   227		return decode1(src, mode, false)
   228	}
   229	
   230	// decode1 is the implementation of Decode but takes an extra
   231	// gnuCompat flag to cause it to change its behavior to mimic
   232	// bugs (or at least unique features) of GNU libopcodes as used
   233	// by objdump. We don't believe that logic is the right thing to do
   234	// in general, but when testing against libopcodes it simplifies the
   235	// comparison if we adjust a few small pieces of logic.
   236	// The affected logic is in the conditional branch for "mandatory" prefixes,
   237	// case xCondPrefix.
   238	func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
   239		switch mode {
   240		case 16, 32, 64:
   241			// ok
   242			// TODO(rsc): 64-bit mode not tested, probably not working.
   243		default:
   244			return Inst{}, ErrInvalidMode
   245		}
   246	
   247		// Maximum instruction size is 15 bytes.
   248		// If we need to read more, return 'truncated instruction'.
   249		if len(src) > 15 {
   250			src = src[:15]
   251		}
   252	
   253		var (
   254			// prefix decoding information
   255			pos           = 0    // position reading src
   256			nprefix       = 0    // number of prefixes
   257			lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
   258			repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
   259			segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
   260			dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
   261			addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
   262			rex           Prefix // rex byte if present (or 0)
   263			rexUsed       Prefix // bits used in rex byte
   264			rexIndex      = -1   // index of rex byte
   265			vex           Prefix // use vex encoding
   266			vexIndex      = -1   // index of vex prefix
   267	
   268			addrMode = mode // address mode (width in bits)
   269			dataMode = mode // operand mode (width in bits)
   270	
   271			// decoded ModR/M fields
   272			haveModrm bool
   273			modrm     int
   274			mod       int
   275			regop     int
   276			rm        int
   277	
   278			// if ModR/M is memory reference, Mem form
   279			mem     Mem
   280			haveMem bool
   281	
   282			// decoded SIB fields
   283			haveSIB bool
   284			sib     int
   285			scale   int
   286			index   int
   287			base    int
   288			displen int
   289			dispoff int
   290	
   291			// decoded immediate values
   292			imm     int64
   293			imm8    int8
   294			immc    int64
   295			immcpos int
   296	
   297			// output
   298			opshift int
   299			inst    Inst
   300			narg    int // number of arguments written to inst
   301		)
   302	
   303		if mode == 64 {
   304			dataMode = 32
   305		}
   306	
   307		// Prefixes are certainly the most complex and underspecified part of
   308		// decoding x86 instructions. Although the manuals say things like
   309		// up to four prefixes, one from each group, nearly everyone seems to
   310		// agree that in practice as many prefixes as possible, including multiple
   311		// from a particular group or repetitions of a given prefix, can be used on
   312		// an instruction, provided the total instruction length including prefixes
   313		// does not exceed the agreed-upon maximum of 15 bytes.
   314		// Everyone also agrees that if one of these prefixes is the LOCK prefix
   315		// and the instruction is not one of the instructions that can be used with
   316		// the LOCK prefix or if the destination is not a memory operand,
   317		// then the instruction is invalid and produces the #UD exception.
   318		// However, that is the end of any semblance of agreement.
   319		//
   320		// What happens if prefixes are given that conflict with other prefixes?
   321		// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
   322		// conflict with each other: only one segment can be in effect.
   323		// Disassemblers seem to agree that later prefixes take priority over
   324		// earlier ones. I have not taken the time to write assembly programs
   325		// to check to see if the hardware agrees.
   326		//
   327		// What happens if prefixes are given that have no meaning for the
   328		// specific instruction to which they are attached? It depends.
   329		// If they really have no meaning, they are ignored. However, a future
   330		// processor may assign a different meaning. As a disassembler, we
   331		// don't really know whether we're seeing a meaningless prefix or one
   332		// whose meaning we simply haven't been told yet.
   333		//
   334		// Combining the two questions, what happens when conflicting
   335		// extension prefixes are given? No one seems to know for sure.
   336		// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
   337		// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
   338		// Which prefix wins? See the xCondPrefix prefix for more.
   339		//
   340		// Writing assembly test cases to divine which interpretation the
   341		// CPU uses might clarify the situation, but more likely it would
   342		// make the situation even less clear.
   343	
   344		// Read non-REX prefixes.
   345	ReadPrefixes:
   346		for ; pos < len(src); pos++ {
   347			p := Prefix(src[pos])
   348			switch p {
   349			default:
   350				nprefix = pos
   351				break ReadPrefixes
   352	
   353			// Group 1 - lock and repeat prefixes
   354			// According to Intel, there should only be one from this set,
   355			// but according to AMD both can be present.
   356			case 0xF0:
   357				if lockIndex >= 0 {
   358					inst.Prefix[lockIndex] |= PrefixIgnored
   359				}
   360				lockIndex = pos
   361			case 0xF2, 0xF3:
   362				if repIndex >= 0 {
   363					inst.Prefix[repIndex] |= PrefixIgnored
   364				}
   365				repIndex = pos
   366	
   367			// Group 2 - segment override / branch hints
   368			case 0x26, 0x2E, 0x36, 0x3E:
   369				if mode == 64 {
   370					p |= PrefixIgnored
   371					break
   372				}
   373				fallthrough
   374			case 0x64, 0x65:
   375				if segIndex >= 0 {
   376					inst.Prefix[segIndex] |= PrefixIgnored
   377				}
   378				segIndex = pos
   379	
   380			// Group 3 - operand size override
   381			case 0x66:
   382				if mode == 16 {
   383					dataMode = 32
   384					p = PrefixData32
   385				} else {
   386					dataMode = 16
   387					p = PrefixData16
   388				}
   389				if dataSizeIndex >= 0 {
   390					inst.Prefix[dataSizeIndex] |= PrefixIgnored
   391				}
   392				dataSizeIndex = pos
   393	
   394			// Group 4 - address size override
   395			case 0x67:
   396				if mode == 32 {
   397					addrMode = 16
   398					p = PrefixAddr16
   399				} else {
   400					addrMode = 32
   401					p = PrefixAddr32
   402				}
   403				if addrSizeIndex >= 0 {
   404					inst.Prefix[addrSizeIndex] |= PrefixIgnored
   405				}
   406				addrSizeIndex = pos
   407	
   408			//Group 5 - Vex encoding
   409			case 0xC5:
   410				if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
   411					vex = p
   412					vexIndex = pos
   413					inst.Prefix[pos] = p
   414					inst.Prefix[pos+1] = Prefix(src[pos+1])
   415					pos += 1
   416					continue
   417				} else {
   418					nprefix = pos
   419					break ReadPrefixes
   420				}
   421			case 0xC4:
   422				if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
   423					vex = p
   424					vexIndex = pos
   425					inst.Prefix[pos] = p
   426					inst.Prefix[pos+1] = Prefix(src[pos+1])
   427					inst.Prefix[pos+2] = Prefix(src[pos+2])
   428					pos += 2
   429					continue
   430				} else {
   431					nprefix = pos
   432					break ReadPrefixes
   433				}
   434			}
   435	
   436			if pos >= len(inst.Prefix) {
   437				return instPrefix(src[0], mode) // too long
   438			}
   439	
   440			inst.Prefix[pos] = p
   441		}
   442	
   443		// Read REX prefix.
   444		if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
   445			rex = Prefix(src[pos])
   446			rexIndex = pos
   447			if pos >= len(inst.Prefix) {
   448				return instPrefix(src[0], mode) // too long
   449			}
   450			inst.Prefix[pos] = rex
   451			pos++
   452			if rex&PrefixREXW != 0 {
   453				dataMode = 64
   454				if dataSizeIndex >= 0 {
   455					inst.Prefix[dataSizeIndex] |= PrefixIgnored
   456				}
   457			}
   458		}
   459	
   460		// Decode instruction stream, interpreting decoding instructions.
   461		// opshift gives the shift to use when saving the next
   462		// opcode byte into inst.Opcode.
   463		opshift = 24
   464	
   465		// Decode loop, executing decoder program.
   466		var oldPC, prevPC int
   467	Decode:
   468		for pc := 1; ; { // TODO uint
   469			oldPC = prevPC
   470			prevPC = pc
   471			if trace {
   472				println("run", pc)
   473			}
   474			x := decoder[pc]
   475			if decoderCover != nil {
   476				decoderCover[pc] = true
   477			}
   478			pc++
   479	
   480			// Read and decode ModR/M if needed by opcode.
   481			switch decodeOp(x) {
   482			case xCondSlashR, xReadSlashR:
   483				if haveModrm {
   484					return Inst{Len: pos}, errInternal
   485				}
   486				haveModrm = true
   487				if pos >= len(src) {
   488					return truncated(src, mode)
   489				}
   490				modrm = int(src[pos])
   491				pos++
   492				if opshift >= 0 {
   493					inst.Opcode |= uint32(modrm) << uint(opshift)
   494					opshift -= 8
   495				}
   496				mod = modrm >> 6
   497				regop = (modrm >> 3) & 07
   498				rm = modrm & 07
   499				if rex&PrefixREXR != 0 {
   500					rexUsed |= PrefixREXR
   501					regop |= 8
   502				}
   503				if addrMode == 16 {
   504					// 16-bit modrm form
   505					if mod != 3 {
   506						haveMem = true
   507						mem = addr16[rm]
   508						if rm == 6 && mod == 0 {
   509							mem.Base = 0
   510						}
   511	
   512						// Consume disp16 if present.
   513						if mod == 0 && rm == 6 || mod == 2 {
   514							if pos+2 > len(src) {
   515								return truncated(src, mode)
   516							}
   517							mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
   518							pos += 2
   519						}
   520	
   521						// Consume disp8 if present.
   522						if mod == 1 {
   523							if pos >= len(src) {
   524								return truncated(src, mode)
   525							}
   526							mem.Disp = int64(int8(src[pos]))
   527							pos++
   528						}
   529					}
   530				} else {
   531					haveMem = mod != 3
   532	
   533					// 32-bit or 64-bit form
   534					// Consume SIB encoding if present.
   535					if rm == 4 && mod != 3 {
   536						haveSIB = true
   537						if pos >= len(src) {
   538							return truncated(src, mode)
   539						}
   540						sib = int(src[pos])
   541						pos++
   542						if opshift >= 0 {
   543							inst.Opcode |= uint32(sib) << uint(opshift)
   544							opshift -= 8
   545						}
   546						scale = sib >> 6
   547						index = (sib >> 3) & 07
   548						base = sib & 07
   549						if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
   550							rexUsed |= PrefixREXB
   551							base |= 8
   552						}
   553						if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
   554							rexUsed |= PrefixREXX
   555							index |= 8
   556						}
   557	
   558						mem.Scale = 1 << uint(scale)
   559						if index == 4 {
   560							// no mem.Index
   561						} else {
   562							mem.Index = baseRegForBits(addrMode) + Reg(index)
   563						}
   564						if base&7 == 5 && mod == 0 {
   565							// no mem.Base
   566						} else {
   567							mem.Base = baseRegForBits(addrMode) + Reg(base)
   568						}
   569					} else {
   570						if rex&PrefixREXB != 0 {
   571							rexUsed |= PrefixREXB
   572							rm |= 8
   573						}
   574						if mod == 0 && rm&7 == 5 || rm&7 == 4 {
   575							// base omitted
   576						} else if mod != 3 {
   577							mem.Base = baseRegForBits(addrMode) + Reg(rm)
   578						}
   579					}
   580	
   581					// Consume disp32 if present.
   582					if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
   583						if pos+4 > len(src) {
   584							return truncated(src, mode)
   585						}
   586						dispoff = pos
   587						displen = 4
   588						mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
   589						pos += 4
   590					}
   591	
   592					// Consume disp8 if present.
   593					if mod == 1 {
   594						if pos >= len(src) {
   595							return truncated(src, mode)
   596						}
   597						dispoff = pos
   598						displen = 1
   599						mem.Disp = int64(int8(src[pos]))
   600						pos++
   601					}
   602	
   603					// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
   604					// See Vol 2A. Table 2-7.
   605					if mode == 64 && mod == 0 && rm&7 == 5 {
   606						if addrMode == 32 {
   607							mem.Base = EIP
   608						} else {
   609							mem.Base = RIP
   610						}
   611					}
   612				}
   613	
   614				if segIndex >= 0 {
   615					mem.Segment = prefixToSegment(inst.Prefix[segIndex])
   616				}
   617			}
   618	
   619			// Execute single opcode.
   620			switch decodeOp(x) {
   621			default:
   622				println("bad op", x, "at", pc-1, "from", oldPC)
   623				return Inst{Len: pos}, errInternal
   624	
   625			case xFail:
   626				inst.Op = 0
   627				break Decode
   628	
   629			case xMatch:
   630				break Decode
   631	
   632			case xJump:
   633				pc = int(decoder[pc])
   634	
   635			// Conditional branches.
   636	
   637			case xCondByte:
   638				if pos >= len(src) {
   639					return truncated(src, mode)
   640				}
   641				b := src[pos]
   642				n := int(decoder[pc])
   643				pc++
   644				for i := 0; i < n; i++ {
   645					xb, xpc := decoder[pc], int(decoder[pc+1])
   646					pc += 2
   647					if b == byte(xb) {
   648						pc = xpc
   649						pos++
   650						if opshift >= 0 {
   651							inst.Opcode |= uint32(b) << uint(opshift)
   652							opshift -= 8
   653						}
   654						continue Decode
   655					}
   656				}
   657				// xCondByte is the only conditional with a fall through,
   658				// so that it can be used to pick off special cases before
   659				// an xCondSlash. If the fallthrough instruction is xFail,
   660				// advance the position so that the decoded instruction
   661				// size includes the byte we just compared against.
   662				if decodeOp(decoder[pc]) == xJump {
   663					pc = int(decoder[pc+1])
   664				}
   665				if decodeOp(decoder[pc]) == xFail {
   666					pos++
   667				}
   668	
   669			case xCondIs64:
   670				if mode == 64 {
   671					pc = int(decoder[pc+1])
   672				} else {
   673					pc = int(decoder[pc])
   674				}
   675	
   676			case xCondIsMem:
   677				mem := haveMem
   678				if !haveModrm {
   679					if pos >= len(src) {
   680						return instPrefix(src[0], mode) // too long
   681					}
   682					mem = src[pos]>>6 != 3
   683				}
   684				if mem {
   685					pc = int(decoder[pc+1])
   686				} else {
   687					pc = int(decoder[pc])
   688				}
   689	
   690			case xCondDataSize:
   691				switch dataMode {
   692				case 16:
   693					if dataSizeIndex >= 0 {
   694						inst.Prefix[dataSizeIndex] |= PrefixImplicit
   695					}
   696					pc = int(decoder[pc])
   697				case 32:
   698					if dataSizeIndex >= 0 {
   699						inst.Prefix[dataSizeIndex] |= PrefixImplicit
   700					}
   701					pc = int(decoder[pc+1])
   702				case 64:
   703					rexUsed |= PrefixREXW
   704					pc = int(decoder[pc+2])
   705				}
   706	
   707			case xCondAddrSize:
   708				switch addrMode {
   709				case 16:
   710					if addrSizeIndex >= 0 {
   711						inst.Prefix[addrSizeIndex] |= PrefixImplicit
   712					}
   713					pc = int(decoder[pc])
   714				case 32:
   715					if addrSizeIndex >= 0 {
   716						inst.Prefix[addrSizeIndex] |= PrefixImplicit
   717					}
   718					pc = int(decoder[pc+1])
   719				case 64:
   720					pc = int(decoder[pc+2])
   721				}
   722	
   723			case xCondPrefix:
   724				// Conditional branch based on presence or absence of prefixes.
   725				// The conflict cases here are completely undocumented and
   726				// differ significantly between GNU libopcodes and Intel xed.
   727				// I have not written assembly code to divine what various CPUs
   728				// do, but it wouldn't surprise me if they are not consistent either.
   729				//
   730				// The basic idea is to switch on the presence of a prefix, so that
   731				// for example:
   732				//
   733				//	xCondPrefix, 4
   734				//	0xF3, 123,
   735				//	0xF2, 234,
   736				//	0x66, 345,
   737				//	0, 456
   738				//
   739				// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
   740				// is present, 345 if the 66 prefix is present, and 456 otherwise.
   741				// The prefixes are given in descending order so that the 0 will be last.
   742				//
   743				// It is unclear what should happen if multiple conditions are
   744				// satisfied: what if F2 and F3 are both present, or if 66 and F2
   745				// are present, or if all three are present? The one chosen becomes
   746				// part of the opcode and the others do not. Perhaps the answer
   747				// depends on the specific opcodes in question.
   748				//
   749				// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
   750				// it comes in 16-bit and 32-bit forms based on the 66 prefix,
   751				// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
   752				// with the 66 being only an operand size override, and probably
   753				// F2 66 0F 38 F1 /r should be treated the same.
   754				// Perhaps that rule is specific to the case of CRC32, since no
   755				// 66 0F 38 F1 instruction is defined (today) (that we know of).
   756				// However, both libopcodes and xed seem to generalize this
   757				// example and choose F2/F3 in preference to 66, and we
   758				// do the same.
   759				//
   760				// Next, what if both F2 and F3 are present? Which wins?
   761				// The Intel xed rule, and ours, is that the one that occurs last wins.
   762				// The GNU libopcodes rule, which we implement only in gnuCompat mode,
   763				// is that F3 beats F2 unless F3 has no special meaning, in which
   764				// case F3 can be a modifier on an F2 special meaning.
   765				//
   766				// Concretely,
   767				//	66 0F D6 /r is MOVQ
   768				//	F2 0F D6 /r is MOVDQ2Q
   769				//	F3 0F D6 /r is MOVQ2DQ.
   770				//
   771				//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
   772				//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
   773				//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
   774				//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
   775				//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
   776				//	F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
   777				//	Adding 66 anywhere in the prefix section of the
   778				//	last two cases does not change the outcome.
   779				//
   780				// Finally, what if there is a variant in which 66 is a mandatory
   781				// prefix rather than an operand size override, but we know of
   782				// no corresponding F2/F3 form, and we see both F2/F3 and 66.
   783				// Does F2/F3 still take priority, so that the result is an unknown
   784				// instruction, or does the 66 take priority, so that the extended
   785				// 66 instruction should be interpreted as having a REP/REPN prefix?
   786				// Intel xed does the former and GNU libopcodes does the latter.
   787				// We side with Intel xed, unless we are trying to match libopcodes
   788				// more closely during the comparison-based test suite.
   789				//
   790				// In 64-bit mode REX.W is another valid prefix to test for, but
   791				// there is less ambiguity about that. When present, REX.W is
   792				// always the first entry in the table.
   793				n := int(decoder[pc])
   794				pc++
   795				sawF3 := false
   796				for j := 0; j < n; j++ {
   797					prefix := Prefix(decoder[pc+2*j])
   798					if prefix.IsREX() {
   799						rexUsed |= prefix
   800						if rex&prefix == prefix {
   801							pc = int(decoder[pc+2*j+1])
   802							continue Decode
   803						}
   804						continue
   805					}
   806					ok := false
   807					if prefix == 0 {
   808						ok = true
   809					} else if prefix.IsREX() {
   810						rexUsed |= prefix
   811						if rex&prefix == prefix {
   812							ok = true
   813						}
   814					} else if prefix == 0xC5 || prefix == 0xC4 {
   815						if vex == prefix {
   816							ok = true
   817						}
   818					} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
   819						prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
   820						var vexM, vexP Prefix
   821						if vex == 0xC5 {
   822							vexM = 1 // 2 byte vex always implies 0F
   823							vexP = inst.Prefix[vexIndex+1]
   824						} else {
   825							vexM = inst.Prefix[vexIndex+1]
   826							vexP = inst.Prefix[vexIndex+2]
   827						}
   828						switch prefix {
   829						case 0x66:
   830							ok = vexP&3 == 1
   831						case 0xF3:
   832							ok = vexP&3 == 2
   833						case 0xF2:
   834							ok = vexP&3 == 3
   835						case 0x0F:
   836							ok = vexM&3 == 1
   837						case 0x0F38:
   838							ok = vexM&3 == 2
   839						case 0x0F3A:
   840							ok = vexM&3 == 3
   841						}
   842					} else {
   843						if prefix == 0xF3 {
   844							sawF3 = true
   845						}
   846						switch prefix {
   847						case PrefixLOCK:
   848							if lockIndex >= 0 {
   849								inst.Prefix[lockIndex] |= PrefixImplicit
   850								ok = true
   851							}
   852						case PrefixREP, PrefixREPN:
   853							if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
   854								inst.Prefix[repIndex] |= PrefixImplicit
   855								ok = true
   856							}
   857							if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
   858								// Check to see if earlier prefix F3 is present.
   859								for i := repIndex - 1; i >= 0; i-- {
   860									if inst.Prefix[i]&0xFF == prefix {
   861										inst.Prefix[i] |= PrefixImplicit
   862										ok = true
   863									}
   864								}
   865							}
   866							if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
   867								// Check to see if earlier prefix F2 is present.
   868								for i := repIndex - 1; i >= 0; i-- {
   869									if inst.Prefix[i]&0xFF == prefix {
   870										inst.Prefix[i] |= PrefixImplicit
   871										ok = true
   872									}
   873								}
   874							}
   875						case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
   876							if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
   877								inst.Prefix[segIndex] |= PrefixImplicit
   878								ok = true
   879							}
   880						case PrefixDataSize:
   881							// Looking for 66 mandatory prefix.
   882							// The F2/F3 mandatory prefixes take priority when both are present.
   883							// If we got this far in the xCondPrefix table and an F2/F3 is present,
   884							// it means the table didn't have any entry for that prefix. But if 66 has
   885							// special meaning, perhaps F2/F3 have special meaning that we don't know.
   886							// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
   887							// GNU libopcodes allows the 66 to match. We do what Intel xed does
   888							// except in gnuCompat mode.
   889							if repIndex >= 0 && !gnuCompat {
   890								inst.Op = 0
   891								break Decode
   892							}
   893							if dataSizeIndex >= 0 {
   894								inst.Prefix[dataSizeIndex] |= PrefixImplicit
   895								ok = true
   896							}
   897						case PrefixAddrSize:
   898							if addrSizeIndex >= 0 {
   899								inst.Prefix[addrSizeIndex] |= PrefixImplicit
   900								ok = true
   901							}
   902						}
   903					}
   904					if ok {
   905						pc = int(decoder[pc+2*j+1])
   906						continue Decode
   907					}
   908				}
   909				inst.Op = 0
   910				break Decode
   911	
   912			case xCondSlashR:
   913				pc = int(decoder[pc+regop&7])
   914	
   915			// Input.
   916	
   917			case xReadSlashR:
   918				// done above
   919	
   920			case xReadIb:
   921				if pos >= len(src) {
   922					return truncated(src, mode)
   923				}
   924				imm8 = int8(src[pos])
   925				pos++
   926	
   927			case xReadIw:
   928				if pos+2 > len(src) {
   929					return truncated(src, mode)
   930				}
   931				imm = int64(binary.LittleEndian.Uint16(src[pos:]))
   932				pos += 2
   933	
   934			case xReadId:
   935				if pos+4 > len(src) {
   936					return truncated(src, mode)
   937				}
   938				imm = int64(binary.LittleEndian.Uint32(src[pos:]))
   939				pos += 4
   940	
   941			case xReadIo:
   942				if pos+8 > len(src) {
   943					return truncated(src, mode)
   944				}
   945				imm = int64(binary.LittleEndian.Uint64(src[pos:]))
   946				pos += 8
   947	
   948			case xReadCb:
   949				if pos >= len(src) {
   950					return truncated(src, mode)
   951				}
   952				immcpos = pos
   953				immc = int64(src[pos])
   954				pos++
   955	
   956			case xReadCw:
   957				if pos+2 > len(src) {
   958					return truncated(src, mode)
   959				}
   960				immcpos = pos
   961				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   962				pos += 2
   963	
   964			case xReadCm:
   965				immcpos = pos
   966				if addrMode == 16 {
   967					if pos+2 > len(src) {
   968						return truncated(src, mode)
   969					}
   970					immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   971					pos += 2
   972				} else if addrMode == 32 {
   973					if pos+4 > len(src) {
   974						return truncated(src, mode)
   975					}
   976					immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   977					pos += 4
   978				} else {
   979					if pos+8 > len(src) {
   980						return truncated(src, mode)
   981					}
   982					immc = int64(binary.LittleEndian.Uint64(src[pos:]))
   983					pos += 8
   984				}
   985			case xReadCd:
   986				immcpos = pos
   987				if pos+4 > len(src) {
   988					return truncated(src, mode)
   989				}
   990				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   991				pos += 4
   992	
   993			case xReadCp:
   994				immcpos = pos
   995				if pos+6 > len(src) {
   996					return truncated(src, mode)
   997				}
   998				w := binary.LittleEndian.Uint32(src[pos:])
   999				w2 := binary.LittleEndian.Uint16(src[pos+4:])
  1000				immc = int64(w2)<<32 | int64(w)
  1001				pos += 6
  1002	
  1003			// Output.
  1004	
  1005			case xSetOp:
  1006				inst.Op = Op(decoder[pc])
  1007				pc++
  1008	
  1009			case xArg1,
  1010				xArg3,
  1011				xArgAL,
  1012				xArgAX,
  1013				xArgCL,
  1014				xArgCS,
  1015				xArgDS,
  1016				xArgDX,
  1017				xArgEAX,
  1018				xArgEDX,
  1019				xArgES,
  1020				xArgFS,
  1021				xArgGS,
  1022				xArgRAX,
  1023				xArgRDX,
  1024				xArgSS,
  1025				xArgST,
  1026				xArgXMM0:
  1027				inst.Args[narg] = fixedArg[x]
  1028				narg++
  1029	
  1030			case xArgImm8:
  1031				inst.Args[narg] = Imm(imm8)
  1032				narg++
  1033	
  1034			case xArgImm8u:
  1035				inst.Args[narg] = Imm(uint8(imm8))
  1036				narg++
  1037	
  1038			case xArgImm16:
  1039				inst.Args[narg] = Imm(int16(imm))
  1040				narg++
  1041	
  1042			case xArgImm16u:
  1043				inst.Args[narg] = Imm(uint16(imm))
  1044				narg++
  1045	
  1046			case xArgImm32:
  1047				inst.Args[narg] = Imm(int32(imm))
  1048				narg++
  1049	
  1050			case xArgImm64:
  1051				inst.Args[narg] = Imm(imm)
  1052				narg++
  1053	
  1054			case xArgM,
  1055				xArgM128,
  1056				xArgM256,
  1057				xArgM1428byte,
  1058				xArgM16,
  1059				xArgM16and16,
  1060				xArgM16and32,
  1061				xArgM16and64,
  1062				xArgM16colon16,
  1063				xArgM16colon32,
  1064				xArgM16colon64,
  1065				xArgM16int,
  1066				xArgM2byte,
  1067				xArgM32,
  1068				xArgM32and32,
  1069				xArgM32fp,
  1070				xArgM32int,
  1071				xArgM512byte,
  1072				xArgM64,
  1073				xArgM64fp,
  1074				xArgM64int,
  1075				xArgM8,
  1076				xArgM80bcd,
  1077				xArgM80dec,
  1078				xArgM80fp,
  1079				xArgM94108byte,
  1080				xArgMem:
  1081				if !haveMem {
  1082					inst.Op = 0
  1083					break Decode
  1084				}
  1085				inst.Args[narg] = mem
  1086				inst.MemBytes = int(memBytes[decodeOp(x)])
  1087				if mem.Base == RIP {
  1088					inst.PCRel = displen
  1089					inst.PCRelOff = dispoff
  1090				}
  1091				narg++
  1092	
  1093			case xArgPtr16colon16:
  1094				inst.Args[narg] = Imm(immc >> 16)
  1095				inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
  1096				narg += 2
  1097	
  1098			case xArgPtr16colon32:
  1099				inst.Args[narg] = Imm(immc >> 32)
  1100				inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
  1101				narg += 2
  1102	
  1103			case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
  1104				// TODO(rsc): Can address be 64 bits?
  1105				mem = Mem{Disp: int64(immc)}
  1106				if segIndex >= 0 {
  1107					mem.Segment = prefixToSegment(inst.Prefix[segIndex])
  1108					inst.Prefix[segIndex] |= PrefixImplicit
  1109				}
  1110				inst.Args[narg] = mem
  1111				inst.MemBytes = int(memBytes[decodeOp(x)])
  1112				if mem.Base == RIP {
  1113					inst.PCRel = displen
  1114					inst.PCRelOff = dispoff
  1115				}
  1116				narg++
  1117	
  1118			case xArgYmm1:
  1119				base := baseReg[x]
  1120				index := Reg(regop)
  1121				if inst.Prefix[vexIndex+1]&0x80 == 0 {
  1122					index += 8
  1123				}
  1124				inst.Args[narg] = base + index
  1125				narg++
  1126	
  1127			case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
  1128				base := baseReg[x]
  1129				index := Reg(regop)
  1130				if rex != 0 && base == AL && index >= 4 {
  1131					rexUsed |= PrefixREX
  1132					index -= 4
  1133					base = SPB
  1134				}
  1135				inst.Args[narg] = base + index
  1136				narg++
  1137	
  1138			case xArgMm, xArgMm1, xArgTR0dashTR7:
  1139				inst.Args[narg] = baseReg[x] + Reg(regop&7)
  1140				narg++
  1141	
  1142			case xArgCR0dashCR7:
  1143				// AMD documents an extension that the LOCK prefix
  1144				// can be used in place of a REX prefix in order to access
  1145				// CR8 from 32-bit mode. The LOCK prefix is allowed in
  1146				// all modes, provided the corresponding CPUID bit is set.
  1147				if lockIndex >= 0 {
  1148					inst.Prefix[lockIndex] |= PrefixImplicit
  1149					regop += 8
  1150				}
  1151				inst.Args[narg] = CR0 + Reg(regop)
  1152				narg++
  1153	
  1154			case xArgSreg:
  1155				regop &= 7
  1156				if regop >= 6 {
  1157					inst.Op = 0
  1158					break Decode
  1159				}
  1160				inst.Args[narg] = ES + Reg(regop)
  1161				narg++
  1162	
  1163			case xArgRmf16, xArgRmf32, xArgRmf64:
  1164				base := baseReg[x]
  1165				index := Reg(modrm & 07)
  1166				if rex&PrefixREXB != 0 {
  1167					rexUsed |= PrefixREXB
  1168					index += 8
  1169				}
  1170				inst.Args[narg] = base + index
  1171				narg++
  1172	
  1173			case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
  1174				n := inst.Opcode >> uint(opshift+8) & 07
  1175				base := baseReg[x]
  1176				index := Reg(n)
  1177				if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
  1178					rexUsed |= PrefixREXB
  1179					index += 8
  1180				}
  1181				if rex != 0 && base == AL && index >= 4 {
  1182					rexUsed |= PrefixREX
  1183					index -= 4
  1184					base = SPB
  1185				}
  1186				inst.Args[narg] = base + index
  1187				narg++
  1188			case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
  1189				xArgMmM32, xArgMmM64, xArgMm2M64,
  1190				xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
  1191				xArgYmm2M256:
  1192				if haveMem {
  1193					inst.Args[narg] = mem
  1194					inst.MemBytes = int(memBytes[decodeOp(x)])
  1195					if mem.Base == RIP {
  1196						inst.PCRel = displen
  1197						inst.PCRelOff = dispoff
  1198					}
  1199				} else {
  1200					base := baseReg[x]
  1201					index := Reg(rm)
  1202					switch decodeOp(x) {
  1203					case xArgMmM32, xArgMmM64, xArgMm2M64:
  1204						// There are only 8 MMX registers, so these ignore the REX.X bit.
  1205						index &= 7
  1206					case xArgRM8:
  1207						if rex != 0 && index >= 4 {
  1208							rexUsed |= PrefixREX
  1209							index -= 4
  1210							base = SPB
  1211						}
  1212					case xArgYmm2M256:
  1213						if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
  1214							index += 8
  1215						}
  1216					}
  1217					inst.Args[narg] = base + index
  1218				}
  1219				narg++
  1220	
  1221			case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1222				if haveMem {
  1223					inst.Op = 0
  1224					break Decode
  1225				}
  1226				inst.Args[narg] = baseReg[x] + Reg(rm&7)
  1227				narg++
  1228	
  1229			case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1230				if haveMem {
  1231					inst.Op = 0
  1232					break Decode
  1233				}
  1234				inst.Args[narg] = baseReg[x] + Reg(rm)
  1235				narg++
  1236	
  1237			case xArgRel8:
  1238				inst.PCRelOff = immcpos
  1239				inst.PCRel = 1
  1240				inst.Args[narg] = Rel(int8(immc))
  1241				narg++
  1242	
  1243			case xArgRel16:
  1244				inst.PCRelOff = immcpos
  1245				inst.PCRel = 2
  1246				inst.Args[narg] = Rel(int16(immc))
  1247				narg++
  1248	
  1249			case xArgRel32:
  1250				inst.PCRelOff = immcpos
  1251				inst.PCRel = 4
  1252				inst.Args[narg] = Rel(int32(immc))
  1253				narg++
  1254			}
  1255		}
  1256	
  1257		if inst.Op == 0 {
  1258			// Invalid instruction.
  1259			if nprefix > 0 {
  1260				return instPrefix(src[0], mode) // invalid instruction
  1261			}
  1262			return Inst{Len: pos}, ErrUnrecognized
  1263		}
  1264	
  1265		// Matched! Hooray!
  1266	
  1267		// 90 decodes as XCHG EAX, EAX but is NOP.
  1268		// 66 90 decodes as XCHG AX, AX and is NOP too.
  1269		// 48 90 decodes as XCHG RAX, RAX and is NOP too.
  1270		// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
  1271		// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
  1272		// It's all too special to handle in the decoding tables, at least for now.
  1273		if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
  1274			if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
  1275				inst.Op = NOP
  1276				if dataSizeIndex >= 0 {
  1277					inst.Prefix[dataSizeIndex] &^= PrefixImplicit
  1278				}
  1279				inst.Args[0] = nil
  1280				inst.Args[1] = nil
  1281			}
  1282			if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
  1283				inst.Prefix[repIndex] |= PrefixImplicit
  1284				inst.Op = PAUSE
  1285				inst.Args[0] = nil
  1286				inst.Args[1] = nil
  1287			} else if gnuCompat {
  1288				for i := nprefix - 1; i >= 0; i-- {
  1289					if inst.Prefix[i]&0xFF == 0xF3 {
  1290						inst.Prefix[i] |= PrefixImplicit
  1291						inst.Op = PAUSE
  1292						inst.Args[0] = nil
  1293						inst.Args[1] = nil
  1294						break
  1295					}
  1296				}
  1297			}
  1298		}
  1299	
  1300		// defaultSeg returns the default segment for an implicit
  1301		// memory reference: the final override if present, or else DS.
  1302		defaultSeg := func() Reg {
  1303			if segIndex >= 0 {
  1304				inst.Prefix[segIndex] |= PrefixImplicit
  1305				return prefixToSegment(inst.Prefix[segIndex])
  1306			}
  1307			return DS
  1308		}
  1309	
  1310		// Add implicit arguments not present in the tables.
  1311		// Normally we shy away from making implicit arguments explicit,
  1312		// following the Intel manuals, but adding the arguments seems
  1313		// the best way to express the effect of the segment override prefixes.
  1314		// TODO(rsc): Perhaps add these to the tables and
  1315		// create bytecode instructions for them.
  1316		usedAddrSize := false
  1317		switch inst.Op {
  1318		case INSB, INSW, INSD:
  1319			inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1320			inst.Args[1] = DX
  1321			usedAddrSize = true
  1322	
  1323		case OUTSB, OUTSW, OUTSD:
  1324			inst.Args[0] = DX
  1325			inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1326			usedAddrSize = true
  1327	
  1328		case MOVSB, MOVSW, MOVSD, MOVSQ:
  1329			inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1330			inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1331			usedAddrSize = true
  1332	
  1333		case CMPSB, CMPSW, CMPSD, CMPSQ:
  1334			inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1335			inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1336			usedAddrSize = true
  1337	
  1338		case LODSB, LODSW, LODSD, LODSQ:
  1339			switch inst.Op {
  1340			case LODSB:
  1341				inst.Args[0] = AL
  1342			case LODSW:
  1343				inst.Args[0] = AX
  1344			case LODSD:
  1345				inst.Args[0] = EAX
  1346			case LODSQ:
  1347				inst.Args[0] = RAX
  1348			}
  1349			inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1350			usedAddrSize = true
  1351	
  1352		case STOSB, STOSW, STOSD, STOSQ:
  1353			inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1354			switch inst.Op {
  1355			case STOSB:
  1356				inst.Args[1] = AL
  1357			case STOSW:
  1358				inst.Args[1] = AX
  1359			case STOSD:
  1360				inst.Args[1] = EAX
  1361			case STOSQ:
  1362				inst.Args[1] = RAX
  1363			}
  1364			usedAddrSize = true
  1365	
  1366		case SCASB, SCASW, SCASD, SCASQ:
  1367			inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1368			switch inst.Op {
  1369			case SCASB:
  1370				inst.Args[0] = AL
  1371			case SCASW:
  1372				inst.Args[0] = AX
  1373			case SCASD:
  1374				inst.Args[0] = EAX
  1375			case SCASQ:
  1376				inst.Args[0] = RAX
  1377			}
  1378			usedAddrSize = true
  1379	
  1380		case XLATB:
  1381			inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
  1382			usedAddrSize = true
  1383		}
  1384	
  1385		// If we used the address size annotation to construct the
  1386		// argument list, mark that prefix as implicit: it doesn't need
  1387		// to be shown when printing the instruction.
  1388		if haveMem || usedAddrSize {
  1389			if addrSizeIndex >= 0 {
  1390				inst.Prefix[addrSizeIndex] |= PrefixImplicit
  1391			}
  1392		}
  1393	
  1394		// Similarly, if there's some memory operand, the segment
  1395		// will be shown there and doesn't need to be shown as an
  1396		// explicit prefix.
  1397		if haveMem {
  1398			if segIndex >= 0 {
  1399				inst.Prefix[segIndex] |= PrefixImplicit
  1400			}
  1401		}
  1402	
  1403		// Branch predict prefixes are overloaded segment prefixes,
  1404		// since segment prefixes don't make sense on conditional jumps.
  1405		// Rewrite final instance to prediction prefix.
  1406		// The set of instructions to which the prefixes apply (other than the
  1407		// Jcc conditional jumps) is not 100% clear from the manuals, but
  1408		// the disassemblers seem to agree about the LOOP and JCXZ instructions,
  1409		// so we'll follow along.
  1410		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1411		if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
  1412		PredictLoop:
  1413			for i := nprefix - 1; i >= 0; i-- {
  1414				p := inst.Prefix[i]
  1415				switch p & 0xFF {
  1416				case PrefixCS:
  1417					inst.Prefix[i] = PrefixPN
  1418					break PredictLoop
  1419				case PrefixDS:
  1420					inst.Prefix[i] = PrefixPT
  1421					break PredictLoop
  1422				}
  1423			}
  1424		}
  1425	
  1426		// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
  1427		// A REPN applied to certain control transfers is a BND prefix to bound
  1428		// the range of possible destinations. There's surprisingly little documentation
  1429		// about this, so we just do what libopcodes and xed agree on.
  1430		// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
  1431		// does not turn into a BND.
  1432		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1433		if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
  1434			for i := nprefix - 1; i >= 0; i-- {
  1435				p := inst.Prefix[i]
  1436				if p&^PrefixIgnored == PrefixREPN {
  1437					inst.Prefix[i] = PrefixBND
  1438					break
  1439				}
  1440			}
  1441		}
  1442	
  1443		// The LOCK prefix only applies to certain instructions, and then only
  1444		// to instances of the instruction with a memory destination.
  1445		// Other uses of LOCK are invalid and cause a processor exception,
  1446		// in contrast to the "just ignore it" spirit applied to all other prefixes.
  1447		// Mark invalid lock prefixes.
  1448		hasLock := false
  1449		if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
  1450			switch inst.Op {
  1451			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1452			case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
  1453				if isMem(inst.Args[0]) {
  1454					hasLock = true
  1455					break
  1456				}
  1457				fallthrough
  1458			default:
  1459				inst.Prefix[lockIndex] |= PrefixInvalid
  1460			}
  1461		}
  1462	
  1463		// In certain cases, all of which require a memory destination,
  1464		// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
  1465		// from the Intel Transactional Synchronization Extensions (TSX).
  1466		//
  1467		// The specific rules are:
  1468		// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
  1469		// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
  1470		// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
  1471		if isMem(inst.Args[0]) {
  1472			if inst.Op == XCHG {
  1473				hasLock = true
  1474			}
  1475	
  1476			for i := len(inst.Prefix) - 1; i >= 0; i-- {
  1477				p := inst.Prefix[i] &^ PrefixIgnored
  1478				switch p {
  1479				case PrefixREPN:
  1480					if hasLock {
  1481						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
  1482					}
  1483	
  1484				case PrefixREP:
  1485					if hasLock {
  1486						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1487					}
  1488	
  1489					if inst.Op == MOV {
  1490						op := (inst.Opcode >> 24) &^ 1
  1491						if op == 0x88 || op == 0xC6 {
  1492							inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1493						}
  1494					}
  1495				}
  1496			}
  1497		}
  1498	
  1499		// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
  1500		if repIndex >= 0 {
  1501			switch inst.Prefix[repIndex] {
  1502			case PrefixREP, PrefixREPN:
  1503				switch inst.Op {
  1504				// According to the manuals, the REP/REPE prefix applies to all of these,
  1505				// while the REPN applies only to some of them. However, both libopcodes
  1506				// and xed show both prefixes explicitly for all instructions, so we do the same.
  1507				// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1508				case INSB, INSW, INSD,
  1509					MOVSB, MOVSW, MOVSD, MOVSQ,
  1510					OUTSB, OUTSW, OUTSD,
  1511					LODSB, LODSW, LODSD, LODSQ,
  1512					CMPSB, CMPSW, CMPSD, CMPSQ,
  1513					SCASB, SCASW, SCASD, SCASQ,
  1514					STOSB, STOSW, STOSD, STOSQ:
  1515					// ok
  1516				default:
  1517					inst.Prefix[repIndex] |= PrefixIgnored
  1518				}
  1519			}
  1520		}
  1521	
  1522		// If REX was present, mark implicit if all the 1 bits were consumed.
  1523		if rexIndex >= 0 {
  1524			if rexUsed != 0 {
  1525				rexUsed |= PrefixREX
  1526			}
  1527			if rex&^rexUsed == 0 {
  1528				inst.Prefix[rexIndex] |= PrefixImplicit
  1529			}
  1530		}
  1531	
  1532		inst.DataSize = dataMode
  1533		inst.AddrSize = addrMode
  1534		inst.Mode = mode
  1535		inst.Len = pos
  1536		return inst, nil
  1537	}
  1538	
// errInternal is a package-level error value whose message is
// "internal error". Its uses lie outside this part of the file;
// presumably it reports an inconsistency in the decoder itself rather
// than malformed input (TODO confirm against callers).
var errInternal = errors.New("internal error")
  1540	
  1541	// addr16 records the eight 16-bit addressing modes.
  1542	var addr16 = [8]Mem{
  1543		{Base: BX, Scale: 1, Index: SI},
  1544		{Base: BX, Scale: 1, Index: DI},
  1545		{Base: BP, Scale: 1, Index: SI},
  1546		{Base: BP, Scale: 1, Index: DI},
  1547		{Base: SI},
  1548		{Base: DI},
  1549		{Base: BP},
  1550		{Base: BX},
  1551	}
  1552	
  1553	// baseReg returns the base register for a given register size in bits.
  1554	func baseRegForBits(bits int) Reg {
  1555		switch bits {
  1556		case 8:
  1557			return AL
  1558		case 16:
  1559			return AX
  1560		case 32:
  1561			return EAX
  1562		case 64:
  1563			return RAX
  1564		}
  1565		return 0
  1566	}
  1567	
  1568	// baseReg records the base register for argument types that specify
  1569	// a range of registers indexed by op, regop, or rm.
  1570	var baseReg = [...]Reg{
  1571		xArgDR0dashDR7: DR0,
  1572		xArgMm1:        M0,
  1573		xArgMm2:        M0,
  1574		xArgMm2M64:     M0,
  1575		xArgMm:         M0,
  1576		xArgMmM32:      M0,
  1577		xArgMmM64:      M0,
  1578		xArgR16:        AX,
  1579		xArgR16op:      AX,
  1580		xArgR32:        EAX,
  1581		xArgR32M16:     EAX,
  1582		xArgR32M8:      EAX,
  1583		xArgR32op:      EAX,
  1584		xArgR64:        RAX,
  1585		xArgR64M16:     RAX,
  1586		xArgR64op:      RAX,
  1587		xArgR8:         AL,
  1588		xArgR8op:       AL,
  1589		xArgRM16:       AX,
  1590		xArgRM32:       EAX,
  1591		xArgRM64:       RAX,
  1592		xArgRM8:        AL,
  1593		xArgRmf16:      AX,
  1594		xArgRmf32:      EAX,
  1595		xArgRmf64:      RAX,
  1596		xArgSTi:        F0,
  1597		xArgTR0dashTR7: TR0,
  1598		xArgXmm1:       X0,
  1599		xArgYmm1:       X0,
  1600		xArgXmm2:       X0,
  1601		xArgXmm2M128:   X0,
  1602		xArgYmm2M256:   X0,
  1603		xArgXmm2M16:    X0,
  1604		xArgXmm2M32:    X0,
  1605		xArgXmm2M64:    X0,
  1606		xArgXmm:        X0,
  1607		xArgXmmM128:    X0,
  1608		xArgXmmM32:     X0,
  1609		xArgXmmM64:     X0,
  1610	}
  1611	
  1612	// prefixToSegment returns the segment register
  1613	// corresponding to a particular segment prefix.
  1614	func prefixToSegment(p Prefix) Reg {
  1615		switch p &^ PrefixImplicit {
  1616		case PrefixCS:
  1617			return CS
  1618		case PrefixDS:
  1619			return DS
  1620		case PrefixES:
  1621			return ES
  1622		case PrefixFS:
  1623			return FS
  1624		case PrefixGS:
  1625			return GS
  1626		case PrefixSS:
  1627			return SS
  1628		}
  1629		return 0
  1630	}
  1631	
  1632	// fixedArg records the fixed arguments corresponding to the given bytecodes.
  1633	var fixedArg = [...]Arg{
  1634		xArg1:    Imm(1),
  1635		xArg3:    Imm(3),
  1636		xArgAL:   AL,
  1637		xArgAX:   AX,
  1638		xArgDX:   DX,
  1639		xArgEAX:  EAX,
  1640		xArgEDX:  EDX,
  1641		xArgRAX:  RAX,
  1642		xArgRDX:  RDX,
  1643		xArgCL:   CL,
  1644		xArgCS:   CS,
  1645		xArgDS:   DS,
  1646		xArgES:   ES,
  1647		xArgFS:   FS,
  1648		xArgGS:   GS,
  1649		xArgSS:   SS,
  1650		xArgST:   F0,
  1651		xArgXMM0: X0,
  1652	}
  1653	
  1654	// memBytes records the size of the memory pointed at
  1655	// by a memory argument of the given form.
  1656	var memBytes = [...]int8{
  1657		xArgM128:       128 / 8,
  1658		xArgM256:       256 / 8,
  1659		xArgM16:        16 / 8,
  1660		xArgM16and16:   (16 + 16) / 8,
  1661		xArgM16colon16: (16 + 16) / 8,
  1662		xArgM16colon32: (16 + 32) / 8,
  1663		xArgM16int:     16 / 8,
  1664		xArgM2byte:     2,
  1665		xArgM32:        32 / 8,
  1666		xArgM32and32:   (32 + 32) / 8,
  1667		xArgM32fp:      32 / 8,
  1668		xArgM32int:     32 / 8,
  1669		xArgM64:        64 / 8,
  1670		xArgM64fp:      64 / 8,
  1671		xArgM64int:     64 / 8,
  1672		xArgMm2M64:     64 / 8,
  1673		xArgMmM32:      32 / 8,
  1674		xArgMmM64:      64 / 8,
  1675		xArgMoffs16:    16 / 8,
  1676		xArgMoffs32:    32 / 8,
  1677		xArgMoffs64:    64 / 8,
  1678		xArgMoffs8:     8 / 8,
  1679		xArgR32M16:     16 / 8,
  1680		xArgR32M8:      8 / 8,
  1681		xArgR64M16:     16 / 8,
  1682		xArgRM16:       16 / 8,
  1683		xArgRM32:       32 / 8,
  1684		xArgRM64:       64 / 8,
  1685		xArgRM8:        8 / 8,
  1686		xArgXmm2M128:   128 / 8,
  1687		xArgYmm2M256:   256 / 8,
  1688		xArgXmm2M16:    16 / 8,
  1689		xArgXmm2M32:    32 / 8,
  1690		xArgXmm2M64:    64 / 8,
  1691		xArgXmm:        128 / 8,
  1692		xArgXmmM128:    128 / 8,
  1693		xArgXmmM32:     32 / 8,
  1694		xArgXmmM64:     64 / 8,
  1695	}
  1696	
  1697	// isCondJmp records the conditional jumps.
  1698	var isCondJmp = [maxOp + 1]bool{
  1699		JA:  true,
  1700		JAE: true,
  1701		JB:  true,
  1702		JBE: true,
  1703		JE:  true,
  1704		JG:  true,
  1705		JGE: true,
  1706		JL:  true,
  1707		JLE: true,
  1708		JNE: true,
  1709		JNO: true,
  1710		JNP: true,
  1711		JNS: true,
  1712		JO:  true,
  1713		JP:  true,
  1714		JS:  true,
  1715	}
  1716	
  1717	// isLoop records the loop operators.
  1718	var isLoop = [maxOp + 1]bool{
  1719		LOOP:   true,
  1720		LOOPE:  true,
  1721		LOOPNE: true,
  1722		JECXZ:  true,
  1723		JRCXZ:  true,
  1724	}
  1725	

View as plain text