...

Source file src/pkg/cmd/internal/obj/x86/asm6.go

     1	// Inferno utils/6l/span.c
     2	// https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3	//
     4	//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5	//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6	//	Portions Copyright © 1997-1999 Vita Nuova Limited
     7	//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8	//	Portions Copyright © 2004,2006 Bruce Ellis
     9	//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10	//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11	//	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12	//
    13	// Permission is hereby granted, free of charge, to any person obtaining a copy
    14	// of this software and associated documentation files (the "Software"), to deal
    15	// in the Software without restriction, including without limitation the rights
    16	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17	// copies of the Software, and to permit persons to whom the Software is
    18	// furnished to do so, subject to the following conditions:
    19	//
    20	// The above copyright notice and this permission notice shall be included in
    21	// all copies or substantial portions of the Software.
    22	//
    23	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29	// THE SOFTWARE.
    30	
    31	package x86
    32	
    33	import (
    34		"cmd/internal/obj"
    35		"cmd/internal/objabi"
    36		"cmd/internal/sys"
    37		"encoding/binary"
    38		"fmt"
    39		"log"
    40		"strings"
    41	)
    42	
    43	var (
    44		plan9privates *obj.LSym
    45		deferreturn   *obj.LSym
    46	)
    47	
    48	// Instruction layout.
    49	
    50	// Loop alignment constants:
    51	// want to align loop entry to loopAlign-byte boundary,
    52	// and willing to insert at most maxLoopPad bytes of NOP to do so.
    53	// We define a loop entry as the target of a backward jump.
    54	//
    55	// gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56	// and it aligns all jump targets, not just backward jump targets.
    57	//
    58	// As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59	// is very slight but negative, so the alignment is disabled by
    60	// setting maxLoopPad = 0. The code is here for reference and
    61	// for future experiments.
    62	//
    63	const (
    64		loopAlign  = 16 // alignment boundary, in bytes, for loop entries
    65		maxLoopPad = 0  // maximum NOP padding bytes to insert; 0 disables loop alignment
    66	)
    67	
    68	// Bit flags that are used to express jump target properties.
    69	const (
    70		// branchBackwards marks targets that are located behind.
    71		// Used to express jumps to loop headers.
    72		branchBackwards = (1 << iota)
    73		// branchShort marks branches whose target is close,
    74		// with an offset in the -128..127 range.
    75		branchShort
    76		// branchLoopHead marks loop entry.
    77		// Used to insert padding for misaligned loops.
    78		branchLoopHead
    79	)
    80	
    81	// opBytes holds optab encoding bytes.
    82	// Each ytab reserves a fixed number of bytes in this array.
    83	//
    84	// The size should be the minimal number of bytes that
    85	// is enough to hold the biggest optab op lines.
    86	type opBytes [31]uint8
    87	
    88	type Optab struct { // Optab is one line of the optab instruction table.
    89		as     obj.As  // instruction this line describes; looked up by p.As
    90		ytab   []ytab  // operand patterns accepted by the instruction
    91		prefix uint8   // P* constant that controls the prefix bytes to emit
    92		op     opBytes // encoding bytes; each ytab line uses its own slice of them
    93	}
    94	
    95	type movtab struct { // NOTE(review): users of movtab are outside this view; field notes are assumptions — confirm
    96		as   obj.As   // instruction the entry applies to
    97		ft   uint8    // presumably the operand class of the "from" operand — TODO confirm
    98		f3t  uint8    // presumably the operand class of a third operand — TODO confirm
    99		tt   uint8    // presumably the operand class of the "to" operand — TODO confirm
   100		code uint8    // presumably selects how op is interpreted — TODO confirm
   101		op   [4]uint8 // up to four bytes of per-entry data
   102	}
   103	
   104	const (
   105		Yxxx = iota
   106		Ynone
   107		Yi0 // $0
   108		Yi1 // $1
   109		Yu2 // $x, x fits in uint2
   110		Yi8 // $x, x fits in int8
   111		Yu8 // $x, x fits in uint8
   112		Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   113		Ys32
   114		Yi32
   115		Yi64
   116		Yiauto
   117		Yal
   118		Ycl
   119		Yax
   120		Ycx
   121		Yrb
   122		Yrl
   123		Yrl32 // Yrl on 32-bit system
   124		Yrf
   125		Yf0
   126		Yrx
   127		Ymb
   128		Yml
   129		Ym
   130		Ybr
   131		Ycs
   132		Yss
   133		Yds
   134		Yes
   135		Yfs
   136		Ygs
   137		Ygdtr
   138		Yidtr
   139		Yldtr
   140		Ymsw
   141		Ytask
   142		Ycr0
   143		Ycr1
   144		Ycr2
   145		Ycr3
   146		Ycr4
   147		Ycr5
   148		Ycr6
   149		Ycr7
   150		Ycr8
   151		Ydr0
   152		Ydr1
   153		Ydr2
   154		Ydr3
   155		Ydr4
   156		Ydr5
   157		Ydr6
   158		Ydr7
   159		Ytr0
   160		Ytr1
   161		Ytr2
   162		Ytr3
   163		Ytr4
   164		Ytr5
   165		Ytr6
   166		Ytr7
   167		Ymr
   168		Ymm
   169		Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   170		YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   171		Yxr           // X0..X15
   172		YxrEvex       // X0..X31
   173		Yxm
   174		YxmEvex       // YxrEvex+Ym
   175		Yxvm          // VSIB vector array; vm32x/vm64x
   176		YxvmEvex      // Yxvm which permits High-16 X register as index.
   177		YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   178		Yyr           // Y0..Y15
   179		YyrEvex       // Y0..Y31
   180		Yym
   181		YymEvex   // YyrEvex+Ym
   182		Yyvm      // VSIB vector array; vm32y/vm64y
   183		YyvmEvex  // Yyvm which permits High-16 Y register as index.
   184		YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   185		Yzr       // Z0..Z31
   186		Yzm       // Yzr+Ym
   187		Yzvm      // VSIB vector array; vm32z/vm64z
   188		Yk0       // K0
   189		Yknot0    // K1..K7; write mask
   190		Yk        // K0..K7; used for KOP
   191		Ykm       // Yk+Ym; used for KOP
   192		Ytls
   193		Ytextsize
   194		Yindir
   195		Ymax
   196	)
   197	
   198	const (
   199		Zxxx = iota
   200		Zlit
   201		Zlitm_r
   202		Zlitr_m
   203		Zlit_m_r
   204		Z_rp
   205		Zbr
   206		Zcall
   207		Zcallcon
   208		Zcallduff
   209		Zcallind
   210		Zcallindreg
   211		Zib_
   212		Zib_rp
   213		Zibo_m
   214		Zibo_m_xm
   215		Zil_
   216		Zil_rp
   217		Ziq_rp
   218		Zilo_m
   219		Zjmp
   220		Zjmpcon
   221		Zloop
   222		Zo_iw
   223		Zm_o
   224		Zm_r
   225		Z_m_r
   226		Zm2_r
   227		Zm_r_xm
   228		Zm_r_i_xm
   229		Zm_r_xm_nr
   230		Zr_m_xm_nr
   231		Zibm_r // mmx1,mmx2/mem64,imm8
   232		Zibr_m
   233		Zmb_r
   234		Zaut_r
   235		Zo_m
   236		Zo_m64
   237		Zpseudo
   238		Zr_m
   239		Zr_m_xm
   240		Zrp_
   241		Z_ib
   242		Z_il
   243		Zm_ibo
   244		Zm_ilo
   245		Zib_rr
   246		Zil_rr
   247		Zbyte
   248	
   249		Zvex_rm_v_r
   250		Zvex_rm_v_ro
   251		Zvex_r_v_rm
   252		Zvex_i_rm_vo
   253		Zvex_v_rm_r
   254		Zvex_i_rm_r
   255		Zvex_i_r_v
   256		Zvex_i_rm_v_r
   257		Zvex
   258		Zvex_rm_r_vo
   259		Zvex_i_r_rm
   260		Zvex_hr_rm_v_r
   261	
   262		Zevex_first
   263		Zevex_i_r_k_rm
   264		Zevex_i_r_rm
   265		Zevex_i_rm_k_r
   266		Zevex_i_rm_k_vo
   267		Zevex_i_rm_r
   268		Zevex_i_rm_v_k_r
   269		Zevex_i_rm_v_r
   270		Zevex_i_rm_vo
   271		Zevex_k_rmo
   272		Zevex_r_k_rm
   273		Zevex_r_v_k_rm
   274		Zevex_r_v_rm
   275		Zevex_rm_k_r
   276		Zevex_rm_v_k_r
   277		Zevex_rm_v_r
   278		Zevex_last
   279	
   280		Zmax
   281	)
   282	
   283	const (
   284		Px   = 0
   285		Px1  = 1    // symbolic; exact value doesn't matter
   286		P32  = 0x32 // 32-bit only
   287		Pe   = 0x66 // operand escape
   288		Pm   = 0x0f // 2byte opcode escape
   289		Pq   = 0xff // both escapes: 66 0f
   290		Pb   = 0xfe // byte operands
   291		Pf2  = 0xf2 // xmm escape 1: f2 0f
   292		Pf3  = 0xf3 // xmm escape 2: f3 0f
   293		Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   294		Pq3  = 0x67 // xmm escape 3: 66 48 0f
   295		Pq4  = 0x68 // xmm escape 4: 66 0F 38
   296		Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   297		Pq5  = 0x6a // xmm escape 5: F3 0F 38
   298		Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   299		Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   300		Pw   = 0x48 // Rex.w
   301		Pw8  = 0x90 // symbolic; exact value doesn't matter
   302		Py   = 0x80 // defaults to 64-bit mode
   303		Py1  = 0x81 // symbolic; exact value doesn't matter
   304		Py3  = 0x83 // symbolic; exact value doesn't matter
   305		Pavx = 0x84 // symbolic: exact value doesn't matter
   306	
   307		RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   308		Rxw     = 1 << 3 // =1, 64-bit operand size
   309		Rxr     = 1 << 2 // extend modrm reg
   310		Rxx     = 1 << 1 // extend sib index
   311		Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   312	)
   313	
   314	const (
   315		// Encoding for VEX prefix in tables.
   316		// The P, L, and W fields are chosen to match
   317		// their eventual locations in the VEX prefix bytes.
   318	
   319		// Encoding for VEX prefix in tables.
   320		// The P, L, and W fields are chosen to match
   321		// their eventual locations in the VEX prefix bytes.
   322	
   323		// Using spare bit to make leading [E]VEX encoding byte different from
   324		// 0x0f even if all other VEX fields are 0.
   325		avxEscape = 1 << 6
   326	
   327		// P field - 2 bits
   328		vex66 = 1 << 0
   329		vexF3 = 2 << 0
   330		vexF2 = 3 << 0
   331		// L field - 1 bit
   332		vexLZ  = 0 << 2
   333		vexLIG = 0 << 2
   334		vex128 = 0 << 2
   335		vex256 = 1 << 2
   336		// W field - 1 bit
   337		vexWIG = 0 << 7
   338		vexW0  = 0 << 7
   339		vexW1  = 1 << 7
   340		// M field - 5 bits, but mostly reserved; we can store up to 3
   341		vex0F   = 1 << 3
   342		vex0F38 = 2 << 3
   343		vex0F3A = 3 << 3
   344	)
   345	
   346	var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means operand class a also counts as class b; set up in instinit
   347	
   348	var reg [MAXREG]int
   349	
   350	var regrex [MAXREG + 1]int
   351	
   352	var ynone = []ytab{ // operand table for instructions written with no operands
   353		{Zlit, 1, argList{}}, // empty operand list; this line accounts for 1 byte of the optab op bytes
   354	}
   355	
   356	var ytext = []ytab{
   357		{Zpseudo, 0, argList{Ymb, Ytextsize}},
   358		{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   359	}
   360	
   361	var ynop = []ytab{ // Zpseudo forms: no operand, or a single Yiauto/Yml/Yrf/Yxr operand
   362		{Zpseudo, 0, argList{}},
   363		{Zpseudo, 0, argList{Yiauto}},
   364		{Zpseudo, 0, argList{Yml}},
   365		{Zpseudo, 0, argList{Yrf}},
   366		{Zpseudo, 0, argList{Yxr}},
   367		{Zpseudo, 0, argList{Yiauto}}, // NOTE(review): this and the next three lines repeat the operand lists above; they look redundant — confirm before removing
   368		{Zpseudo, 0, argList{Yml}},
   369		{Zpseudo, 0, argList{Yrf}},
   370		{Zpseudo, 1, argList{Yxr}},
   371	}
   372	
   373	var yfuncdata = []ytab{
   374		{Zpseudo, 0, argList{Yi32, Ym}},
   375	}
   376	
   377	var ypcdata = []ytab{
   378		{Zpseudo, 0, argList{Yi32, Yi32}},
   379	}
   380	
   381	var yxorb = []ytab{
   382		{Zib_, 1, argList{Yi32, Yal}},
   383		{Zibo_m, 2, argList{Yi32, Ymb}},
   384		{Zr_m, 1, argList{Yrb, Ymb}},
   385		{Zm_r, 1, argList{Ymb, Yrb}},
   386	}
   387	
   388	var yaddl = []ytab{ // z offsets below index the instruction's opBytes, e.g. AADDL's {0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}
   389		{Zibo_m, 2, argList{Yi8, Yml}},  // z+0, 2 bytes: opcode byte, addressing mode, then an 8-bit immediate
   390		{Zil_, 1, argList{Yi32, Yax}},   // z+2, 1 byte
   391		{Zilo_m, 2, argList{Yi32, Yml}}, // z+3, 2 bytes: like Zibo_m but with a 32-bit immediate
   392		{Zr_m, 1, argList{Yrl, Yml}},    // z+5, 1 byte
   393		{Zm_r, 1, argList{Yml, Yrl}},    // z+6, 1 byte
   394	}
   395	
   396	var yincl = []ytab{
   397		{Z_rp, 1, argList{Yrl}},
   398		{Zo_m, 2, argList{Yml}},
   399	}
   400	
   401	var yincq = []ytab{
   402		{Zo_m, 2, argList{Yml}},
   403	}
   404	
   405	var ycmpb = []ytab{
   406		{Z_ib, 1, argList{Yal, Yi32}},
   407		{Zm_ibo, 2, argList{Ymb, Yi32}},
   408		{Zm_r, 1, argList{Ymb, Yrb}},
   409		{Zr_m, 1, argList{Yrb, Ymb}},
   410	}
   411	
   412	var ycmpl = []ytab{
   413		{Zm_ibo, 2, argList{Yml, Yi8}},
   414		{Z_il, 1, argList{Yax, Yi32}},
   415		{Zm_ilo, 2, argList{Yml, Yi32}},
   416		{Zm_r, 1, argList{Yml, Yrl}},
   417		{Zr_m, 1, argList{Yrl, Yml}},
   418	}
   419	
   420	var yshb = []ytab{
   421		{Zo_m, 2, argList{Yi1, Ymb}},
   422		{Zibo_m, 2, argList{Yu8, Ymb}},
   423		{Zo_m, 2, argList{Ycx, Ymb}},
   424	}
   425	
   426	var yshl = []ytab{
   427		{Zo_m, 2, argList{Yi1, Yml}},
   428		{Zibo_m, 2, argList{Yu8, Yml}},
   429		{Zo_m, 2, argList{Ycl, Yml}},
   430		{Zo_m, 2, argList{Ycx, Yml}},
   431	}
   432	
   433	var ytestl = []ytab{
   434		{Zil_, 1, argList{Yi32, Yax}},
   435		{Zilo_m, 2, argList{Yi32, Yml}},
   436		{Zr_m, 1, argList{Yrl, Yml}},
   437		{Zm_r, 1, argList{Yml, Yrl}},
   438	}
   439	
   440	var ymovb = []ytab{
   441		{Zr_m, 1, argList{Yrb, Ymb}},
   442		{Zm_r, 1, argList{Ymb, Yrb}},
   443		{Zib_rp, 1, argList{Yi32, Yrb}},
   444		{Zibo_m, 2, argList{Yi32, Ymb}},
   445	}
   446	
   447	var ybtl = []ytab{
   448		{Zibo_m, 2, argList{Yi8, Yml}},
   449		{Zr_m, 1, argList{Yrl, Yml}},
   450	}
   451	
   452	var ymovw = []ytab{
   453		{Zr_m, 1, argList{Yrl, Yml}},
   454		{Zm_r, 1, argList{Yml, Yrl}},
   455		{Zil_rp, 1, argList{Yi32, Yrl}},
   456		{Zilo_m, 2, argList{Yi32, Yml}},
   457		{Zaut_r, 2, argList{Yiauto, Yrl}},
   458	}
   459	
   460	var ymovl = []ytab{
   461		{Zr_m, 1, argList{Yrl, Yml}},
   462		{Zm_r, 1, argList{Yml, Yrl}},
   463		{Zil_rp, 1, argList{Yi32, Yrl}},
   464		{Zilo_m, 2, argList{Yi32, Yml}},
   465		{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   466		{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   467		{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   468		{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   469		{Zaut_r, 2, argList{Yiauto, Yrl}},
   470	}
   471	
   472	var yret = []ytab{
   473		{Zo_iw, 1, argList{}},
   474		{Zo_iw, 1, argList{Yi32}},
   475	}
   476	
   477	var ymovq = []ytab{
   478		// valid in 32-bit mode
   479		{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   480		{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   481		{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   482		{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   483		{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   484	
   485		// valid only in 64-bit mode, usually with 64-bit prefix
   486		{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   487		{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   488		{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   489		{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   490		{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   491		{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   492		{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   493		{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   494		{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   495		{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   496	}
   497	
   498	var ymovbe = []ytab{
   499		{Zlitm_r, 3, argList{Ym, Yrl}},
   500		{Zlitr_m, 3, argList{Yrl, Ym}},
   501	}
   502	
   503	var ym_rl = []ytab{
   504		{Zm_r, 1, argList{Ym, Yrl}},
   505	}
   506	
   507	var yrl_m = []ytab{
   508		{Zr_m, 1, argList{Yrl, Ym}},
   509	}
   510	
   511	var ymb_rl = []ytab{
   512		{Zmb_r, 1, argList{Ymb, Yrl}},
   513	}
   514	
   515	var yml_rl = []ytab{
   516		{Zm_r, 1, argList{Yml, Yrl}},
   517	}
   518	
   519	var yrl_ml = []ytab{
   520		{Zr_m, 1, argList{Yrl, Yml}},
   521	}
   522	
   523	var yml_mb = []ytab{
   524		{Zr_m, 1, argList{Yrb, Ymb}},
   525		{Zm_r, 1, argList{Ymb, Yrb}},
   526	}
   527	
   528	var yrb_mb = []ytab{
   529		{Zr_m, 1, argList{Yrb, Ymb}},
   530	}
   531	
   532	var yxchg = []ytab{
   533		{Z_rp, 1, argList{Yax, Yrl}},
   534		{Zrp_, 1, argList{Yrl, Yax}},
   535		{Zr_m, 1, argList{Yrl, Yml}},
   536		{Zm_r, 1, argList{Yml, Yrl}},
   537	}
   538	
   539	var ydivl = []ytab{
   540		{Zm_o, 2, argList{Yml}},
   541	}
   542	
   543	var ydivb = []ytab{
   544		{Zm_o, 2, argList{Ymb}},
   545	}
   546	
   547	var yimul = []ytab{
   548		{Zm_o, 2, argList{Yml}},
   549		{Zib_rr, 1, argList{Yi8, Yrl}},
   550		{Zil_rr, 1, argList{Yi32, Yrl}},
   551		{Zm_r, 2, argList{Yml, Yrl}},
   552	}
   553	
   554	var yimul3 = []ytab{
   555		{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   556		{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   557	}
   558	
   559	var ybyte = []ytab{
   560		{Zbyte, 1, argList{Yi64}},
   561	}
   562	
   563	var yin = []ytab{
   564		{Zib_, 1, argList{Yi32}},
   565		{Zlit, 1, argList{}},
   566	}
   567	
   568	var yint = []ytab{
   569		{Zib_, 1, argList{Yi32}},
   570	}
   571	
   572	var ypushl = []ytab{
   573		{Zrp_, 1, argList{Yrl}},
   574		{Zm_o, 2, argList{Ym}},
   575		{Zib_, 1, argList{Yi8}},
   576		{Zil_, 1, argList{Yi32}},
   577	}
   578	
   579	var ypopl = []ytab{
   580		{Z_rp, 1, argList{Yrl}},
   581		{Zo_m, 2, argList{Ym}},
   582	}
   583	
   584	var ywrfsbase = []ytab{
   585		{Zm_o, 2, argList{Yrl}},
   586	}
   587	
   588	var yrdrand = []ytab{
   589		{Zo_m, 2, argList{Yrl}},
   590	}
   591	
   592	var yclflush = []ytab{
   593		{Zo_m, 2, argList{Ym}},
   594	}
   595	
   596	var ybswap = []ytab{
   597		{Z_rp, 2, argList{Yrl}},
   598	}
   599	
   600	var yscond = []ytab{
   601		{Zo_m, 2, argList{Ymb}},
   602	}
   603	
   604	var yjcond = []ytab{
   605		{Zbr, 0, argList{Ybr}},
   606		{Zbr, 0, argList{Yi0, Ybr}},
   607		{Zbr, 1, argList{Yi1, Ybr}},
   608	}
   609	
   610	var yloop = []ytab{
   611		{Zloop, 1, argList{Ybr}},
   612	}
   613	
   614	var ycall = []ytab{
   615		{Zcallindreg, 0, argList{Yml}},
   616		{Zcallindreg, 2, argList{Yrx, Yrx}},
   617		{Zcallind, 2, argList{Yindir}},
   618		{Zcall, 0, argList{Ybr}},
   619		{Zcallcon, 1, argList{Yi32}},
   620	}
   621	
   622	var yduff = []ytab{
   623		{Zcallduff, 1, argList{Yi32}},
   624	}
   625	
   626	var yjmp = []ytab{
   627		{Zo_m64, 2, argList{Yml}},
   628		{Zjmp, 0, argList{Ybr}},
   629		{Zjmpcon, 1, argList{Yi32}},
   630	}
   631	
   632	var yfmvd = []ytab{
   633		{Zm_o, 2, argList{Ym, Yf0}},
   634		{Zo_m, 2, argList{Yf0, Ym}},
   635		{Zm_o, 2, argList{Yrf, Yf0}},
   636		{Zo_m, 2, argList{Yf0, Yrf}},
   637	}
   638	
   639	var yfmvdp = []ytab{
   640		{Zo_m, 2, argList{Yf0, Ym}},
   641		{Zo_m, 2, argList{Yf0, Yrf}},
   642	}
   643	
   644	var yfmvf = []ytab{
   645		{Zm_o, 2, argList{Ym, Yf0}},
   646		{Zo_m, 2, argList{Yf0, Ym}},
   647	}
   648	
   649	var yfmvx = []ytab{
   650		{Zm_o, 2, argList{Ym, Yf0}},
   651	}
   652	
   653	var yfmvp = []ytab{
   654		{Zo_m, 2, argList{Yf0, Ym}},
   655	}
   656	
   657	var yfcmv = []ytab{
   658		{Zm_o, 2, argList{Yrf, Yf0}},
   659	}
   660	
   661	var yfadd = []ytab{
   662		{Zm_o, 2, argList{Ym, Yf0}},
   663		{Zm_o, 2, argList{Yrf, Yf0}},
   664		{Zo_m, 2, argList{Yf0, Yrf}},
   665	}
   666	
   667	var yfxch = []ytab{
   668		{Zo_m, 2, argList{Yf0, Yrf}},
   669		{Zm_o, 2, argList{Yrf, Yf0}},
   670	}
   671	
   672	var ycompp = []ytab{
   673		{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   674	}
   675	
   676	var ystsw = []ytab{
   677		{Zo_m, 2, argList{Ym}},
   678		{Zlit, 1, argList{Yax}},
   679	}
   680	
   681	var ysvrs_mo = []ytab{
   682		{Zm_o, 2, argList{Ym}},
   683	}
   684	
   685	// unaryDst version of "ysvrs_mo".
   686	var ysvrs_om = []ytab{
   687		{Zo_m, 2, argList{Ym}},
   688	}
   689	
   690	var ymm = []ytab{
   691		{Zm_r_xm, 1, argList{Ymm, Ymr}},
   692		{Zm_r_xm, 2, argList{Yxm, Yxr}},
   693	}
   694	
   695	var yxm = []ytab{
   696		{Zm_r_xm, 1, argList{Yxm, Yxr}},
   697	}
   698	
   699	var yxm_q4 = []ytab{
   700		{Zm_r, 1, argList{Yxm, Yxr}},
   701	}
   702	
   703	var yxcvm1 = []ytab{
   704		{Zm_r_xm, 2, argList{Yxm, Yxr}},
   705		{Zm_r_xm, 2, argList{Yxm, Ymr}},
   706	}
   707	
   708	var yxcvm2 = []ytab{
   709		{Zm_r_xm, 2, argList{Yxm, Yxr}},
   710		{Zm_r_xm, 2, argList{Ymm, Yxr}},
   711	}
   712	
   713	var yxr = []ytab{
   714		{Zm_r_xm, 1, argList{Yxr, Yxr}},
   715	}
   716	
   717	var yxr_ml = []ytab{
   718		{Zr_m_xm, 1, argList{Yxr, Yml}},
   719	}
   720	
   721	var ymr = []ytab{
   722		{Zm_r, 1, argList{Ymr, Ymr}},
   723	}
   724	
   725	var ymr_ml = []ytab{
   726		{Zr_m_xm, 1, argList{Ymr, Yml}},
   727	}
   728	
   729	var yxcmpi = []ytab{
   730		{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   731	}
   732	
   733	var yxmov = []ytab{
   734		{Zm_r_xm, 1, argList{Yxm, Yxr}},
   735		{Zr_m_xm, 1, argList{Yxr, Yxm}},
   736	}
   737	
   738	var yxcvfl = []ytab{
   739		{Zm_r_xm, 1, argList{Yxm, Yrl}},
   740	}
   741	
   742	var yxcvlf = []ytab{
   743		{Zm_r_xm, 1, argList{Yml, Yxr}},
   744	}
   745	
   746	var yxcvfq = []ytab{
   747		{Zm_r_xm, 2, argList{Yxm, Yrl}},
   748	}
   749	
   750	var yxcvqf = []ytab{
   751		{Zm_r_xm, 2, argList{Yml, Yxr}},
   752	}
   753	
   754	var yps = []ytab{
   755		{Zm_r_xm, 1, argList{Ymm, Ymr}},
   756		{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   757		{Zm_r_xm, 2, argList{Yxm, Yxr}},
   758		{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   759	}
   760	
   761	var yxrrl = []ytab{
   762		{Zm_r, 1, argList{Yxr, Yrl}},
   763	}
   764	
   765	var ymrxr = []ytab{
   766		{Zm_r, 1, argList{Ymr, Yxr}},
   767		{Zm_r_xm, 1, argList{Yxm, Yxr}},
   768	}
   769	
   770	var ymshuf = []ytab{
   771		{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   772	}
   773	
   774	var ymshufb = []ytab{
   775		{Zm2_r, 2, argList{Yxm, Yxr}},
   776	}
   777	
   778	// It should never have more than 1 entry,
   779	// because some optab entries have opcode sequences that
   780	// are longer than 2 bytes (zoffset=2 here),
   781	// ROUNDPD and ROUNDPS and recently added BLENDPD,
   782	// to name a few.
   783	var yxshuf = []ytab{
   784		{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   785	}
   786	
   787	var yextrw = []ytab{
   788		{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   789		{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   790	}
   791	
   792	var yextr = []ytab{
   793		{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   794	}
   795	
   796	var yinsrw = []ytab{
   797		{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   798	}
   799	
   800	var yinsr = []ytab{
   801		{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   802	}
   803	
   804	var ypsdq = []ytab{
   805		{Zibo_m, 2, argList{Yi8, Yxr}},
   806	}
   807	
   808	var ymskb = []ytab{
   809		{Zm_r_xm, 2, argList{Yxr, Yrl}},
   810		{Zm_r_xm, 1, argList{Ymr, Yrl}},
   811	}
   812	
   813	var ycrc32l = []ytab{
   814		{Zlitm_r, 0, argList{Yml, Yrl}},
   815	}
   816	
   817	var ycrc32b = []ytab{
   818		{Zlitm_r, 0, argList{Ymb, Yrl}},
   819	}
   820	
   821	var yprefetch = []ytab{
   822		{Zm_o, 2, argList{Ym}},
   823	}
   824	
   825	var yaes = []ytab{
   826		{Zlitm_r, 2, argList{Yxm, Yxr}},
   827	}
   828	
   829	var yxbegin = []ytab{
   830		{Zjmp, 1, argList{Ybr}},
   831	}
   832	
   833	var yxabort = []ytab{
   834		{Zib_, 1, argList{Yu8}},
   835	}
   836	
   837	var ylddqu = []ytab{
   838		{Zm_r, 1, argList{Ym, Yxr}},
   839	}
   840	
   841	var ypalignr = []ytab{
   842		{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   843	}
   844	
   845	var ysha256rnds2 = []ytab{
   846		{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   847	}
   848	
   849	var yblendvpd = []ytab{
   850		{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   851	}
   852	
   853	var ymmxmm0f38 = []ytab{
   854		{Zlitm_r, 3, argList{Ymm, Ymr}},
   855		{Zlitm_r, 5, argList{Yxm, Yxr}},
   856	}
   857	
   858	var yextractps = []ytab{
   859		{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   860	}
   861	
   862	var ysha1rnds4 = []ytab{
   863		{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   864	}
   865	
   866	// You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   867	// ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   868	// to find the entry with the given p.As and then looks through the ytable for
   869	// that instruction (the second field in the optab struct) for a line whose
   870	// argList (third entry) matches the Ytypes of the instruction's operands.  The
   871	// function oclass computes the specific Ytype of an operand and then the set
   872	// of more general Ytypes that it satisfies is implied by the ycover table, set
   873	// up in instinit.  For example, oclass distinguishes the constants 0 and 1
   874	// from the more general 8-bit constants, but instinit says
   875	//
   876	//        ycover[Yi0*Ymax+Ys32] = 1
   877	//        ycover[Yi1*Ymax+Ys32] = 1
   878	//        ycover[Yi8*Ymax+Ys32] = 1
   879	//
   880	// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   881	// if that's what an instruction can handle.
   882	//
   883	// In parallel with the scan through the ytable for the appropriate line, there
   884	// is a z pointer that starts out pointing at the strange magic byte list in
   885	// the Optab struct.  With each step past a non-matching ytable line, z
   886	// advances by the 2nd entry in the line.  When a matching line is found, that
   887	// z pointer has the extra data to use in laying down the instruction bytes.
   888	// The actual bytes laid down are a function of the 1st entry in the line (that
   889	// is, the Ztype) and the z bytes.
   890	//
   891	// For example, let's look at AADDL.  The optab line says:
   892	//        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893	//
   894	// and yaddl says
   895	//        var yaddl = []ytab{
   896	//                {Zibo_m, 2, argList{Yi8, Yml}},
   897	//                {Zil_, 1, argList{Yi32, Yax}},
   898	//                {Zilo_m, 2, argList{Yi32, Yml}},
   899	//                {Zr_m, 1, argList{Yrl, Yml}},
   900	//                {Zm_r, 1, argList{Yml, Yrl}},
   901	//        }
   902	//
   903	// so there are 5 possible types of ADDL instruction that can be laid down, and
   904	// possible states used to lay them down (Ztype and z pointer, assuming z
   905	// points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   906	//
   907	//        Yi8, Yml -> Zibo_m, z (0x83, 00)
   908	//        Yi32, Yax -> Zil_, z+2 (0x05)
   909	//        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   910	//        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   911	//        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   912	//
   913	// The Pconstant in the optab line controls the prefix bytes to emit.  That's
   914	// relatively straightforward as this program goes.
   915	//
   916	// The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   917	// example, is an opcode byte (z[0]) then an asmando (which is some kind of
   918	// encoded addressing mode for the Yml arg), and then a single immediate byte.
   919	// Zilo_m is the same but a long (32-bit) immediate.
   920	var optab =
   921	//	as, ytab, andproto, opcode
   922	[...]Optab{
   923		{obj.AXXX, nil, 0, opBytes{}},
   924		{AAAA, ynone, P32, opBytes{0x37}},
   925		{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   926		{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   927		{AAAS, ynone, P32, opBytes{0x3f}},
   928		{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   929		{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   930		{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931		{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932		{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   933		{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   934		{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   935		{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   936		{AADDPD, yxm, Pq, opBytes{0x58}},
   937		{AADDPS, yxm, Pm, opBytes{0x58}},
   938		{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   939		{AADDSD, yxm, Pf2, opBytes{0x58}},
   940		{AADDSS, yxm, Pf3, opBytes{0x58}},
   941		{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   942		{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   943		{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   944		{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   945		{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   946		{AADJSP, nil, 0, opBytes{}},
   947		{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   948		{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   949		{AANDNPD, yxm, Pq, opBytes{0x55}},
   950		{AANDNPS, yxm, Pm, opBytes{0x55}},
   951		{AANDPD, yxm, Pq, opBytes{0x54}},
   952		{AANDPS, yxm, Pm, opBytes{0x54}},
   953		{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   954		{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955		{AARPL, yrl_ml, P32, opBytes{0x63}},
   956		{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   957		{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   958		{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   959		{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   960		{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   961		{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   962		{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   963		{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   964		{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   965		{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   966		{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   967		{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   968		{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   969		{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   970		{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   971		{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   972		{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   973		{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   974		{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   975		{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   976		{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   977		{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   978		{ABYTE, ybyte, Px, opBytes{1}},
   979		{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   980		{ACBW, ynone, Pe, opBytes{0x98}},
   981		{ACDQ, ynone, Px, opBytes{0x99}},
   982		{ACDQE, ynone, Pw, opBytes{0x98}},
   983		{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   984		{ACLC, ynone, Px, opBytes{0xf8}},
   985		{ACLD, ynone, Px, opBytes{0xfc}},
   986		{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   987		{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   988		{ACLI, ynone, Px, opBytes{0xfa}},
   989		{ACLTS, ynone, Pm, opBytes{0x06}},
   990		{ACMC, ynone, Px, opBytes{0xf5}},
   991		{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   992		{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   993		{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   994		{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   995		{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   996		{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
   997		{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
   998		{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
   999		{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1000		{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1001		{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1002		{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1003		{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1004		{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1005		{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1006		{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1007		{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1008		{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1009		{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1010		{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1011		{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1012		{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1013		{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1014		{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1015		{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1016		{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1017		{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1018		{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1019		{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1020		{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1021		{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1022		{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1023		{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1024		{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1025		{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1026		{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1027		{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1028		{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1029		{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1030		{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1031		{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1032		{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1033		{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1034		{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1035		{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1036		{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1037		{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1038		{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1039		{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1040		{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1041		{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1042		{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1043		{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044		{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1045		{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1046		{ACMPSL, ynone, Px, opBytes{0xa7}},
  1047		{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1048		{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1049		{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1050		{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1051		{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1052		{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1053		{ACPUID, ynone, Pm, opBytes{0xa2}},
  1054		{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1055		{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1056		{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1057		{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1058		{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1059		{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1060		{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1061		{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1062		{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1063		{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1064		{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1065		{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1066		{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1067		{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1068		{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1069		{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1070		{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1071		{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1072		{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1073		{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1074		{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1075		{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1076		{ACWD, ynone, Pe, opBytes{0x99}},
  1077		{ACWDE, ynone, Px, opBytes{0x98}},
  1078		{ACQO, ynone, Pw, opBytes{0x99}},
  1079		{ADAA, ynone, P32, opBytes{0x27}},
  1080		{ADAS, ynone, P32, opBytes{0x2f}},
  1081		{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1082		{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1083		{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1084		{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1085		{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1086		{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1087		{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1088		{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1089		{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1090		{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1091		{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1092		{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1093		{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1094		{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1095		{AEMMS, ynone, Pm, opBytes{0x77}},
  1096		{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1097		{AENTER, nil, 0, opBytes{}}, // botch
  1098		{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1099		{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1100		{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1101		{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1102		{AHLT, ynone, Px, opBytes{0xf4}},
  1103		{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1104		{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1105		{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1106		{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1107		{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1108		{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1109		{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1110		{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111		{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1112		{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1113		{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1114		{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1115		{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1116		{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1117		{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1118		{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1119		{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1120		{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1121		{AINSB, ynone, Pb, opBytes{0x6c}},
  1122		{AINSL, ynone, Px, opBytes{0x6d}},
  1123		{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1124		{AINSW, ynone, Pe, opBytes{0x6d}},
  1125		{AICEBP, ynone, Px, opBytes{0xf1}},
  1126		{AINT, yint, Px, opBytes{0xcd}},
  1127		{AINTO, ynone, P32, opBytes{0xce}},
  1128		{AIRETL, ynone, Px, opBytes{0xcf}},
  1129		{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1130		{AIRETW, ynone, Pe, opBytes{0xcf}},
  1131		{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1132		{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1133		{AJCXZL, yloop, Px, opBytes{0xe3}},
  1134		{AJCXZW, yloop, Px, opBytes{0xe3}},
  1135		{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1136		{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1137		{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1138		{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1139		{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1140		{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1141		{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1142		{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1143		{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1144		{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1145		{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1146		{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1147		{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1148		{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1149		{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1150		{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1151		{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1152		{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1153		{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1154		{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1155		{ALAHF, ynone, Px, opBytes{0x9f}},
  1156		{ALARL, yml_rl, Pm, opBytes{0x02}},
  1157		{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1158		{ALARW, yml_rl, Pq, opBytes{0x02}},
  1159		{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1160		{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1161		{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1162		{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1163		{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1164		{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1165		{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1166		{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1167		{ALOCK, ynone, Px, opBytes{0xf0}},
  1168		{ALODSB, ynone, Pb, opBytes{0xac}},
  1169		{ALODSL, ynone, Px, opBytes{0xad}},
  1170		{ALODSQ, ynone, Pw, opBytes{0xad}},
  1171		{ALODSW, ynone, Pe, opBytes{0xad}},
  1172		{ALONG, ybyte, Px, opBytes{4}},
  1173		{ALOOP, yloop, Px, opBytes{0xe2}},
  1174		{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1175		{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1176		{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1177		{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1178		{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1179		{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1180		{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1181		{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1182		{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1183		{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1184		{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1185		{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1186		{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1187		{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1188		{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1189		{AMINPD, yxm, Pe, opBytes{0x5d}},
  1190		{AMINPS, yxm, Pm, opBytes{0x5d}},
  1191		{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1192		{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1193		{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1194		{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1195		{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1196		{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1197		{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1198		{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1199		{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1200		{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1201		{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1202		{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1203		{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1204		{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1205		{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1206		{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1207		{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1208		{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1209		{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1210		{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1211		{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1212		{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1213		{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1214		{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1215		{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1216		{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1217		{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1218		{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1219		{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1220		{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1221		{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1222		{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1223		{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1224		{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1225		{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1226		{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1227		{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1228		{AMOVSL, ynone, Px, opBytes{0xa5}},
  1229		{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1230		{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1231		{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1232		{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1233		{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1234		{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1235		{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1236		{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1237		{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1238		{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1239		{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1240		{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1241		{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1242		{AMULPD, yxm, Pe, opBytes{0x59}},
  1243		{AMULPS, yxm, Ym, opBytes{0x59}},
  1244		{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1245		{AMULSD, yxm, Pf2, opBytes{0x59}},
  1246		{AMULSS, yxm, Pf3, opBytes{0x59}},
  1247		{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1248		{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1249		{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1250		{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1251		{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1252		{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1253		{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1254		{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1255		{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1256		{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1257		{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1258		{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1259		{AORPD, yxm, Pq, opBytes{0x56}},
  1260		{AORPS, yxm, Pm, opBytes{0x56}},
  1261		{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1262		{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1263		{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1264		{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1265		{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1266		{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1267		{AOUTSL, ynone, Px, opBytes{0x6f}},
  1268		{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1269		{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1270		{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1271		{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1272		{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1273		{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1274		{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1275		{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1276		{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1277		{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1278		{APADDQ, yxm, Pe, opBytes{0xd4}},
  1279		{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1280		{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1281		{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1282		{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1283		{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1284		{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1285		{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1286		{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1287		{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1288		{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1289		{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1290		{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1291		{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1292		{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1293		{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1294		{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1295		{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1296		{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1297		{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1298		{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1299		{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1300		{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1301		{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1302		{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1303		{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1304		{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1305		{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1306		{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1307		{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1308		{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1309		{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1310		{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1311		{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1312		{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1313		{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1314		{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1315		{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1316		{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1317		{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1318		{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1319		{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1320		{APMAXSW, yxm, Pe, opBytes{0xee}},
  1321		{APMAXUB, yxm, Pe, opBytes{0xde}},
  1322		{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1323		{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1324		{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1325		{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1326		{APMINSW, yxm, Pe, opBytes{0xea}},
  1327		{APMINUB, yxm, Pe, opBytes{0xda}},
  1328		{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1329		{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1330		{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1331		{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1332		{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1333		{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1334		{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1335		{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1336		{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1337		{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1338		{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1339		{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1340		{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1341		{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1342		{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1343		{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1344		{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1345		{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1346		{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1347		{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1348		{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1349		{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1350		{APOPAL, ynone, P32, opBytes{0x61}},
  1351		{APOPAW, ynone, Pe, opBytes{0x61}},
  1352		{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1353		{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1354		{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1355		{APOPFL, ynone, P32, opBytes{0x9d}},
  1356		{APOPFQ, ynone, Py, opBytes{0x9d}},
  1357		{APOPFW, ynone, Pe, opBytes{0x9d}},
  1358		{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1359		{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1360		{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1361		{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1362		{APSADBW, yxm, Pq, opBytes{0xf6}},
  1363		{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1364		{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1365		{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1366		{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1367		{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1368		{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1369		{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1370		{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1371		{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1372		{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1373		{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1374		{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1375		{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1376		{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1377		{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1378		{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1379		{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1380		{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1381		{APSUBB, yxm, Pe, opBytes{0xf8}},
  1382		{APSUBL, yxm, Pe, opBytes{0xfa}},
  1383		{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1384		{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1385		{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1386		{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1387		{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1388		{APSUBW, yxm, Pe, opBytes{0xf9}},
  1389		{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1390		{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1391		{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1392		{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1393		{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1394		{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1395		{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1396		{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1397		{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1398		{APUSHAL, ynone, P32, opBytes{0x60}},
  1399		{APUSHAW, ynone, Pe, opBytes{0x60}},
  1400		{APUSHFL, ynone, P32, opBytes{0x9c}},
  1401		{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1402		{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1403		{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1404		{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1405		{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1406		{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1407		{AQUAD, ybyte, Px, opBytes{8}},
  1408		{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1409		{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1410		{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1411		{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1412		{ARCPPS, yxm, Pm, opBytes{0x53}},
  1413		{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1414		{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1415		{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1416		{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1417		{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1418		{AREP, ynone, Px, opBytes{0xf3}},
  1419		{AREPN, ynone, Px, opBytes{0xf2}},
  1420		{obj.ARET, ynone, Px, opBytes{0xc3}},
  1421		{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1422		{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1423		{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1424		{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1425		{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1426		{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1427		{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1428		{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1429		{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1430		{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1431		{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1432		{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1433		{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1434		{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1435		{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1436		{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1437		{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1438		{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1439		{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1440		{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1441		{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1442		{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1443		{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1444		{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1445		{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1446		{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1447		{ASCASB, ynone, Pb, opBytes{0xae}},
  1448		{ASCASL, ynone, Px, opBytes{0xaf}},
  1449		{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1450		{ASCASW, ynone, Pe, opBytes{0xaf}},
  1451		{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1452		{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1453		{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1454		{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1455		{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1456		{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1457		{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1458		{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1459		{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1460		{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1461		{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1462		{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1463		{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1464		{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1465		{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1466		{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1467		{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1468		{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1469		{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1470		{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1471		{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1472		{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1473		{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1474		{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1475		{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1476		{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1477		{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1478		{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1479		{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1480		{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1481		{ASTC, ynone, Px, opBytes{0xf9}},
  1482		{ASTD, ynone, Px, opBytes{0xfd}},
  1483		{ASTI, ynone, Px, opBytes{0xfb}},
  1484		{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1485		{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1486		{ASTOSL, ynone, Px, opBytes{0xab}},
  1487		{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1488		{ASTOSW, ynone, Pe, opBytes{0xab}},
  1489		{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1490		{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1491		{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1492		{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1493		{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1494		{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1495		{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1496		{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1497		{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1498		{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1499		{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1500		{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1501		{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1502		{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1503		{obj.ATEXT, ytext, Px, opBytes{}},
  1504		{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1505		{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1506		{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1507		{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1508		{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1509		{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1510		{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1511		{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1512		{AWAIT, ynone, Px, opBytes{0x9b}},
  1513		{AWORD, ybyte, Px, opBytes{2}},
  1514		{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1515		{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1516		{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1517		{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1518		{AXLAT, ynone, Px, opBytes{0xd7}},
  1519		{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1520		{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1521		{AXORPD, yxm, Pe, opBytes{0x57}},
  1522		{AXORPS, yxm, Pm, opBytes{0x57}},
  1523		{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1524		{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1525		{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1526		{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1527		{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1528		{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1529		{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1530		{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1531		{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1532		{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1533		{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1534		{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1535		{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1536		{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1537		{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1538		{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1539		{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1540		{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1541		{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1542		{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1543		{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1544		{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1545		{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1546		{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1547		{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1548		{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1549		{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1550		{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1551		{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1552		{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1553		{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1554		{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1555		{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1556		{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1557		{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1558		{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1559		{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1560		{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1561		{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1562		{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1563		{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1564		{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1565		{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1566		{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1567		{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1568		{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1569		{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1570		{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1571		{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1572		{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1573		{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1574		{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1575		{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1576		{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1577		{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1578		{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1579		{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1580		{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1581		{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1582		{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1583		{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1584		{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1585		{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1586		{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1587		{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1588		{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1589		{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1590		{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1591		{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1592		{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1593		{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1594		{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1595		{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1596		{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1597		{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1598		{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1599		{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1600		{AFFREE, nil, 0, opBytes{}},
  1601		{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1602		{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1603		{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1604		{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1605		{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1606		{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1607		{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1608		{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1609		{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1610		{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1611		{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1612		{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1613		{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1614		{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1615		{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1616		{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1617		{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1618		{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1619		{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1620		{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1621		{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1622		{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1623		{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1624		{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1625		{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1626		{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1627		{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1628		{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1629		{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1630		{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1631		{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1632		{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1633		{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1634		{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1635		{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1636		{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1637		{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1638		{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1639		{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1640		{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1641		{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1642		{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1643		{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1644		{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1645		{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1646		{AINVD, ynone, Pm, opBytes{0x08}},
  1647		{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1648		{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1649		{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1650		{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1651		{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1652		{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1653		{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1654		{ARDMSR, ynone, Pm, opBytes{0x32}},
  1655		{ARDPMC, ynone, Pm, opBytes{0x33}},
  1656		{ARDTSC, ynone, Pm, opBytes{0x31}},
  1657		{ARSM, ynone, Pm, opBytes{0xaa}},
  1658		{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1659		{ASYSRET, ynone, Pm, opBytes{0x07}},
  1660		{AWBINVD, ynone, Pm, opBytes{0x09}},
  1661		{AWRMSR, ynone, Pm, opBytes{0x30}},
  1662		{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1663		{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1664		{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1665		{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1666		{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1667		{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1668		{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1669		{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1670		{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1671		{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1672		{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1673		{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1674		{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1675		{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1676		{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1677		{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1678		{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1679		{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1680		{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1681		{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1682		{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1683		{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1684		{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1685		{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1686		{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1687		{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1688		{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1689		{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1690		{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1691		{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1692		{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1693		{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1694	
  1695		{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1696		{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1697		{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1698		{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1699		{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1700		{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1701		{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1702		{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1703		{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1704		{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1705		{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1706		{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1707		{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1708		{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1709		{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1710		{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1711		{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1712		{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1713		{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1714		{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1715		{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1716		{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1717		{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1718		{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1719		{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1720		{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1721		{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1722		{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1723		{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1724		{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1725		{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1726		{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1727		{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1728		{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1729		{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1730		{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1731		{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1732		{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1733		{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1734		{AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1735		{AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1736		{AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1737		{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1738		{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1739		{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1740		{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1741		{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1742		{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1743		{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1744		{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1745		{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1746		{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1747		{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1748		{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1749		{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1750		{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1751		{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1752		{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1753		{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1754		{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1755		{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1756		{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1757		{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1758		{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1759		{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1760		{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1761		{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1762		{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1763		{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1764		{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1765		{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1766		{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1767		{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1768		{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1769		{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1770		{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1771		{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1772	
  1773		{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1774		{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1775		{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1776		{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1777		{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1778		{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1779		{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1780		{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1781		{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1782		{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1783		{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1784		{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1785		{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1786	
  1787		{obj.AEND, nil, 0, opBytes{}},
  1788		{0, nil, 0, opBytes{}},
  1789	}
  1790	
// opindex maps an opcode, masked with obj.AMask, to its encoding table entry.
// instinit fills it in from avxOptab and optab; slots for opcodes that appear
// in neither table stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1792	
  1793	// useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1794	// This happens on systems like Solaris that call .so functions instead of system calls.
  1795	// It does not seem to be necessary for any other systems. This is probably working
  1796	// around a Solaris-specific bug that should be fixed differently, but we don't know
  1797	// what that bug is. And this does fix it.
  1798	func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1799		if ctxt.Headtype == objabi.Hsolaris {
  1800			// All the Solaris dynamic imports from libc.so begin with "libc_".
  1801			return strings.HasPrefix(s.Name, "libc_")
  1802		}
  1803		return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1804	}
  1805	
  1806	// single-instruction no-ops of various lengths.
  1807	// constructed by hand and disassembled with gdb to verify.
  1808	// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1809	var nop = [][16]uint8{
  1810		{0x90},
  1811		{0x66, 0x90},
  1812		{0x0F, 0x1F, 0x00},
  1813		{0x0F, 0x1F, 0x40, 0x00},
  1814		{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1815		{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1816		{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1817		{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1818		{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1819	}
  1820	
  1821	// Native Client rejects the repeated 0x66 prefix.
  1822	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1823	func fillnop(p []byte, n int) {
  1824		var m int
  1825	
  1826		for n > 0 {
  1827			m = n
  1828			if m > len(nop) {
  1829				m = len(nop)
  1830			}
  1831			copy(p[:m], nop[m-1][:m])
  1832			p = p[m:]
  1833			n -= m
  1834		}
  1835	}
  1836	
  1837	func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1838		s.Grow(int64(c) + int64(pad))
  1839		fillnop(s.P[c:], int(pad))
  1840		return c + pad
  1841	}
  1842	
  1843	func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1844		if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1845			return l
  1846		}
  1847		return q
  1848	}
  1849	
// span6 assembles the instruction list s.Func.Text into machine code.
//
// On return s.P holds the encoded bytes, s.R the relocations produced while
// encoding, s.Size the total length, and every Prog has its Pc (byte offset
// within s.P) and Isize (encoded length) filled in. If s.P is already non-nil
// the symbol is taken to be assembled and nothing is done.
//
// Branches are first encoded in their short form. Whenever a forward branch
// turns out not to fit in a signed byte it is switched to the long form and
// the whole function is assembled again; more than 20 rounds is treated as a
// fatal error.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pass 1: normalize instructions before any sizing is done.
	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch without a resolved target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.Pcond == nil {
			p.Pcond = p
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
	}

	// Pass 2: count instructions and classify branches.
	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// This loop sets branchShort on each instruction as it is visited,
		// so a target that already carries the bit was reached earlier in
		// the list (or is p itself): p branches backwards and the target
		// is marked as a loop head.
		if q := p.Pcond; q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly rounds performed
	var c int32 // current offset into s.P
	errors := ctxt.Errors
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard the output of the previous round, zeroing the old
		// relocations before truncating the slice.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		for p := s.Func.Text; p != nil; p = p.Link {
			// p.Isize is the size recorded by a previous round, so the
			// NaCl padding below only takes effect from the second round on.
			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
				// pad everything to avoid crossing 32-byte boundary
				if c>>5 != (c+int32(p.Isize)-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call deferreturn to start at 32-byte boundary
				// so that subtracting 5 in jmpdefer will jump back
				// to that boundary and rerun the call.
				if p.As == obj.ACALL && p.To.Sym == deferreturn {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call to end at 32-byte boundary
				if p.As == obj.ACALL {
					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
				}

				// the linker treats REP and STOSQ as different instructions
				// but in fact the REP is a prefix on the STOSQ.
				// make sure REP has room for 2 more bytes, so that
				// padding will not be inserted before the next instruction.
				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// same for LOCK.
				// various instructions follow; the longest is 4 bytes.
				// give ourselves 8 bytes so as to avoid surprises.
				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}
			}

			// Align loop heads to loopAlign, but only when that costs
			// no more than maxLoopPad bytes of padding.
			if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			// p.Rel heads a list, linked through Forwd, of already-emitted
			// branches q. Now that p's offset is known, the displacement
			// (measured from the end of q) is patched into q's bytes.
			for q := p.Rel; q != nil; q = q.Forwd {
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// The displacement does not fit in a signed byte:
					// switch q to the long form and schedule another round.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// JCXZL and XBEGIN keep their displacement byte at
					// offset 2; the other short branches keep it at offset 1.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the
					// last four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			// c has not changed since the assignment above, so this
			// stores the same value again.
			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				// When building for NaCl, we currently need
				// at least 2 rounds to ensure proper 32-byte alignment.
				if ctxt.Headtype == objabi.Hnacl {
					reAssemble = true
				}
			}

			// Append the encoded instruction to the output.
			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			c += int32(m)
		}

		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Give up instead of looping again if this round reported errors.
		if ctxt.Errors > errors {
			return
		}
	}

	// On NaCl the function is padded out to a 32-byte boundary.
	if ctxt.Headtype == objabi.Hnacl {
		c = naclpad(ctxt, s, c, -c&31)
	}

	s.Size = int64(c)

	// Disabled debugging dump of the generated bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}
}
  2032	
// instinit initializes the package-level tables used by the assembler:
// opindex (opcode -> table entry), ycover (operand class coverage matrix),
// and reg/regrex (per-register encoding data). It also looks up the symbols
// needed for the Plan 9 and NaCl targets.
//
// ycover[0] doubles as the "already initialized" flag, so calling instinit
// more than once is harmless.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	case objabi.Hnacl:
		deferreturn = ctxt.LookupABI("runtime.deferreturn", obj.ABIInternal)
	}

	// Index both opcode tables by opcode. Two entries landing in the same
	// slot is reported as a "phase error".
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to the terminating entry whose
	// opcode is 0.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// ycover is a Ymax x Ymax matrix stored row-major: ycover[a*Ymax+b].
	// NOTE(review): presumably a non-zero entry means an operand classified
	// as a is accepted where class b is expected; confirm against the code
	// that reads ycover.
	// Every class covers itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// Immediate classes: each narrower class is covered by the wider ones.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// General-purpose register classes.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register-or-memory classes.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Vector register and vector register-or-memory classes.
	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	// K register classes.
	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	// Vector-memory classes.
	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Per-register encoding data. reg[i] is -1 for values that match none
	// of the ranges below; otherwise it is the register's number within its
	// family reduced to three bits. regrex[i] collects the extension bits
	// set for registers numbered 8 and above.
	// The checks are order-dependent: REG_AH..REG_BH is handled after
	// REG_AL..REG_R15B and overrides the reg value set there.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			// NOTE(review): 0x40 looks like a bare REX prefix, presumably
			// needed to address SPB..DIB rather than AH..BH; confirm.
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		// X, Y and Z registers follow one pattern: numbers 8-15 get
		// Rxr|Rxx|Rxb, 16-23 get RxrEvex, and 24-31 get all four bits.
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// Only regrex is set for REG_CR+8..REG_CR+15; reg is left as is.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
  2268	
// isAndroid records whether objabi.GOOS is "android". prefixof consults it
// when choosing the segment register used for TLS references.
var isAndroid = objabi.GOOS == "android"
  2270	
  2271	func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2272		if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2273			return 0
  2274		}
  2275		if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2276			switch a.Reg {
  2277			case REG_CS:
  2278				return 0x2e
  2279	
  2280			case REG_DS:
  2281				return 0x3e
  2282	
  2283			case REG_ES:
  2284				return 0x26
  2285	
  2286			case REG_FS:
  2287				return 0x64
  2288	
  2289			case REG_GS:
  2290				return 0x65
  2291	
  2292			case REG_TLS:
  2293				// NOTE: Systems listed here should be only systems that
  2294				// support direct TLS references like 8(TLS) implemented as
  2295				// direct references from FS or GS. Systems that require
  2296				// the initial-exec model, where you load the TLS base into
  2297				// a register and then index from that register, do not reach
  2298				// this code and should not be listed.
  2299				if ctxt.Arch.Family == sys.I386 {
  2300					switch ctxt.Headtype {
  2301					default:
  2302						if isAndroid {
  2303							return 0x65 // GS
  2304						}
  2305						log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2306	
  2307					case objabi.Hdarwin,
  2308						objabi.Hdragonfly,
  2309						objabi.Hfreebsd,
  2310						objabi.Hnetbsd,
  2311						objabi.Hopenbsd:
  2312						return 0x65 // GS
  2313					}
  2314				}
  2315	
  2316				switch ctxt.Headtype {
  2317				default:
  2318					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2319	
  2320				case objabi.Hlinux:
  2321					if isAndroid {
  2322						return 0x64 // FS
  2323					}
  2324	
  2325					if ctxt.Flag_shared {
  2326						log.Fatalf("unknown TLS base register for linux with -shared")
  2327					} else {
  2328						return 0x64 // FS
  2329					}
  2330	
  2331				case objabi.Hdragonfly,
  2332					objabi.Hfreebsd,
  2333					objabi.Hnetbsd,
  2334					objabi.Hopenbsd,
  2335					objabi.Hsolaris:
  2336					return 0x64 // FS
  2337	
  2338				case objabi.Hdarwin:
  2339					return 0x65 // GS
  2340				}
  2341			}
  2342		}
  2343	
  2344		if ctxt.Arch.Family == sys.I386 {
  2345			if a.Index == REG_TLS && ctxt.Flag_shared {
  2346				// When building for inclusion into a shared library, an instruction of the form
  2347				//     MOVL off(CX)(TLS*1), AX
  2348				// becomes
  2349				//     mov %gs:off(%ecx), %eax
  2350				// which assumes that the correct TLS offset has been loaded into %ecx (today
  2351				// there is only one TLS variable -- g -- so this is OK). When not building for
  2352				// a shared library the instruction it becomes
  2353				//     mov 0x0(%ecx), %eax
  2354				// and a R_TLS_LE relocation, and so does not require a prefix.
  2355				return 0x65 // GS
  2356			}
  2357			return 0
  2358		}
  2359	
  2360		switch a.Index {
  2361		case REG_CS:
  2362			return 0x2e
  2363	
  2364		case REG_DS:
  2365			return 0x3e
  2366	
  2367		case REG_ES:
  2368			return 0x26
  2369	
  2370		case REG_TLS:
  2371			if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2372				// When building for inclusion into a shared library, an instruction of the form
  2373				//     MOV off(CX)(TLS*1), AX
  2374				// becomes
  2375				//     mov %fs:off(%rcx), %rax
  2376				// which assumes that the correct TLS offset has been loaded into %rcx (today
  2377				// there is only one TLS variable -- g -- so this is OK). When not building for
  2378				// a shared library the instruction does not require a prefix.
  2379				return 0x64
  2380			}
  2381	
  2382		case REG_FS:
  2383			return 0x64
  2384	
  2385		case REG_GS:
  2386			return 0x65
  2387		}
  2388	
  2389		return 0
  2390	}
  2391	
  2392	// oclassRegList returns multisource operand class for addr.
  2393	func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2394		// TODO(quasilyte): when oclass register case is refactored into
  2395		// lookup table, use it here to get register kind more easily.
  2396		// Helper functions like regIsXmm should go away too (they will become redundant).
  2397	
  2398		regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2399		regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2400		regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2401	
  2402		reg0, reg1 := decodeRegisterRange(addr.Offset)
  2403		low := regIndex(int16(reg0))
  2404		high := regIndex(int16(reg1))
  2405	
  2406		if ctxt.Arch.Family == sys.I386 {
  2407			if low >= 8 || high >= 8 {
  2408				return Yxxx
  2409			}
  2410		}
  2411	
  2412		switch high - low {
  2413		case 3:
  2414			switch {
  2415			case regIsXmm(reg0) && regIsXmm(reg1):
  2416				return YxrEvexMulti4
  2417			case regIsYmm(reg0) && regIsYmm(reg1):
  2418				return YyrEvexMulti4
  2419			case regIsZmm(reg0) && regIsZmm(reg1):
  2420				return YzrMulti4
  2421			default:
  2422				return Yxxx
  2423			}
  2424		default:
  2425			return Yxxx
  2426		}
  2427	}
  2428	
  2429	// oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2430	// For addr that is not V-mem returns (Yxxx, false).
  2431	func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2432		switch addr.Index {
  2433		case REG_X0 + 0,
  2434			REG_X0 + 1,
  2435			REG_X0 + 2,
  2436			REG_X0 + 3,
  2437			REG_X0 + 4,
  2438			REG_X0 + 5,
  2439			REG_X0 + 6,
  2440			REG_X0 + 7:
  2441			return Yxvm, true
  2442		case REG_X8 + 0,
  2443			REG_X8 + 1,
  2444			REG_X8 + 2,
  2445			REG_X8 + 3,
  2446			REG_X8 + 4,
  2447			REG_X8 + 5,
  2448			REG_X8 + 6,
  2449			REG_X8 + 7:
  2450			if ctxt.Arch.Family == sys.I386 {
  2451				return Yxxx, true
  2452			}
  2453			return Yxvm, true
  2454		case REG_X16 + 0,
  2455			REG_X16 + 1,
  2456			REG_X16 + 2,
  2457			REG_X16 + 3,
  2458			REG_X16 + 4,
  2459			REG_X16 + 5,
  2460			REG_X16 + 6,
  2461			REG_X16 + 7,
  2462			REG_X16 + 8,
  2463			REG_X16 + 9,
  2464			REG_X16 + 10,
  2465			REG_X16 + 11,
  2466			REG_X16 + 12,
  2467			REG_X16 + 13,
  2468			REG_X16 + 14,
  2469			REG_X16 + 15:
  2470			if ctxt.Arch.Family == sys.I386 {
  2471				return Yxxx, true
  2472			}
  2473			return YxvmEvex, true
  2474	
  2475		case REG_Y0 + 0,
  2476			REG_Y0 + 1,
  2477			REG_Y0 + 2,
  2478			REG_Y0 + 3,
  2479			REG_Y0 + 4,
  2480			REG_Y0 + 5,
  2481			REG_Y0 + 6,
  2482			REG_Y0 + 7:
  2483			return Yyvm, true
  2484		case REG_Y8 + 0,
  2485			REG_Y8 + 1,
  2486			REG_Y8 + 2,
  2487			REG_Y8 + 3,
  2488			REG_Y8 + 4,
  2489			REG_Y8 + 5,
  2490			REG_Y8 + 6,
  2491			REG_Y8 + 7:
  2492			if ctxt.Arch.Family == sys.I386 {
  2493				return Yxxx, true
  2494			}
  2495			return Yyvm, true
  2496		case REG_Y16 + 0,
  2497			REG_Y16 + 1,
  2498			REG_Y16 + 2,
  2499			REG_Y16 + 3,
  2500			REG_Y16 + 4,
  2501			REG_Y16 + 5,
  2502			REG_Y16 + 6,
  2503			REG_Y16 + 7,
  2504			REG_Y16 + 8,
  2505			REG_Y16 + 9,
  2506			REG_Y16 + 10,
  2507			REG_Y16 + 11,
  2508			REG_Y16 + 12,
  2509			REG_Y16 + 13,
  2510			REG_Y16 + 14,
  2511			REG_Y16 + 15:
  2512			if ctxt.Arch.Family == sys.I386 {
  2513				return Yxxx, true
  2514			}
  2515			return YyvmEvex, true
  2516	
  2517		case REG_Z0 + 0,
  2518			REG_Z0 + 1,
  2519			REG_Z0 + 2,
  2520			REG_Z0 + 3,
  2521			REG_Z0 + 4,
  2522			REG_Z0 + 5,
  2523			REG_Z0 + 6,
  2524			REG_Z0 + 7:
  2525			return Yzvm, true
  2526		case REG_Z8 + 0,
  2527			REG_Z8 + 1,
  2528			REG_Z8 + 2,
  2529			REG_Z8 + 3,
  2530			REG_Z8 + 4,
  2531			REG_Z8 + 5,
  2532			REG_Z8 + 6,
  2533			REG_Z8 + 7,
  2534			REG_Z8 + 8,
  2535			REG_Z8 + 9,
  2536			REG_Z8 + 10,
  2537			REG_Z8 + 11,
  2538			REG_Z8 + 12,
  2539			REG_Z8 + 13,
  2540			REG_Z8 + 14,
  2541			REG_Z8 + 15,
  2542			REG_Z8 + 16,
  2543			REG_Z8 + 17,
  2544			REG_Z8 + 18,
  2545			REG_Z8 + 19,
  2546			REG_Z8 + 20,
  2547			REG_Z8 + 21,
  2548			REG_Z8 + 22,
  2549			REG_Z8 + 23:
  2550			if ctxt.Arch.Family == sys.I386 {
  2551				return Yxxx, true
  2552			}
  2553			return Yzvm, true
  2554		}
  2555	
  2556		return Yxxx, false
  2557	}
  2558	
  2559	func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2560		switch a.Type {
  2561		case obj.TYPE_REGLIST:
  2562			return oclassRegList(ctxt, a)
  2563	
  2564		case obj.TYPE_NONE:
  2565			return Ynone
  2566	
  2567		case obj.TYPE_BRANCH:
  2568			return Ybr
  2569	
  2570		case obj.TYPE_INDIR:
  2571			if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2572				return Yindir
  2573			}
  2574			return Yxxx
  2575	
  2576		case obj.TYPE_MEM:
  2577			// Pseudo registers have negative index, but SP is
  2578			// not pseudo on x86, hence REG_SP check is not redundant.
  2579			if a.Index == REG_SP || a.Index < 0 {
  2580				// Can't use FP/SB/PC/SP as the index register.
  2581				return Yxxx
  2582			}
  2583	
  2584			if vmem, ok := oclassVMem(ctxt, a); ok {
  2585				return vmem
  2586			}
  2587	
  2588			if ctxt.Arch.Family == sys.AMD64 {
  2589				switch a.Name {
  2590				case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2591					// Global variables can't use index registers and their
  2592					// base register is %rip (%rip is encoded as REG_NONE).
  2593					if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2594						return Yxxx
  2595					}
  2596				case obj.NAME_AUTO, obj.NAME_PARAM:
  2597					// These names must have a base of SP.  The old compiler
  2598					// uses 0 for the base register. SSA uses REG_SP.
  2599					if a.Reg != REG_SP && a.Reg != 0 {
  2600						return Yxxx
  2601					}
  2602				case obj.NAME_NONE:
  2603					// everything is ok
  2604				default:
  2605					// unknown name
  2606					return Yxxx
  2607				}
  2608			}
  2609			return Ym
  2610	
  2611		case obj.TYPE_ADDR:
  2612			switch a.Name {
  2613			case obj.NAME_GOTREF:
  2614				ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2615				return Yxxx
  2616	
  2617			case obj.NAME_EXTERN,
  2618				obj.NAME_STATIC:
  2619				if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2620					return Yi32
  2621				}
  2622				return Yiauto // use pc-relative addressing
  2623	
  2624			case obj.NAME_AUTO,
  2625				obj.NAME_PARAM:
  2626				return Yiauto
  2627			}
  2628	
  2629			// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2630			// and got Yi32 in an earlier version of this code.
  2631			// Keep doing that until we fix yduff etc.
  2632			if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2633				return Yi32
  2634			}
  2635	
  2636			if a.Sym != nil || a.Name != obj.NAME_NONE {
  2637				ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2638			}
  2639			fallthrough
  2640	
  2641		case obj.TYPE_CONST:
  2642			if a.Sym != nil {
  2643				ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2644			}
  2645	
  2646			v := a.Offset
  2647			if ctxt.Arch.Family == sys.I386 {
  2648				v = int64(int32(v))
  2649			}
  2650			switch {
  2651			case v == 0:
  2652				return Yi0
  2653			case v == 1:
  2654				return Yi1
  2655			case v >= 0 && v <= 3:
  2656				return Yu2
  2657			case v >= 0 && v <= 127:
  2658				return Yu7
  2659			case v >= 0 && v <= 255:
  2660				return Yu8
  2661			case v >= -128 && v <= 127:
  2662				return Yi8
  2663			}
  2664			if ctxt.Arch.Family == sys.I386 {
  2665				return Yi32
  2666			}
  2667			l := int32(v)
  2668			if int64(l) == v {
  2669				return Ys32 // can sign extend
  2670			}
  2671			if v>>32 == 0 {
  2672				return Yi32 // unsigned
  2673			}
  2674			return Yi64
  2675	
  2676		case obj.TYPE_TEXTSIZE:
  2677			return Ytextsize
  2678		}
  2679	
  2680		if a.Type != obj.TYPE_REG {
  2681			ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2682			return Yxxx
  2683		}
  2684	
  2685		switch a.Reg {
  2686		case REG_AL:
  2687			return Yal
  2688	
  2689		case REG_AX:
  2690			return Yax
  2691	
  2692			/*
  2693				case REG_SPB:
  2694			*/
  2695		case REG_BPB,
  2696			REG_SIB,
  2697			REG_DIB,
  2698			REG_R8B,
  2699			REG_R9B,
  2700			REG_R10B,
  2701			REG_R11B,
  2702			REG_R12B,
  2703			REG_R13B,
  2704			REG_R14B,
  2705			REG_R15B:
  2706			if ctxt.Arch.Family == sys.I386 {
  2707				return Yxxx
  2708			}
  2709			fallthrough
  2710	
  2711		case REG_DL,
  2712			REG_BL,
  2713			REG_AH,
  2714			REG_CH,
  2715			REG_DH,
  2716			REG_BH:
  2717			return Yrb
  2718	
  2719		case REG_CL:
  2720			return Ycl
  2721	
  2722		case REG_CX:
  2723			return Ycx
  2724	
  2725		case REG_DX, REG_BX:
  2726			return Yrx
  2727	
  2728		case REG_R8, // not really Yrl
  2729			REG_R9,
  2730			REG_R10,
  2731			REG_R11,
  2732			REG_R12,
  2733			REG_R13,
  2734			REG_R14,
  2735			REG_R15:
  2736			if ctxt.Arch.Family == sys.I386 {
  2737				return Yxxx
  2738			}
  2739			fallthrough
  2740	
  2741		case REG_SP, REG_BP, REG_SI, REG_DI:
  2742			if ctxt.Arch.Family == sys.I386 {
  2743				return Yrl32
  2744			}
  2745			return Yrl
  2746	
  2747		case REG_F0 + 0:
  2748			return Yf0
  2749	
  2750		case REG_F0 + 1,
  2751			REG_F0 + 2,
  2752			REG_F0 + 3,
  2753			REG_F0 + 4,
  2754			REG_F0 + 5,
  2755			REG_F0 + 6,
  2756			REG_F0 + 7:
  2757			return Yrf
  2758	
  2759		case REG_M0 + 0,
  2760			REG_M0 + 1,
  2761			REG_M0 + 2,
  2762			REG_M0 + 3,
  2763			REG_M0 + 4,
  2764			REG_M0 + 5,
  2765			REG_M0 + 6,
  2766			REG_M0 + 7:
  2767			return Ymr
  2768	
  2769		case REG_X0:
  2770			return Yxr0
  2771	
  2772		case REG_X0 + 1,
  2773			REG_X0 + 2,
  2774			REG_X0 + 3,
  2775			REG_X0 + 4,
  2776			REG_X0 + 5,
  2777			REG_X0 + 6,
  2778			REG_X0 + 7,
  2779			REG_X0 + 8,
  2780			REG_X0 + 9,
  2781			REG_X0 + 10,
  2782			REG_X0 + 11,
  2783			REG_X0 + 12,
  2784			REG_X0 + 13,
  2785			REG_X0 + 14,
  2786			REG_X0 + 15:
  2787			return Yxr
  2788	
  2789		case REG_X0 + 16,
  2790			REG_X0 + 17,
  2791			REG_X0 + 18,
  2792			REG_X0 + 19,
  2793			REG_X0 + 20,
  2794			REG_X0 + 21,
  2795			REG_X0 + 22,
  2796			REG_X0 + 23,
  2797			REG_X0 + 24,
  2798			REG_X0 + 25,
  2799			REG_X0 + 26,
  2800			REG_X0 + 27,
  2801			REG_X0 + 28,
  2802			REG_X0 + 29,
  2803			REG_X0 + 30,
  2804			REG_X0 + 31:
  2805			return YxrEvex
  2806	
  2807		case REG_Y0 + 0,
  2808			REG_Y0 + 1,
  2809			REG_Y0 + 2,
  2810			REG_Y0 + 3,
  2811			REG_Y0 + 4,
  2812			REG_Y0 + 5,
  2813			REG_Y0 + 6,
  2814			REG_Y0 + 7,
  2815			REG_Y0 + 8,
  2816			REG_Y0 + 9,
  2817			REG_Y0 + 10,
  2818			REG_Y0 + 11,
  2819			REG_Y0 + 12,
  2820			REG_Y0 + 13,
  2821			REG_Y0 + 14,
  2822			REG_Y0 + 15:
  2823			return Yyr
  2824	
  2825		case REG_Y0 + 16,
  2826			REG_Y0 + 17,
  2827			REG_Y0 + 18,
  2828			REG_Y0 + 19,
  2829			REG_Y0 + 20,
  2830			REG_Y0 + 21,
  2831			REG_Y0 + 22,
  2832			REG_Y0 + 23,
  2833			REG_Y0 + 24,
  2834			REG_Y0 + 25,
  2835			REG_Y0 + 26,
  2836			REG_Y0 + 27,
  2837			REG_Y0 + 28,
  2838			REG_Y0 + 29,
  2839			REG_Y0 + 30,
  2840			REG_Y0 + 31:
  2841			return YyrEvex
  2842	
  2843		case REG_Z0 + 0,
  2844			REG_Z0 + 1,
  2845			REG_Z0 + 2,
  2846			REG_Z0 + 3,
  2847			REG_Z0 + 4,
  2848			REG_Z0 + 5,
  2849			REG_Z0 + 6,
  2850			REG_Z0 + 7:
  2851			return Yzr
  2852	
  2853		case REG_Z0 + 8,
  2854			REG_Z0 + 9,
  2855			REG_Z0 + 10,
  2856			REG_Z0 + 11,
  2857			REG_Z0 + 12,
  2858			REG_Z0 + 13,
  2859			REG_Z0 + 14,
  2860			REG_Z0 + 15,
  2861			REG_Z0 + 16,
  2862			REG_Z0 + 17,
  2863			REG_Z0 + 18,
  2864			REG_Z0 + 19,
  2865			REG_Z0 + 20,
  2866			REG_Z0 + 21,
  2867			REG_Z0 + 22,
  2868			REG_Z0 + 23,
  2869			REG_Z0 + 24,
  2870			REG_Z0 + 25,
  2871			REG_Z0 + 26,
  2872			REG_Z0 + 27,
  2873			REG_Z0 + 28,
  2874			REG_Z0 + 29,
  2875			REG_Z0 + 30,
  2876			REG_Z0 + 31:
  2877			if ctxt.Arch.Family == sys.I386 {
  2878				return Yxxx
  2879			}
  2880			return Yzr
  2881	
  2882		case REG_K0:
  2883			return Yk0
  2884	
  2885		case REG_K0 + 1,
  2886			REG_K0 + 2,
  2887			REG_K0 + 3,
  2888			REG_K0 + 4,
  2889			REG_K0 + 5,
  2890			REG_K0 + 6,
  2891			REG_K0 + 7:
  2892			return Yknot0
  2893	
  2894		case REG_CS:
  2895			return Ycs
  2896		case REG_SS:
  2897			return Yss
  2898		case REG_DS:
  2899			return Yds
  2900		case REG_ES:
  2901			return Yes
  2902		case REG_FS:
  2903			return Yfs
  2904		case REG_GS:
  2905			return Ygs
  2906		case REG_TLS:
  2907			return Ytls
  2908	
  2909		case REG_GDTR:
  2910			return Ygdtr
  2911		case REG_IDTR:
  2912			return Yidtr
  2913		case REG_LDTR:
  2914			return Yldtr
  2915		case REG_MSW:
  2916			return Ymsw
  2917		case REG_TASK:
  2918			return Ytask
  2919	
  2920		case REG_CR + 0:
  2921			return Ycr0
  2922		case REG_CR + 1:
  2923			return Ycr1
  2924		case REG_CR + 2:
  2925			return Ycr2
  2926		case REG_CR + 3:
  2927			return Ycr3
  2928		case REG_CR + 4:
  2929			return Ycr4
  2930		case REG_CR + 5:
  2931			return Ycr5
  2932		case REG_CR + 6:
  2933			return Ycr6
  2934		case REG_CR + 7:
  2935			return Ycr7
  2936		case REG_CR + 8:
  2937			return Ycr8
  2938	
  2939		case REG_DR + 0:
  2940			return Ydr0
  2941		case REG_DR + 1:
  2942			return Ydr1
  2943		case REG_DR + 2:
  2944			return Ydr2
  2945		case REG_DR + 3:
  2946			return Ydr3
  2947		case REG_DR + 4:
  2948			return Ydr4
  2949		case REG_DR + 5:
  2950			return Ydr5
  2951		case REG_DR + 6:
  2952			return Ydr6
  2953		case REG_DR + 7:
  2954			return Ydr7
  2955	
  2956		case REG_TR + 0:
  2957			return Ytr0
  2958		case REG_TR + 1:
  2959			return Ytr1
  2960		case REG_TR + 2:
  2961			return Ytr2
  2962		case REG_TR + 3:
  2963			return Ytr3
  2964		case REG_TR + 4:
  2965			return Ytr4
  2966		case REG_TR + 5:
  2967			return Ytr5
  2968		case REG_TR + 6:
  2969			return Ytr6
  2970		case REG_TR + 7:
  2971			return Ytr7
  2972		}
  2973	
  2974		return Yxxx
  2975	}
  2976	
  2977	// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2978	// and hold assembly state.
  2979	type AsmBuf struct {
  2980		buf      [100]byte
  2981		off      int
  2982		rexflag  int
  2983		vexflag  bool // Per inst: true for VEX-encoded
  2984		evexflag bool // Per inst: true for EVEX-encoded
  2985		rep      bool
  2986		repn     bool
  2987		lock     bool
  2988	
  2989		evex evexBits // Initialized when evexflag is true
  2990	}
  2991	
  2992	// Put1 appends one byte to the end of the buffer.
  2993	func (ab *AsmBuf) Put1(x byte) {
  2994		ab.buf[ab.off] = x
  2995		ab.off++
  2996	}
  2997	
  2998	// Put2 appends two bytes to the end of the buffer.
  2999	func (ab *AsmBuf) Put2(x, y byte) {
  3000		ab.buf[ab.off+0] = x
  3001		ab.buf[ab.off+1] = y
  3002		ab.off += 2
  3003	}
  3004	
  3005	// Put3 appends three bytes to the end of the buffer.
  3006	func (ab *AsmBuf) Put3(x, y, z byte) {
  3007		ab.buf[ab.off+0] = x
  3008		ab.buf[ab.off+1] = y
  3009		ab.buf[ab.off+2] = z
  3010		ab.off += 3
  3011	}
  3012	
  3013	// Put4 appends four bytes to the end of the buffer.
  3014	func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3015		ab.buf[ab.off+0] = x
  3016		ab.buf[ab.off+1] = y
  3017		ab.buf[ab.off+2] = z
  3018		ab.buf[ab.off+3] = w
  3019		ab.off += 4
  3020	}
  3021	
  3022	// PutInt16 writes v into the buffer using little-endian encoding.
  3023	func (ab *AsmBuf) PutInt16(v int16) {
  3024		ab.buf[ab.off+0] = byte(v)
  3025		ab.buf[ab.off+1] = byte(v >> 8)
  3026		ab.off += 2
  3027	}
  3028	
  3029	// PutInt32 writes v into the buffer using little-endian encoding.
  3030	func (ab *AsmBuf) PutInt32(v int32) {
  3031		ab.buf[ab.off+0] = byte(v)
  3032		ab.buf[ab.off+1] = byte(v >> 8)
  3033		ab.buf[ab.off+2] = byte(v >> 16)
  3034		ab.buf[ab.off+3] = byte(v >> 24)
  3035		ab.off += 4
  3036	}
  3037	
  3038	// PutInt64 writes v into the buffer using little-endian encoding.
  3039	func (ab *AsmBuf) PutInt64(v int64) {
  3040		ab.buf[ab.off+0] = byte(v)
  3041		ab.buf[ab.off+1] = byte(v >> 8)
  3042		ab.buf[ab.off+2] = byte(v >> 16)
  3043		ab.buf[ab.off+3] = byte(v >> 24)
  3044		ab.buf[ab.off+4] = byte(v >> 32)
  3045		ab.buf[ab.off+5] = byte(v >> 40)
  3046		ab.buf[ab.off+6] = byte(v >> 48)
  3047		ab.buf[ab.off+7] = byte(v >> 56)
  3048		ab.off += 8
  3049	}
  3050	
  3051	// Put copies b into the buffer.
  3052	func (ab *AsmBuf) Put(b []byte) {
  3053		copy(ab.buf[ab.off:], b)
  3054		ab.off += len(b)
  3055	}
  3056	
  3057	// PutOpBytesLit writes zero terminated sequence of bytes from op,
  3058	// starting at specified offsed (e.g. z counter value).
  3059	// Trailing 0 is not written.
  3060	//
  3061	// Intended to be used for literal Z cases.
  3062	// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3063	func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3064		for int(op[offset]) != 0 {
  3065			ab.Put1(byte(op[offset]))
  3066			offset++
  3067		}
  3068	}
  3069	
  3070	// Insert inserts b at offset i.
  3071	func (ab *AsmBuf) Insert(i int, b byte) {
  3072		ab.off++
  3073		copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3074		ab.buf[i] = b
  3075	}
  3076	
  3077	// Last returns the byte at the end of the buffer.
  3078	func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3079	
  3080	// Len returns the length of the buffer.
  3081	func (ab *AsmBuf) Len() int { return ab.off }
  3082	
  3083	// Bytes returns the contents of the buffer.
  3084	func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3085	
  3086	// Reset empties the buffer.
  3087	func (ab *AsmBuf) Reset() { ab.off = 0 }
  3088	
  3089	// At returns the byte at offset i.
  3090	func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3091	
  3092	// asmidx emits SIB byte.
  3093	func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3094		var i int
  3095	
  3096		// X/Y index register is used in VSIB.
  3097		switch index {
  3098		default:
  3099			goto bad
  3100	
  3101		case REG_NONE:
  3102			i = 4 << 3
  3103			goto bas
  3104	
  3105		case REG_R8,
  3106			REG_R9,
  3107			REG_R10,
  3108			REG_R11,
  3109			REG_R12,
  3110			REG_R13,
  3111			REG_R14,
  3112			REG_R15,
  3113			REG_X8,
  3114			REG_X9,
  3115			REG_X10,
  3116			REG_X11,
  3117			REG_X12,
  3118			REG_X13,
  3119			REG_X14,
  3120			REG_X15,
  3121			REG_X16,
  3122			REG_X17,
  3123			REG_X18,
  3124			REG_X19,
  3125			REG_X20,
  3126			REG_X21,
  3127			REG_X22,
  3128			REG_X23,
  3129			REG_X24,
  3130			REG_X25,
  3131			REG_X26,
  3132			REG_X27,
  3133			REG_X28,
  3134			REG_X29,
  3135			REG_X30,
  3136			REG_X31,
  3137			REG_Y8,
  3138			REG_Y9,
  3139			REG_Y10,
  3140			REG_Y11,
  3141			REG_Y12,
  3142			REG_Y13,
  3143			REG_Y14,
  3144			REG_Y15,
  3145			REG_Y16,
  3146			REG_Y17,
  3147			REG_Y18,
  3148			REG_Y19,
  3149			REG_Y20,
  3150			REG_Y21,
  3151			REG_Y22,
  3152			REG_Y23,
  3153			REG_Y24,
  3154			REG_Y25,
  3155			REG_Y26,
  3156			REG_Y27,
  3157			REG_Y28,
  3158			REG_Y29,
  3159			REG_Y30,
  3160			REG_Y31,
  3161			REG_Z8,
  3162			REG_Z9,
  3163			REG_Z10,
  3164			REG_Z11,
  3165			REG_Z12,
  3166			REG_Z13,
  3167			REG_Z14,
  3168			REG_Z15,
  3169			REG_Z16,
  3170			REG_Z17,
  3171			REG_Z18,
  3172			REG_Z19,
  3173			REG_Z20,
  3174			REG_Z21,
  3175			REG_Z22,
  3176			REG_Z23,
  3177			REG_Z24,
  3178			REG_Z25,
  3179			REG_Z26,
  3180			REG_Z27,
  3181			REG_Z28,
  3182			REG_Z29,
  3183			REG_Z30,
  3184			REG_Z31:
  3185			if ctxt.Arch.Family == sys.I386 {
  3186				goto bad
  3187			}
  3188			fallthrough
  3189	
  3190		case REG_AX,
  3191			REG_CX,
  3192			REG_DX,
  3193			REG_BX,
  3194			REG_BP,
  3195			REG_SI,
  3196			REG_DI,
  3197			REG_X0,
  3198			REG_X1,
  3199			REG_X2,
  3200			REG_X3,
  3201			REG_X4,
  3202			REG_X5,
  3203			REG_X6,
  3204			REG_X7,
  3205			REG_Y0,
  3206			REG_Y1,
  3207			REG_Y2,
  3208			REG_Y3,
  3209			REG_Y4,
  3210			REG_Y5,
  3211			REG_Y6,
  3212			REG_Y7,
  3213			REG_Z0,
  3214			REG_Z1,
  3215			REG_Z2,
  3216			REG_Z3,
  3217			REG_Z4,
  3218			REG_Z5,
  3219			REG_Z6,
  3220			REG_Z7:
  3221			i = reg[index] << 3
  3222		}
  3223	
  3224		switch scale {
  3225		default:
  3226			goto bad
  3227	
  3228		case 1:
  3229			break
  3230	
  3231		case 2:
  3232			i |= 1 << 6
  3233	
  3234		case 4:
  3235			i |= 2 << 6
  3236	
  3237		case 8:
  3238			i |= 3 << 6
  3239		}
  3240	
  3241	bas:
  3242		switch base {
  3243		default:
  3244			goto bad
  3245	
  3246		case REG_NONE: // must be mod=00
  3247			i |= 5
  3248	
  3249		case REG_R8,
  3250			REG_R9,
  3251			REG_R10,
  3252			REG_R11,
  3253			REG_R12,
  3254			REG_R13,
  3255			REG_R14,
  3256			REG_R15:
  3257			if ctxt.Arch.Family == sys.I386 {
  3258				goto bad
  3259			}
  3260			fallthrough
  3261	
  3262		case REG_AX,
  3263			REG_CX,
  3264			REG_DX,
  3265			REG_BX,
  3266			REG_SP,
  3267			REG_BP,
  3268			REG_SI,
  3269			REG_DI:
  3270			i |= reg[base]
  3271		}
  3272	
  3273		ab.Put1(byte(i))
  3274		return
  3275	
  3276	bad:
  3277		ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  3278		ab.Put1(0)
  3279	}
  3280	
  3281	func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3282		var rel obj.Reloc
  3283	
  3284		v := vaddr(ctxt, p, a, &rel)
  3285		if rel.Siz != 0 {
  3286			if rel.Siz != 4 {
  3287				ctxt.Diag("bad reloc")
  3288			}
  3289			r := obj.Addrel(cursym)
  3290			*r = rel
  3291			r.Off = int32(p.Pc + int64(ab.Len()))
  3292		}
  3293	
  3294		ab.PutInt32(int32(v))
  3295	}
  3296	
  3297	func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3298		if r != nil {
  3299			*r = obj.Reloc{}
  3300		}
  3301	
  3302		switch a.Name {
  3303		case obj.NAME_STATIC,
  3304			obj.NAME_GOTREF,
  3305			obj.NAME_EXTERN:
  3306			s := a.Sym
  3307			if r == nil {
  3308				ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3309				log.Fatalf("reloc")
  3310			}
  3311	
  3312			if a.Name == obj.NAME_GOTREF {
  3313				r.Siz = 4
  3314				r.Type = objabi.R_GOTPCREL
  3315			} else if useAbs(ctxt, s) {
  3316				r.Siz = 4
  3317				r.Type = objabi.R_ADDR
  3318			} else {
  3319				r.Siz = 4
  3320				r.Type = objabi.R_PCREL
  3321			}
  3322	
  3323			r.Off = -1 // caller must fill in
  3324			r.Sym = s
  3325			r.Add = a.Offset
  3326	
  3327			return 0
  3328		}
  3329	
  3330		if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3331			if r == nil {
  3332				ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3333				log.Fatalf("reloc")
  3334			}
  3335	
  3336			if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3337				r.Type = objabi.R_TLS_LE
  3338				r.Siz = 4
  3339				r.Off = -1 // caller must fill in
  3340				r.Add = a.Offset
  3341			}
  3342			return 0
  3343		}
  3344	
  3345		return a.Offset
  3346	}
  3347	
  3348	func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3349		var base int
  3350		var rel obj.Reloc
  3351	
  3352		rex &= 0x40 | Rxr
  3353		if a.Offset != int64(int32(a.Offset)) {
  3354			// The rules are slightly different for 386 and AMD64,
  3355			// mostly for historical reasons. We may unify them later,
  3356			// but it must be discussed beforehand.
  3357			//
  3358			// For 64bit mode only LEAL is allowed to overflow.
  3359			// It's how https://golang.org/cl/59630 made it.
  3360			// crypto/sha1/sha1block_amd64.s depends on this feature.
  3361			//
  3362			// For 32bit mode rules are more permissive.
  3363			// If offset fits uint32, it's permitted.
  3364			// This is allowed for assembly that wants to use 32-bit hex
  3365			// constants, e.g. LEAL 0x99999999(AX), AX.
  3366			overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3367				(ctxt.Arch.Family != sys.AMD64 &&
  3368					int64(uint32(a.Offset)) == a.Offset &&
  3369					ab.rexflag&Rxw == 0)
  3370			if !overflowOK {
  3371				ctxt.Diag("offset too large in %s", p)
  3372			}
  3373		}
  3374		v := int32(a.Offset)
  3375		rel.Siz = 0
  3376	
  3377		switch a.Type {
  3378		case obj.TYPE_ADDR:
  3379			if a.Name == obj.NAME_NONE {
  3380				ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3381			}
  3382			if a.Index == REG_TLS {
  3383				ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3384			}
  3385			goto bad
  3386	
  3387		case obj.TYPE_REG:
  3388			const regFirst = REG_AL
  3389			const regLast = REG_Z31
  3390			if a.Reg < regFirst || regLast < a.Reg {
  3391				goto bad
  3392			}
  3393			if v != 0 {
  3394				goto bad
  3395			}
  3396			ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3397			ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3398			return
  3399		}
  3400	
  3401		if a.Type != obj.TYPE_MEM {
  3402			goto bad
  3403		}
  3404	
  3405		if a.Index != REG_NONE && a.Index != REG_TLS {
  3406			base := int(a.Reg)
  3407			switch a.Name {
  3408			case obj.NAME_EXTERN,
  3409				obj.NAME_GOTREF,
  3410				obj.NAME_STATIC:
  3411				if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3412					goto bad
  3413				}
  3414				if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3415					// The base register has already been set. It holds the PC
  3416					// of this instruction returned by a PC-reading thunk.
  3417					// See obj6.go:rewriteToPcrel.
  3418				} else {
  3419					base = REG_NONE
  3420				}
  3421				v = int32(vaddr(ctxt, p, a, &rel))
  3422	
  3423			case obj.NAME_AUTO,
  3424				obj.NAME_PARAM:
  3425				base = REG_SP
  3426			}
  3427	
  3428			ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3429			if base == REG_NONE {
  3430				ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3431				ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3432				goto putrelv
  3433			}
  3434	
  3435			if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3436				ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3437				ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3438				return
  3439			}
  3440	
  3441			if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3442				ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3443				ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3444				ab.Put1(disp8)
  3445				return
  3446			}
  3447	
  3448			ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3449			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3450			goto putrelv
  3451		}
  3452	
  3453		base = int(a.Reg)
  3454		switch a.Name {
  3455		case obj.NAME_STATIC,
  3456			obj.NAME_GOTREF,
  3457			obj.NAME_EXTERN:
  3458			if a.Sym == nil {
  3459				ctxt.Diag("bad addr: %v", p)
  3460			}
  3461			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3462				// The base register has already been set. It holds the PC
  3463				// of this instruction returned by a PC-reading thunk.
  3464				// See obj6.go:rewriteToPcrel.
  3465			} else {
  3466				base = REG_NONE
  3467			}
  3468			v = int32(vaddr(ctxt, p, a, &rel))
  3469	
  3470		case obj.NAME_AUTO,
  3471			obj.NAME_PARAM:
  3472			base = REG_SP
  3473		}
  3474	
  3475		if base == REG_TLS {
  3476			v = int32(vaddr(ctxt, p, a, &rel))
  3477		}
  3478	
  3479		ab.rexflag |= regrex[base]&Rxb | rex
  3480		if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3481			if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3482				if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3483					ctxt.Diag("%v has offset against gotref", p)
  3484				}
  3485				ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3486				goto putrelv
  3487			}
  3488	
  3489			// temporary
  3490			ab.Put2(
  3491				byte(0<<6|4<<0|r<<3), // sib present
  3492				0<<6|4<<3|5<<0,       // DS:d32
  3493			)
  3494			goto putrelv
  3495		}
  3496	
  3497		if base == REG_SP || base == REG_R12 {
  3498			if v == 0 {
  3499				ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3500				ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3501				return
  3502			}
  3503	
  3504			if disp8, ok := toDisp8(v, p, ab); ok {
  3505				ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3506				ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3507				ab.Put1(disp8)
  3508				return
  3509			}
  3510	
  3511			ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3512			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3513			goto putrelv
  3514		}
  3515	
  3516		if REG_AX <= base && base <= REG_R15 {
  3517			if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
  3518				rel = obj.Reloc{}
  3519				rel.Type = objabi.R_TLS_LE
  3520				rel.Siz = 4
  3521				rel.Sym = nil
  3522				rel.Add = int64(v)
  3523				v = 0
  3524			}
  3525	
  3526			if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3527				ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3528				return
  3529			}
  3530	
  3531			if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3532				ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3533				return
  3534			}
  3535	
  3536			ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3537			goto putrelv
  3538		}
  3539	
  3540		goto bad
  3541	
  3542	putrelv:
  3543		if rel.Siz != 0 {
  3544			if rel.Siz != 4 {
  3545				ctxt.Diag("bad rel")
  3546				goto bad
  3547			}
  3548	
  3549			r := obj.Addrel(cursym)
  3550			*r = rel
  3551			r.Off = int32(p.Pc + int64(ab.Len()))
  3552		}
  3553	
  3554		ab.PutInt32(v)
  3555		return
  3556	
  3557	bad:
  3558		ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3559	}
  3560	
  3561	func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3562		ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3563	}
  3564	
  3565	func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3566		ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3567	}
  3568	
  3569	func bytereg(a *obj.Addr, t *uint8) {
  3570		if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3571			a.Reg += REG_AL - REG_AX
  3572			*t = 0
  3573		}
  3574	}
  3575	
  3576	func unbytereg(a *obj.Addr, t *uint8) {
  3577		if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3578			a.Reg += REG_AX - REG_AL
  3579			*t = 0
  3580		}
  3581	}
  3582	
  3583	const (
  3584		movLit uint8 = iota // Like Zlit
  3585		movRegMem
  3586		movMemReg
  3587		movRegMem2op
  3588		movMemReg2op
  3589		movFullPtr // Load full pointer, trash heap (unsupported)
  3590		movDoubleShift
  3591		movTLSReg
  3592	)
  3593	
  3594	var ymovtab = []movtab{
  3595		// push
  3596		{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
  3597		{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
  3598		{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
  3599		{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
  3600		{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3601		{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3602		{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
  3603		{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
  3604		{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
  3605		{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
  3606		{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
  3607		{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
  3608		{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
  3609		{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},
  3610	
  3611		// pop
  3612		{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
  3613		{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
  3614		{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
  3615		{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3616		{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3617		{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
  3618		{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
  3619		{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
  3620		{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
  3621		{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
  3622		{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
  3623		{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},
  3624	
  3625		// mov seg
  3626		{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
  3627		{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
  3628		{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
  3629		{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
  3630		{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
  3631		{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
  3632		{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
  3633		{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
  3634		{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
  3635		{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
  3636		{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
  3637		{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},
  3638	
  3639		// mov cr
  3640		{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3641		{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3642		{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3643		{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3644		{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3645		{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
  3646		{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
  3647		{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
  3648		{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
  3649		{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
  3650		{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3651		{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3652		{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3653		{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3654		{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3655		{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
  3656		{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
  3657		{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
  3658		{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
  3659		{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
  3660	
  3661		// mov dr
  3662		{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3663		{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3664		{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3665		{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
  3666		{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
  3667		{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
  3668		{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
  3669		{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
  3670		{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3671		{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3672		{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3673		{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
  3674		{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
  3675		{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
  3676		{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
  3677		{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
  3678	
  3679		// mov tr
  3680		{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
  3681		{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
  3682		{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
  3683		{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},
  3684	
  3685		// lgdt, sgdt, lidt, sidt
  3686		{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3687		{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3688		{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3689		{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3690		{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
  3691		{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
  3692		{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
  3693		{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
  3694	
  3695		// lldt, sldt
  3696		{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
  3697		{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},
  3698	
  3699		// lmsw, smsw
  3700		{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
  3701		{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},
  3702	
  3703		// ltr, str
  3704		{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
  3705		{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},
  3706	
  3707		/* load full pointer - unsupported
  3708		{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
  3709		{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
  3710		*/
  3711	
  3712		// double shift
  3713		{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3714		{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3715		{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
  3716		{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3717		{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3718		{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
  3719		{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3720		{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3721		{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
  3722		{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3723		{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3724		{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
  3725		{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3726		{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3727		{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
  3728		{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3729		{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3730		{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
  3731	
  3732		// load TLS base
  3733		{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3734		{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
  3735		{0, 0, 0, 0, 0, [4]uint8{}},
  3736	}
  3737	
  3738	func isax(a *obj.Addr) bool {
  3739		switch a.Reg {
  3740		case REG_AX, REG_AL, REG_AH:
  3741			return true
  3742		}
  3743	
  3744		if a.Index == REG_AX {
  3745			return true
  3746		}
  3747		return false
  3748	}
  3749	
  3750	func subreg(p *obj.Prog, from int, to int) {
  3751		if false { /* debug['Q'] */
  3752			fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3753		}
  3754	
  3755		if int(p.From.Reg) == from {
  3756			p.From.Reg = int16(to)
  3757			p.Ft = 0
  3758		}
  3759	
  3760		if int(p.To.Reg) == from {
  3761			p.To.Reg = int16(to)
  3762			p.Tt = 0
  3763		}
  3764	
  3765		if int(p.From.Index) == from {
  3766			p.From.Index = int16(to)
  3767			p.Ft = 0
  3768		}
  3769	
  3770		if int(p.To.Index) == from {
  3771			p.To.Index = int16(to)
  3772			p.Tt = 0
  3773		}
  3774	
  3775		if false { /* debug['Q'] */
  3776			fmt.Printf("%v\n", p)
  3777		}
  3778	}
  3779	
  3780	func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3781		switch op {
  3782		case Pm, Pe, Pf2, Pf3:
  3783			if osize != 1 {
  3784				if op != Pm {
  3785					ab.Put1(byte(op))
  3786				}
  3787				ab.Put1(Pm)
  3788				z++
  3789				op = int(o.op[z])
  3790				break
  3791			}
  3792			fallthrough
  3793	
  3794		default:
  3795			if ab.Len() == 0 || ab.Last() != Pm {
  3796				ab.Put1(Pm)
  3797			}
  3798		}
  3799	
  3800		ab.Put1(byte(op))
  3801		return z
  3802	}
  3803	
  3804	var bpduff1 = []byte{
  3805		0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3806		0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3807	}
  3808	
  3809	var bpduff2 = []byte{
  3810		0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3811	}
  3812	
  3813	// asmevex emits EVEX pregis and opcode byte.
  3814	// In addition to asmvex r/m, vvvv and reg fields also requires optional
  3815	// K-masking register.
  3816	//
  3817	// Expects asmbuf.evex to be properly initialized.
  3818	func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  3819		ab.evexflag = true
  3820		evex := ab.evex
  3821	
  3822		rexR := byte(1)
  3823		evexR := byte(1)
  3824		rexX := byte(1)
  3825		rexB := byte(1)
  3826		if r != nil {
  3827			if regrex[r.Reg]&Rxr != 0 {
  3828				rexR = 0 // "ModR/M.reg" selector 4th bit.
  3829			}
  3830			if regrex[r.Reg]&RxrEvex != 0 {
  3831				evexR = 0 // "ModR/M.reg" selector 5th bit.
  3832			}
  3833		}
  3834		if rm != nil {
  3835			if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  3836				rexX = 0
  3837			} else if regrex[rm.Index]&Rxx != 0 {
  3838				rexX = 0
  3839			}
  3840			if regrex[rm.Reg]&Rxb != 0 {
  3841				rexB = 0
  3842			}
  3843		}
  3844		// P0 = [R][X][B][R'][00][mm]
  3845		p0 := (rexR << 7) |
  3846			(rexX << 6) |
  3847			(rexB << 5) |
  3848			(evexR << 4) |
  3849			(0 << 2) |
  3850			(evex.M() << 0)
  3851	
  3852		vexV := byte(0)
  3853		if v != nil {
  3854			// 4bit-wide reg index.
  3855			vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3856		}
  3857		vexV ^= 0x0F
  3858		// P1 = [W][vvvv][1][pp]
  3859		p1 := (evex.W() << 7) |
  3860			(vexV << 3) |
  3861			(1 << 2) |
  3862			(evex.P() << 0)
  3863	
  3864		suffix := evexSuffixMap[p.Scond]
  3865		evexZ := byte(0)
  3866		evexLL := evex.L()
  3867		evexB := byte(0)
  3868		evexV := byte(1)
  3869		evexA := byte(0)
  3870		if suffix.zeroing {
  3871			if !evex.ZeroingEnabled() {
  3872				ctxt.Diag("unsupported zeroing: %v", p)
  3873			}
  3874			evexZ = 1
  3875		}
  3876		switch {
  3877		case suffix.rounding != rcUnset:
  3878			if rm != nil && rm.Type == obj.TYPE_MEM {
  3879				ctxt.Diag("illegal rounding with memory argument: %v", p)
  3880			} else if !evex.RoundingEnabled() {
  3881				ctxt.Diag("unsupported rounding: %v", p)
  3882			}
  3883			evexB = 1
  3884			evexLL = suffix.rounding
  3885		case suffix.broadcast:
  3886			if rm == nil || rm.Type != obj.TYPE_MEM {
  3887				ctxt.Diag("illegal broadcast without memory argument: %v", p)
  3888			} else if !evex.BroadcastEnabled() {
  3889				ctxt.Diag("unsupported broadcast: %v", p)
  3890			}
  3891			evexB = 1
  3892		case suffix.sae:
  3893			if rm != nil && rm.Type == obj.TYPE_MEM {
  3894				ctxt.Diag("illegal SAE with memory argument: %v", p)
  3895			} else if !evex.SaeEnabled() {
  3896				ctxt.Diag("unsupported SAE: %v", p)
  3897			}
  3898			evexB = 1
  3899		}
  3900		if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  3901			evexV = 0
  3902		} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  3903			evexV = 0 // VSR selector 5th bit.
  3904		}
  3905		if k != nil {
  3906			evexA = byte(reg[k.Reg])
  3907		}
  3908		// P2 = [z][L'L][b][V'][aaa]
  3909		p2 := (evexZ << 7) |
  3910			(evexLL << 5) |
  3911			(evexB << 4) |
  3912			(evexV << 3) |
  3913			(evexA << 0)
  3914	
  3915		const evexEscapeByte = 0x62
  3916		ab.Put4(evexEscapeByte, p0, p1, p2)
  3917		ab.Put1(evex.opcode)
  3918	}
  3919	
  3920	// Emit VEX prefix and opcode byte.
  3921	// The three addresses are the r/m, vvvv, and reg fields.
  3922	// The reg and rm arguments appear in the same order as the
  3923	// arguments to asmand, which typically follows the call to asmvex.
  3924	// The final two arguments are the VEX prefix (see encoding above)
  3925	// and the opcode byte.
  3926	// For details about vex prefix see:
  3927	// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3928	func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3929		ab.vexflag = true
  3930		rexR := 0
  3931		if r != nil {
  3932			rexR = regrex[r.Reg] & Rxr
  3933		}
  3934		rexB := 0
  3935		rexX := 0
  3936		if rm != nil {
  3937			rexB = regrex[rm.Reg] & Rxb
  3938			rexX = regrex[rm.Index] & Rxx
  3939		}
  3940		vexM := (vex >> 3) & 0x7
  3941		vexWLP := vex & 0x87
  3942		vexV := byte(0)
  3943		if v != nil {
  3944			vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3945		}
  3946		vexV ^= 0xF
  3947		if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3948			// Can use 2-byte encoding.
  3949			ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3950		} else {
  3951			// Must use 3-byte encoding.
  3952			ab.Put3(0xc4,
  3953				(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3954				vexV<<3|vexWLP,
  3955			)
  3956		}
  3957		ab.Put1(opcode)
  3958	}
  3959	
  3960	// regIndex returns register index that fits in 5 bits.
  3961	//
  3962	//	R         : 3 bit | legacy instructions     | N/A
  3963	//	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  3964	//	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  3965	//
  3966	// Examples:
  3967	//	REG_Z30 => 30
  3968	//	REG_X15 => 15
  3969	//	REG_R9  => 9
  3970	//	REG_AX  => 0
  3971	//
  3972	func regIndex(r int16) int {
  3973		lower3bits := reg[r]
  3974		high4bit := regrex[r] & Rxr << 1
  3975		high5bit := regrex[r] & RxrEvex << 0
  3976		return lower3bits | high4bit | high5bit
  3977	}
  3978	
  3979	// avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  3980	// Reports errors via ctxt.
  3981	func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  3982		// If any pair of the index, mask, or destination registers
  3983		// are the same, illegal instruction trap (#UD) is triggered.
  3984		index := regIndex(p.GetFrom3().Index)
  3985		mask := regIndex(p.From.Reg)
  3986		dest := regIndex(p.To.Reg)
  3987		if dest == mask || dest == index || mask == index {
  3988			ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  3989			return false
  3990		}
  3991	
  3992		return true
  3993	}
  3994	
  3995	// avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  3996	// Reports errors via ctxt.
  3997	func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  3998		// Illegal instruction trap (#UD) is triggered if the destination vector
  3999		// register is the same as index vector in VSIB.
  4000		index := regIndex(p.From.Index)
  4001		dest := regIndex(p.To.Reg)
  4002		if dest == index {
  4003			ctxt.Diag("index and destination registers should be distinct: %v", p)
  4004			return false
  4005		}
  4006	
  4007		return true
  4008	}
  4009	
  4010	func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4011		o := opindex[p.As&obj.AMask]
  4012	
  4013		if o == nil {
  4014			ctxt.Diag("asmins: missing op %v", p)
  4015			return
  4016		}
  4017	
  4018		if pre := prefixof(ctxt, &p.From); pre != 0 {
  4019			ab.Put1(byte(pre))
  4020		}
  4021		if pre := prefixof(ctxt, &p.To); pre != 0 {
  4022			ab.Put1(byte(pre))
  4023		}
  4024	
  4025		// Checks to warn about instruction/arguments combinations that
  4026		// will unconditionally trigger illegal instruction trap (#UD).
  4027		switch p.As {
  4028		case AVGATHERDPD,
  4029			AVGATHERQPD,
  4030			AVGATHERDPS,
  4031			AVGATHERQPS,
  4032			AVPGATHERDD,
  4033			AVPGATHERQD,
  4034			AVPGATHERDQ,
  4035			AVPGATHERQQ:
  4036			// AVX512 gather requires explicit K mask.
  4037			if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4038				if !avx512gatherValid(ctxt, p) {
  4039					return
  4040				}
  4041			} else {
  4042				if !avx2gatherValid(ctxt, p) {
  4043					return
  4044				}
  4045			}
  4046		}
  4047	
  4048		if p.Ft == 0 {
  4049			p.Ft = uint8(oclass(ctxt, p, &p.From))
  4050		}
  4051		if p.Tt == 0 {
  4052			p.Tt = uint8(oclass(ctxt, p, &p.To))
  4053		}
  4054	
  4055		ft := int(p.Ft) * Ymax
  4056		var f3t int
  4057		tt := int(p.Tt) * Ymax
  4058	
  4059		xo := obj.Bool2int(o.op[0] == 0x0f)
  4060		z := 0
  4061		var a *obj.Addr
  4062		var l int
  4063		var op int
  4064		var q *obj.Prog
  4065		var r *obj.Reloc
  4066		var rel obj.Reloc
  4067		var v int64
  4068	
  4069		args := make([]int, 0, argListMax)
  4070		if ft != Ynone*Ymax {
  4071			args = append(args, ft)
  4072		}
  4073		for i := range p.RestArgs {
  4074			args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
  4075		}
  4076		if tt != Ynone*Ymax {
  4077			args = append(args, tt)
  4078		}
  4079	
  4080		for _, yt := range o.ytab {
  4081			// ytab matching is purely args-based,
  4082			// but AVX512 suffixes like "Z" or "RU_SAE" will
  4083			// add EVEX-only filter that will reject non-EVEX matches.
  4084			//
  4085			// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4086			// Without this rule, operands will lead to VEX-encoded form
  4087			// and produce "c5b15813" encoding.
  4088			if !yt.match(args) {
  4089				// "xo" is always zero for VEX/EVEX encoded insts.
  4090				z += int(yt.zoffset) + xo
  4091			} else {
  4092				if p.Scond != 0 && !evexZcase(yt.zcase) {
  4093					// Do not signal error and continue to search
  4094					// for matching EVEX-encoded form.
  4095					z += int(yt.zoffset)
  4096					continue
  4097				}
  4098	
  4099				switch o.prefix {
  4100				case Px1: // first option valid only in 32-bit mode
  4101					if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4102						z += int(yt.zoffset) + xo
  4103						continue
  4104					}
  4105				case Pq: // 16 bit escape and opcode escape
  4106					ab.Put2(Pe, Pm)
  4107	
  4108				case Pq3: // 16 bit escape and opcode escape + REX.W
  4109					ab.rexflag |= Pw
  4110					ab.Put2(Pe, Pm)
  4111	
  4112				case Pq4: // 66 0F 38
  4113					ab.Put3(0x66, 0x0F, 0x38)
  4114	
  4115				case Pq4w: // 66 0F 38 + REX.W
  4116					ab.rexflag |= Pw
  4117					ab.Put3(0x66, 0x0F, 0x38)
  4118	
  4119				case Pq5: // F3 0F 38
  4120					ab.Put3(0xF3, 0x0F, 0x38)
  4121	
  4122				case Pq5w: //  F3 0F 38 + REX.W
  4123					ab.rexflag |= Pw
  4124					ab.Put3(0xF3, 0x0F, 0x38)
  4125	
  4126				case Pf2, // xmm opcode escape
  4127					Pf3:
  4128					ab.Put2(o.prefix, Pm)
  4129	
  4130				case Pef3:
  4131					ab.Put3(Pe, Pf3, Pm)
  4132	
  4133				case Pfw: // xmm opcode escape + REX.W
  4134					ab.rexflag |= Pw
  4135					ab.Put2(Pf3, Pm)
  4136	
  4137				case Pm: // opcode escape
  4138					ab.Put1(Pm)
  4139	
  4140				case Pe: // 16 bit escape
  4141					ab.Put1(Pe)
  4142	
  4143				case Pw: // 64-bit escape
  4144					if ctxt.Arch.Family != sys.AMD64 {
  4145						ctxt.Diag("asmins: illegal 64: %v", p)
  4146					}
  4147					ab.rexflag |= Pw
  4148	
  4149				case Pw8: // 64-bit escape if z >= 8
  4150					if z >= 8 {
  4151						if ctxt.Arch.Family != sys.AMD64 {
  4152							ctxt.Diag("asmins: illegal 64: %v", p)
  4153						}
  4154						ab.rexflag |= Pw
  4155					}
  4156	
  4157				case Pb: // botch
  4158					if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4159						goto bad
  4160					}
  4161					// NOTE(rsc): This is probably safe to do always,
  4162					// but when enabled it chooses different encodings
  4163					// than the old cmd/internal/obj/i386 code did,
  4164					// which breaks our "same bits out" checks.
  4165					// In particular, CMPB AX, $0 encodes as 80 f8 00
  4166					// in the original obj/i386, and it would encode
  4167					// (using a valid, shorter form) as 3c 00 if we enabled
  4168					// the call to bytereg here.
  4169					if ctxt.Arch.Family == sys.AMD64 {
  4170						bytereg(&p.From, &p.Ft)
  4171						bytereg(&p.To, &p.Tt)
  4172					}
  4173	
  4174				case P32: // 32 bit but illegal if 64-bit mode
  4175					if ctxt.Arch.Family == sys.AMD64 {
  4176						ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4177					}
  4178	
  4179				case Py: // 64-bit only, no prefix
  4180					if ctxt.Arch.Family != sys.AMD64 {
  4181						ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4182					}
  4183	
  4184				case Py1: // 64-bit only if z < 1, no prefix
  4185					if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4186						ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4187					}
  4188	
  4189				case Py3: // 64-bit only if z < 3, no prefix
  4190					if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4191						ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4192					}
  4193				}
  4194	
  4195				if z >= len(o.op) {
  4196					log.Fatalf("asmins bad table %v", p)
  4197				}
  4198				op = int(o.op[z])
  4199				if op == 0x0f {
  4200					ab.Put1(byte(op))
  4201					z++
  4202					op = int(o.op[z])
  4203				}
  4204	
  4205				switch yt.zcase {
  4206				default:
  4207					ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4208					return
  4209	
  4210				case Zpseudo:
  4211					break
  4212	
  4213				case Zlit:
  4214					ab.PutOpBytesLit(z, &o.op)
  4215	
  4216				case Zlitr_m:
  4217					ab.PutOpBytesLit(z, &o.op)
  4218					ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4219	
  4220				case Zlitm_r:
  4221					ab.PutOpBytesLit(z, &o.op)
  4222					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4223	
  4224				case Zlit_m_r:
  4225					ab.PutOpBytesLit(z, &o.op)
  4226					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4227	
  4228				case Zmb_r:
  4229					bytereg(&p.From, &p.Ft)
  4230					fallthrough
  4231	
  4232				case Zm_r:
  4233					ab.Put1(byte(op))
  4234					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4235	
  4236				case Z_m_r:
  4237					ab.Put1(byte(op))
  4238					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4239	
  4240				case Zm2_r:
  4241					ab.Put2(byte(op), o.op[z+1])
  4242					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4243	
  4244				case Zm_r_xm:
  4245					ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4246					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4247	
  4248				case Zm_r_xm_nr:
  4249					ab.rexflag = 0
  4250					ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4251					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4252	
  4253				case Zm_r_i_xm:
  4254					ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4255					ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4256					ab.Put1(byte(p.To.Offset))
  4257	
  4258				case Zibm_r, Zibr_m:
  4259					ab.PutOpBytesLit(z, &o.op)
  4260					if yt.zcase == Zibr_m {
  4261						ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4262					} else {
  4263						ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4264					}
  4265					switch {
  4266					default:
  4267						ab.Put1(byte(p.From.Offset))
  4268					case yt.args[0] == Yi32 && o.prefix == Pe:
  4269						ab.PutInt16(int16(p.From.Offset))
  4270					case yt.args[0] == Yi32:
  4271						ab.PutInt32(int32(p.From.Offset))
  4272					}
  4273	
  4274				case Zaut_r:
  4275					ab.Put1(0x8d) // leal
  4276					if p.From.Type != obj.TYPE_ADDR {
  4277						ctxt.Diag("asmins: Zaut sb type ADDR")
  4278					}
  4279					p.From.Type = obj.TYPE_MEM
  4280					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4281					p.From.Type = obj.TYPE_ADDR
  4282	
  4283				case Zm_o:
  4284					ab.Put1(byte(op))
  4285					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4286	
  4287				case Zr_m:
  4288					ab.Put1(byte(op))
  4289					ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4290	
  4291				case Zvex:
  4292					ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4293	
  4294				case Zvex_rm_v_r:
  4295					ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4296					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4297	
  4298				case Zvex_rm_v_ro:
  4299					ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4300					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4301	
  4302				case Zvex_i_rm_vo:
  4303					ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4304					ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4305					ab.Put1(byte(p.From.Offset))
  4306	
  4307				case Zvex_i_r_v:
  4308					ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4309					regnum := byte(0x7)
  4310					if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4311						regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4312					} else {
  4313						regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4314					}
  4315					ab.Put1(o.op[z+2] | regnum)
  4316					ab.Put1(byte(p.From.Offset))
  4317	
  4318				case Zvex_i_rm_v_r:
  4319					imm, from, from3, to := unpackOps4(p)
  4320					ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4321					ab.asmand(ctxt, cursym, p, from, to)
  4322					ab.Put1(byte(imm.Offset))
  4323	
  4324				case Zvex_i_rm_r:
  4325					ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4326					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4327					ab.Put1(byte(p.From.Offset))
  4328	
  4329				case Zvex_v_rm_r:
  4330					ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4331					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4332	
  4333				case Zvex_r_v_rm:
  4334					ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4335					ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4336	
  4337				case Zvex_rm_r_vo:
  4338					ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4339					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4340	
  4341				case Zvex_i_r_rm:
  4342					ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4343					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4344					ab.Put1(byte(p.From.Offset))
  4345	
  4346				case Zvex_hr_rm_v_r:
  4347					hr, from, from3, to := unpackOps4(p)
  4348					ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4349					ab.asmand(ctxt, cursym, p, from, to)
  4350					ab.Put1(byte(regIndex(hr.Reg) << 4))
  4351	
  4352				case Zevex_k_rmo:
  4353					ab.evex = newEVEXBits(z, &o.op)
  4354					ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4355					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4356	
  4357				case Zevex_i_rm_vo:
  4358					ab.evex = newEVEXBits(z, &o.op)
  4359					ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4360					ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4361					ab.Put1(byte(p.From.Offset))
  4362	
  4363				case Zevex_i_rm_k_vo:
  4364					imm, from, kmask, to := unpackOps4(p)
  4365					ab.evex = newEVEXBits(z, &o.op)
  4366					ab.asmevex(ctxt, p, from, to, nil, kmask)
  4367					ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4368					ab.Put1(byte(imm.Offset))
  4369	
  4370				case Zevex_i_r_rm:
  4371					ab.evex = newEVEXBits(z, &o.op)
  4372					ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4373					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4374					ab.Put1(byte(p.From.Offset))
  4375	
  4376				case Zevex_i_r_k_rm:
  4377					imm, from, kmask, to := unpackOps4(p)
  4378					ab.evex = newEVEXBits(z, &o.op)
  4379					ab.asmevex(ctxt, p, to, nil, from, kmask)
  4380					ab.asmand(ctxt, cursym, p, to, from)
  4381					ab.Put1(byte(imm.Offset))
  4382	
  4383				case Zevex_i_rm_r:
  4384					ab.evex = newEVEXBits(z, &o.op)
  4385					ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4386					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4387					ab.Put1(byte(p.From.Offset))
  4388	
  4389				case Zevex_i_rm_k_r:
  4390					imm, from, kmask, to := unpackOps4(p)
  4391					ab.evex = newEVEXBits(z, &o.op)
  4392					ab.asmevex(ctxt, p, from, nil, to, kmask)
  4393					ab.asmand(ctxt, cursym, p, from, to)
  4394					ab.Put1(byte(imm.Offset))
  4395	
  4396				case Zevex_i_rm_v_r:
  4397					imm, from, from3, to := unpackOps4(p)
  4398					ab.evex = newEVEXBits(z, &o.op)
  4399					ab.asmevex(ctxt, p, from, from3, to, nil)
  4400					ab.asmand(ctxt, cursym, p, from, to)
  4401					ab.Put1(byte(imm.Offset))
  4402	
  4403				case Zevex_i_rm_v_k_r:
  4404					imm, from, from3, kmask, to := unpackOps5(p)
  4405					ab.evex = newEVEXBits(z, &o.op)
  4406					ab.asmevex(ctxt, p, from, from3, to, kmask)
  4407					ab.asmand(ctxt, cursym, p, from, to)
  4408					ab.Put1(byte(imm.Offset))
  4409	
  4410				case Zevex_r_v_rm:
  4411					ab.evex = newEVEXBits(z, &o.op)
  4412					ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4413					ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4414	
  4415				case Zevex_rm_v_r:
  4416					ab.evex = newEVEXBits(z, &o.op)
  4417					ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4418					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4419	
  4420				case Zevex_rm_k_r:
  4421					ab.evex = newEVEXBits(z, &o.op)
  4422					ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4423					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4424	
  4425				case Zevex_r_k_rm:
  4426					ab.evex = newEVEXBits(z, &o.op)
  4427					ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4428					ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4429	
  4430				case Zevex_rm_v_k_r:
  4431					from, from3, kmask, to := unpackOps4(p)
  4432					ab.evex = newEVEXBits(z, &o.op)
  4433					ab.asmevex(ctxt, p, from, from3, to, kmask)
  4434					ab.asmand(ctxt, cursym, p, from, to)
  4435	
  4436				case Zevex_r_v_k_rm:
  4437					from, from3, kmask, to := unpackOps4(p)
  4438					ab.evex = newEVEXBits(z, &o.op)
  4439					ab.asmevex(ctxt, p, to, from3, from, kmask)
  4440					ab.asmand(ctxt, cursym, p, to, from)
  4441	
  4442				case Zr_m_xm:
  4443					ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4444					ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4445	
  4446				case Zr_m_xm_nr:
  4447					ab.rexflag = 0
  4448					ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4449					ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4450	
  4451				case Zo_m:
  4452					ab.Put1(byte(op))
  4453					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4454	
  4455				case Zcallindreg:
  4456					r = obj.Addrel(cursym)
  4457					r.Off = int32(p.Pc)
  4458					r.Type = objabi.R_CALLIND
  4459					r.Siz = 0
  4460					fallthrough
  4461	
  4462				case Zo_m64:
  4463					ab.Put1(byte(op))
  4464					ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4465	
  4466				case Zm_ibo:
  4467					ab.Put1(byte(op))
  4468					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4469					ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4470	
  4471				case Zibo_m:
  4472					ab.Put1(byte(op))
  4473					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4474					ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4475	
  4476				case Zibo_m_xm:
  4477					z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4478					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4479					ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4480	
  4481				case Z_ib, Zib_:
  4482					if yt.zcase == Zib_ {
  4483						a = &p.From
  4484					} else {
  4485						a = &p.To
  4486					}
  4487					ab.Put1(byte(op))
  4488					if p.As == AXABORT {
  4489						ab.Put1(o.op[z+1])
  4490					}
  4491					ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4492	
  4493				case Zib_rp:
  4494					ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4495					ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4496	
  4497				case Zil_rp:
  4498					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4499					ab.Put1(byte(op + reg[p.To.Reg]))
  4500					if o.prefix == Pe {
  4501						v = vaddr(ctxt, p, &p.From, nil)
  4502						ab.PutInt16(int16(v))
  4503					} else {
  4504						ab.relput4(ctxt, cursym, p, &p.From)
  4505					}
  4506	
  4507				case Zo_iw:
  4508					ab.Put1(byte(op))
  4509					if p.From.Type != obj.TYPE_NONE {
  4510						v = vaddr(ctxt, p, &p.From, nil)
  4511						ab.PutInt16(int16(v))
  4512					}
  4513	
  4514				case Ziq_rp:
  4515					v = vaddr(ctxt, p, &p.From, &rel)
  4516					l = int(v >> 32)
  4517					if l == 0 && rel.Siz != 8 {
  4518						ab.rexflag &^= (0x40 | Rxw)
  4519	
  4520						ab.rexflag |= regrex[p.To.Reg] & Rxb
  4521						ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4522						if rel.Type != 0 {
  4523							r = obj.Addrel(cursym)
  4524							*r = rel
  4525							r.Off = int32(p.Pc + int64(ab.Len()))
  4526						}
  4527	
  4528						ab.PutInt32(int32(v))
  4529					} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4530						ab.Put1(0xc7)
  4531						ab.asmando(ctxt, cursym, p, &p.To, 0)
  4532	
  4533						ab.PutInt32(int32(v)) // need all 8
  4534					} else {
  4535						ab.rexflag |= regrex[p.To.Reg] & Rxb
  4536						ab.Put1(byte(op + reg[p.To.Reg]))
  4537						if rel.Type != 0 {
  4538							r = obj.Addrel(cursym)
  4539							*r = rel
  4540							r.Off = int32(p.Pc + int64(ab.Len()))
  4541						}
  4542	
  4543						ab.PutInt64(v)
  4544					}
  4545	
  4546				case Zib_rr:
  4547					ab.Put1(byte(op))
  4548					ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4549					ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4550	
  4551				case Z_il, Zil_:
  4552					if yt.zcase == Zil_ {
  4553						a = &p.From
  4554					} else {
  4555						a = &p.To
  4556					}
  4557					ab.Put1(byte(op))
  4558					if o.prefix == Pe {
  4559						v = vaddr(ctxt, p, a, nil)
  4560						ab.PutInt16(int16(v))
  4561					} else {
  4562						ab.relput4(ctxt, cursym, p, a)
  4563					}
  4564	
  4565				case Zm_ilo, Zilo_m:
  4566					ab.Put1(byte(op))
  4567					if yt.zcase == Zilo_m {
  4568						a = &p.From
  4569						ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4570					} else {
  4571						a = &p.To
  4572						ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4573					}
  4574	
  4575					if o.prefix == Pe {
  4576						v = vaddr(ctxt, p, a, nil)
  4577						ab.PutInt16(int16(v))
  4578					} else {
  4579						ab.relput4(ctxt, cursym, p, a)
  4580					}
  4581	
  4582				case Zil_rr:
  4583					ab.Put1(byte(op))
  4584					ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4585					if o.prefix == Pe {
  4586						v = vaddr(ctxt, p, &p.From, nil)
  4587						ab.PutInt16(int16(v))
  4588					} else {
  4589						ab.relput4(ctxt, cursym, p, &p.From)
  4590					}
  4591	
  4592				case Z_rp:
  4593					ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4594					ab.Put1(byte(op + reg[p.To.Reg]))
  4595	
  4596				case Zrp_:
  4597					ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4598					ab.Put1(byte(op + reg[p.From.Reg]))
  4599	
  4600				case Zcallcon, Zjmpcon:
  4601					if yt.zcase == Zcallcon {
  4602						ab.Put1(byte(op))
  4603					} else {
  4604						ab.Put1(o.op[z+1])
  4605					}
  4606					r = obj.Addrel(cursym)
  4607					r.Off = int32(p.Pc + int64(ab.Len()))
  4608					r.Type = objabi.R_PCREL
  4609					r.Siz = 4
  4610					r.Add = p.To.Offset
  4611					ab.PutInt32(0)
  4612	
  4613				case Zcallind:
  4614					ab.Put2(byte(op), o.op[z+1])
  4615					r = obj.Addrel(cursym)
  4616					r.Off = int32(p.Pc + int64(ab.Len()))
  4617					if ctxt.Arch.Family == sys.AMD64 {
  4618						r.Type = objabi.R_PCREL
  4619					} else {
  4620						r.Type = objabi.R_ADDR
  4621					}
  4622					r.Siz = 4
  4623					r.Add = p.To.Offset
  4624					r.Sym = p.To.Sym
  4625					ab.PutInt32(0)
  4626	
  4627				case Zcall, Zcallduff:
  4628					if p.To.Sym == nil {
  4629						ctxt.Diag("call without target")
  4630						ctxt.DiagFlush()
  4631						log.Fatalf("bad code")
  4632					}
  4633	
  4634					if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4635						ctxt.Diag("directly calling duff when dynamically linking Go")
  4636					}
  4637	
  4638					if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4639						// Maintain BP around call, since duffcopy/duffzero can't do it
  4640						// (the call jumps into the middle of the function).
  4641						// This makes it possible to see call sites for duffcopy/duffzero in
  4642						// BP-based profiling tools like Linux perf (which is the
  4643						// whole point of obj.Framepointer_enabled).
  4644						// MOVQ BP, -16(SP)
  4645						// LEAQ -16(SP), BP
  4646						ab.Put(bpduff1)
  4647					}
  4648					ab.Put1(byte(op))
  4649					r = obj.Addrel(cursym)
  4650					r.Off = int32(p.Pc + int64(ab.Len()))
  4651					r.Sym = p.To.Sym
  4652					r.Add = p.To.Offset
  4653					r.Type = objabi.R_CALL
  4654					r.Siz = 4
  4655					ab.PutInt32(0)
  4656	
  4657					if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4658						// Pop BP pushed above.
  4659						// MOVQ 0(BP), BP
  4660						ab.Put(bpduff2)
  4661					}
  4662	
  4663				// TODO: jump across functions needs reloc
  4664				case Zbr, Zjmp, Zloop:
  4665					if p.As == AXBEGIN {
  4666						ab.Put1(byte(op))
  4667					}
  4668					if p.To.Sym != nil {
  4669						if yt.zcase != Zjmp {
  4670							ctxt.Diag("branch to ATEXT")
  4671							ctxt.DiagFlush()
  4672							log.Fatalf("bad code")
  4673						}
  4674	
  4675						ab.Put1(o.op[z+1])
  4676						r = obj.Addrel(cursym)
  4677						r.Off = int32(p.Pc + int64(ab.Len()))
  4678						r.Sym = p.To.Sym
  4679						// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4680						// it can point to a trampoline instead of the destination itself.
  4681						r.Type = objabi.R_CALL
  4682						r.Siz = 4
  4683						ab.PutInt32(0)
  4684						break
  4685					}
  4686	
  4687					// Assumes q is in this function.
  4688					// TODO: Check in input, preserve in brchain.
  4689	
  4690					// Fill in backward jump now.
  4691					q = p.Pcond
  4692	
  4693					if q == nil {
  4694						ctxt.Diag("jmp/branch/loop without target")
  4695						ctxt.DiagFlush()
  4696						log.Fatalf("bad code")
  4697					}
  4698	
  4699					if p.Back&branchBackwards != 0 {
  4700						v = q.Pc - (p.Pc + 2)
  4701						if v >= -128 && p.As != AXBEGIN {
  4702							if p.As == AJCXZL {
  4703								ab.Put1(0x67)
  4704							}
  4705							ab.Put2(byte(op), byte(v))
  4706						} else if yt.zcase == Zloop {
  4707							ctxt.Diag("loop too far: %v", p)
  4708						} else {
  4709							v -= 5 - 2
  4710							if p.As == AXBEGIN {
  4711								v--
  4712							}
  4713							if yt.zcase == Zbr {
  4714								ab.Put1(0x0f)
  4715								v--
  4716							}
  4717	
  4718							ab.Put1(o.op[z+1])
  4719							ab.PutInt32(int32(v))
  4720						}
  4721	
  4722						break
  4723					}
  4724	
  4725					// Annotate target; will fill in later.
  4726					p.Forwd = q.Rel
  4727	
  4728					q.Rel = p
  4729					if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4730						if p.As == AJCXZL {
  4731							ab.Put1(0x67)
  4732						}
  4733						ab.Put2(byte(op), 0)
  4734					} else if yt.zcase == Zloop {
  4735						ctxt.Diag("loop too far: %v", p)
  4736					} else {
  4737						if yt.zcase == Zbr {
  4738							ab.Put1(0x0f)
  4739						}
  4740						ab.Put1(o.op[z+1])
  4741						ab.PutInt32(0)
  4742					}
  4743	
  4744				case Zbyte:
  4745					v = vaddr(ctxt, p, &p.From, &rel)
  4746					if rel.Siz != 0 {
  4747						rel.Siz = uint8(op)
  4748						r = obj.Addrel(cursym)
  4749						*r = rel
  4750						r.Off = int32(p.Pc + int64(ab.Len()))
  4751					}
  4752	
  4753					ab.Put1(byte(v))
  4754					if op > 1 {
  4755						ab.Put1(byte(v >> 8))
  4756						if op > 2 {
  4757							ab.PutInt16(int16(v >> 16))
  4758							if op > 4 {
  4759								ab.PutInt32(int32(v >> 32))
  4760							}
  4761						}
  4762					}
  4763				}
  4764	
  4765				return
  4766			}
  4767		}
  4768		f3t = Ynone * Ymax
  4769		if p.GetFrom3() != nil {
  4770			f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4771		}
  4772		for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4773			var pp obj.Prog
  4774			var t []byte
  4775			if p.As == mo[0].as {
  4776				if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4777					t = mo[0].op[:]
  4778					switch mo[0].code {
  4779					default:
  4780						ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4781	
  4782					case movLit:
  4783						for z = 0; t[z] != 0; z++ {
  4784							ab.Put1(t[z])
  4785						}
  4786	
  4787					case movRegMem:
  4788						ab.Put1(t[0])
  4789						ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4790	
  4791					case movMemReg:
  4792						ab.Put1(t[0])
  4793						ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4794	
  4795					case movRegMem2op: // r,m - 2op
  4796						ab.Put2(t[0], t[1])
  4797						ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4798						ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4799	
  4800					case movMemReg2op:
  4801						ab.Put2(t[0], t[1])
  4802						ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4803						ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4804	
  4805					case movFullPtr:
  4806						if t[0] != 0 {
  4807							ab.Put1(t[0])
  4808						}
  4809						switch p.To.Index {
  4810						default:
  4811							goto bad
  4812	
  4813						case REG_DS:
  4814							ab.Put1(0xc5)
  4815	
  4816						case REG_SS:
  4817							ab.Put2(0x0f, 0xb2)
  4818	
  4819						case REG_ES:
  4820							ab.Put1(0xc4)
  4821	
  4822						case REG_FS:
  4823							ab.Put2(0x0f, 0xb4)
  4824	
  4825						case REG_GS:
  4826							ab.Put2(0x0f, 0xb5)
  4827						}
  4828	
  4829						ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4830	
  4831					case movDoubleShift:
  4832						if t[0] == Pw {
  4833							if ctxt.Arch.Family != sys.AMD64 {
  4834								ctxt.Diag("asmins: illegal 64: %v", p)
  4835							}
  4836							ab.rexflag |= Pw
  4837							t = t[1:]
  4838						} else if t[0] == Pe {
  4839							ab.Put1(Pe)
  4840							t = t[1:]
  4841						}
  4842	
  4843						switch p.From.Type {
  4844						default:
  4845							goto bad
  4846	
  4847						case obj.TYPE_CONST:
  4848							ab.Put2(0x0f, t[0])
  4849							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4850							ab.Put1(byte(p.From.Offset))
  4851	
  4852						case obj.TYPE_REG:
  4853							switch p.From.Reg {
  4854							default:
  4855								goto bad
  4856	
  4857							case REG_CL, REG_CX:
  4858								ab.Put2(0x0f, t[1])
  4859								ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4860							}
  4861						}
  4862	
  4863					// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4864					// where you load the TLS base register into a register and then index off that
  4865					// register to access the actual TLS variables. Systems that allow direct TLS access
  4866					// are handled in prefixof above and should not be listed here.
  4867					case movTLSReg:
  4868						if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4869							ctxt.Diag("invalid load of TLS: %v", p)
  4870						}
  4871	
  4872						if ctxt.Arch.Family == sys.I386 {
  4873							// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4874							// where you load the TLS base register into a register and then index off that
  4875							// register to access the actual TLS variables. Systems that allow direct TLS access
  4876							// are handled in prefixof above and should not be listed here.
  4877							switch ctxt.Headtype {
  4878							default:
  4879								log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4880	
  4881							case objabi.Hlinux,
  4882								objabi.Hnacl, objabi.Hfreebsd:
  4883								if ctxt.Flag_shared {
  4884									// Note that this is not generating the same insns as the other cases.
  4885									//     MOV TLS, dst
  4886									// becomes
  4887									//     call __x86.get_pc_thunk.dst
  4888									//     movl (gotpc + g@gotntpoff)(dst), dst
  4889									// which is encoded as
  4890									//     call __x86.get_pc_thunk.dst
  4891									//     movq 0(dst), dst
  4892									// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4893									// is g, which we can't check here, but will when we assemble the second
  4894									// instruction.
  4895									dst := p.To.Reg
  4896									ab.Put1(0xe8)
  4897									r = obj.Addrel(cursym)
  4898									r.Off = int32(p.Pc + int64(ab.Len()))
  4899									r.Type = objabi.R_CALL
  4900									r.Siz = 4
  4901									r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4902									ab.PutInt32(0)
  4903	
  4904									ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4905									r = obj.Addrel(cursym)
  4906									r.Off = int32(p.Pc + int64(ab.Len()))
  4907									r.Type = objabi.R_TLS_IE
  4908									r.Siz = 4
  4909									r.Add = 2
  4910									ab.PutInt32(0)
  4911								} else {
  4912									// ELF TLS base is 0(GS).
  4913									pp.From = p.From
  4914	
  4915									pp.From.Type = obj.TYPE_MEM
  4916									pp.From.Reg = REG_GS
  4917									pp.From.Offset = 0
  4918									pp.From.Index = REG_NONE
  4919									pp.From.Scale = 0
  4920									ab.Put2(0x65, // GS
  4921										0x8B)
  4922									ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4923								}
  4924							case objabi.Hplan9:
  4925								pp.From = obj.Addr{}
  4926								pp.From.Type = obj.TYPE_MEM
  4927								pp.From.Name = obj.NAME_EXTERN
  4928								pp.From.Sym = plan9privates
  4929								pp.From.Offset = 0
  4930								pp.From.Index = REG_NONE
  4931								ab.Put1(0x8B)
  4932								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4933	
  4934							case objabi.Hwindows:
  4935								// Windows TLS base is always 0x14(FS).
  4936								pp.From = p.From
  4937	
  4938								pp.From.Type = obj.TYPE_MEM
  4939								pp.From.Reg = REG_FS
  4940								pp.From.Offset = 0x14
  4941								pp.From.Index = REG_NONE
  4942								pp.From.Scale = 0
  4943								ab.Put2(0x64, // FS
  4944									0x8B)
  4945								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4946							}
  4947							break
  4948						}
  4949	
  4950						switch ctxt.Headtype {
  4951						default:
  4952							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4953	
  4954						case objabi.Hlinux, objabi.Hfreebsd:
  4955							if !ctxt.Flag_shared {
  4956								log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  4957							}
  4958							// Note that this is not generating the same insn as the other cases.
  4959							//     MOV TLS, R_to
  4960							// becomes
  4961							//     movq g@gottpoff(%rip), R_to
  4962							// which is encoded as
  4963							//     movq 0(%rip), R_to
  4964							// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4965							// is g, which we can't check here, but will when we assemble the second
  4966							// instruction.
  4967							ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4968	
  4969							ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4970							r = obj.Addrel(cursym)
  4971							r.Off = int32(p.Pc + int64(ab.Len()))
  4972							r.Type = objabi.R_TLS_IE
  4973							r.Siz = 4
  4974							r.Add = -4
  4975							ab.PutInt32(0)
  4976	
  4977						case objabi.Hplan9:
  4978							pp.From = obj.Addr{}
  4979							pp.From.Type = obj.TYPE_MEM
  4980							pp.From.Name = obj.NAME_EXTERN
  4981							pp.From.Sym = plan9privates
  4982							pp.From.Offset = 0
  4983							pp.From.Index = REG_NONE
  4984							ab.rexflag |= Pw
  4985							ab.Put1(0x8B)
  4986							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4987	
  4988						case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4989							// TLS base is 0(FS).
  4990							pp.From = p.From
  4991	
  4992							pp.From.Type = obj.TYPE_MEM
  4993							pp.From.Name = obj.NAME_NONE
  4994							pp.From.Reg = REG_NONE
  4995							pp.From.Offset = 0
  4996							pp.From.Index = REG_NONE
  4997							pp.From.Scale = 0
  4998							ab.rexflag |= Pw
  4999							ab.Put2(0x64, // FS
  5000								0x8B)
  5001							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5002	
  5003						case objabi.Hwindows:
  5004							// Windows TLS base is always 0x28(GS).
  5005							pp.From = p.From
  5006	
  5007							pp.From.Type = obj.TYPE_MEM
  5008							pp.From.Name = obj.NAME_NONE
  5009							pp.From.Reg = REG_GS
  5010							pp.From.Offset = 0x28
  5011							pp.From.Index = REG_NONE
  5012							pp.From.Scale = 0
  5013							ab.rexflag |= Pw
  5014							ab.Put2(0x65, // GS
  5015								0x8B)
  5016							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5017						}
  5018					}
  5019					return
  5020				}
  5021			}
  5022		}
  5023		goto bad
  5024	
  5025	bad:
  5026		if ctxt.Arch.Family != sys.AMD64 {
  5027			// here, the assembly has failed.
  5028			// if it's a byte instruction that has
  5029			// unaddressable registers, try to
  5030			// exchange registers and reissue the
  5031			// instruction with the operands renamed.
  5032			pp := *p
  5033	
  5034			unbytereg(&pp.From, &pp.Ft)
  5035			unbytereg(&pp.To, &pp.Tt)
  5036	
  5037			z := int(p.From.Reg)
  5038			if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5039				// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5040				// For now, different to keep bit-for-bit compatibility.
  5041				if ctxt.Arch.Family == sys.I386 {
  5042					breg := byteswapreg(ctxt, &p.To)
  5043					if breg != REG_AX {
  5044						ab.Put1(0x87) // xchg lhs,bx
  5045						ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5046						subreg(&pp, z, breg)
  5047						ab.doasm(ctxt, cursym, &pp)
  5048						ab.Put1(0x87) // xchg lhs,bx
  5049						ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5050					} else {
  5051						ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5052						subreg(&pp, z, REG_AX)
  5053						ab.doasm(ctxt, cursym, &pp)
  5054						ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5055					}
  5056					return
  5057				}
  5058	
  5059				if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5060					// We certainly don't want to exchange
  5061					// with AX if the op is MUL or DIV.
  5062					ab.Put1(0x87) // xchg lhs,bx
  5063					ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5064					subreg(&pp, z, REG_BX)
  5065					ab.doasm(ctxt, cursym, &pp)
  5066					ab.Put1(0x87) // xchg lhs,bx
  5067					ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5068				} else {
  5069					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5070					subreg(&pp, z, REG_AX)
  5071					ab.doasm(ctxt, cursym, &pp)
  5072					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5073				}
  5074				return
  5075			}
  5076	
  5077			z = int(p.To.Reg)
  5078			if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5079				// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5080				// For now, different to keep bit-for-bit compatibility.
  5081				if ctxt.Arch.Family == sys.I386 {
  5082					breg := byteswapreg(ctxt, &p.From)
  5083					if breg != REG_AX {
  5084						ab.Put1(0x87) //xchg rhs,bx
  5085						ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5086						subreg(&pp, z, breg)
  5087						ab.doasm(ctxt, cursym, &pp)
  5088						ab.Put1(0x87) // xchg rhs,bx
  5089						ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5090					} else {
  5091						ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5092						subreg(&pp, z, REG_AX)
  5093						ab.doasm(ctxt, cursym, &pp)
  5094						ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5095					}
  5096					return
  5097				}
  5098	
  5099				if isax(&p.From) {
  5100					ab.Put1(0x87) // xchg rhs,bx
  5101					ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5102					subreg(&pp, z, REG_BX)
  5103					ab.doasm(ctxt, cursym, &pp)
  5104					ab.Put1(0x87) // xchg rhs,bx
  5105					ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5106				} else {
  5107					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5108					subreg(&pp, z, REG_AX)
  5109					ab.doasm(ctxt, cursym, &pp)
  5110					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5111				}
  5112				return
  5113			}
  5114		}
  5115	
  5116		ctxt.Diag("invalid instruction: %v", p)
  5117	}
  5118	
  5119	// byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5120	// which is not referenced in a.
  5121	// If a is empty, it returns BX to account for MULB-like instructions
  5122	// that might use DX and AX.
  5123	func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5124		cana, canb, canc, cand := true, true, true, true
  5125		if a.Type == obj.TYPE_NONE {
  5126			cana, cand = false, false
  5127		}
  5128	
  5129		if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5130			switch a.Reg {
  5131			case REG_NONE:
  5132				cana, cand = false, false
  5133			case REG_AX, REG_AL, REG_AH:
  5134				cana = false
  5135			case REG_BX, REG_BL, REG_BH:
  5136				canb = false
  5137			case REG_CX, REG_CL, REG_CH:
  5138				canc = false
  5139			case REG_DX, REG_DL, REG_DH:
  5140				cand = false
  5141			}
  5142		}
  5143	
  5144		if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5145			switch a.Index {
  5146			case REG_AX:
  5147				cana = false
  5148			case REG_BX:
  5149				canb = false
  5150			case REG_CX:
  5151				canc = false
  5152			case REG_DX:
  5153				cand = false
  5154			}
  5155		}
  5156	
  5157		switch {
  5158		case cana:
  5159			return REG_AX
  5160		case canb:
  5161			return REG_BX
  5162		case canc:
  5163			return REG_CX
  5164		case cand:
  5165			return REG_DX
  5166		default:
  5167			ctxt.Diag("impossible byte register")
  5168			ctxt.DiagFlush()
  5169			log.Fatalf("bad code")
  5170			return 0
  5171		}
  5172	}
  5173	
  5174	func isbadbyte(a *obj.Addr) bool {
  5175		return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5176	}
  5177	
  5178	var naclret = []uint8{
  5179		0x5e, // POPL SI
  5180		// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  5181		0x83,
  5182		0xe6,
  5183		0xe0, // ANDL $~31, SI
  5184		0x4c,
  5185		0x01,
  5186		0xfe, // ADDQ R15, SI
  5187		0xff,
  5188		0xe6, // JMP SI
  5189	}
  5190	
  5191	var naclret8 = []uint8{
  5192		0x5d, // POPL BP
  5193		// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  5194		0x83,
  5195		0xe5,
  5196		0xe0, // ANDL $~31, BP
  5197		0xff,
  5198		0xe5, // JMP BP
  5199	}
  5200	
  5201	var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  5202	
  5203	var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  5204	
  5205	var naclmovs = []uint8{
  5206		0x89,
  5207		0xf6, // MOVL SI, SI
  5208		0x49,
  5209		0x8d,
  5210		0x34,
  5211		0x37, // LEAQ (R15)(SI*1), SI
  5212		0x89,
  5213		0xff, // MOVL DI, DI
  5214		0x49,
  5215		0x8d,
  5216		0x3c,
  5217		0x3f, // LEAQ (R15)(DI*1), DI
  5218	}
  5219	
  5220	var naclstos = []uint8{
  5221		0x89,
  5222		0xff, // MOVL DI, DI
  5223		0x49,
  5224		0x8d,
  5225		0x3c,
  5226		0x3f, // LEAQ (R15)(DI*1), DI
  5227	}
  5228	
  5229	func (ab *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  5230		if reg >= REG_R8 {
  5231			ab.Put1(0x45)
  5232		}
  5233		reg = (reg - REG_AX) & 7
  5234		ab.Put2(0x89, byte(3<<6|reg<<3|reg))
  5235	}
  5236	
  5237	func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5238		ab.Reset()
  5239	
  5240		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
  5241			switch p.As {
  5242			case obj.ARET:
  5243				ab.Put(naclret8)
  5244				return
  5245	
  5246			case obj.ACALL,
  5247				obj.AJMP:
  5248				if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  5249					ab.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  5250				}
  5251	
  5252			case AINT:
  5253				ab.Put1(0xf4)
  5254				return
  5255			}
  5256		}
  5257	
  5258		if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  5259			if p.As == AREP {
  5260				ab.rep = true
  5261				return
  5262			}
  5263	
  5264			if p.As == AREPN {
  5265				ab.repn = true
  5266				return
  5267			}
  5268	
  5269			if p.As == ALOCK {
  5270				ab.lock = true
  5271				return
  5272			}
  5273	
  5274			if p.As != ALEAQ && p.As != ALEAL {
  5275				if p.From.Index != REG_NONE && p.From.Scale > 0 {
  5276					ab.nacltrunc(ctxt, int(p.From.Index))
  5277				}
  5278				if p.To.Index != REG_NONE && p.To.Scale > 0 {
  5279					ab.nacltrunc(ctxt, int(p.To.Index))
  5280				}
  5281			}
  5282	
  5283			switch p.As {
  5284			case obj.ARET:
  5285				ab.Put(naclret)
  5286				return
  5287	
  5288			case obj.ACALL,
  5289				obj.AJMP:
  5290				if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  5291					// ANDL $~31, reg
  5292					ab.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  5293					// ADDQ R15, reg
  5294					ab.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  5295				}
  5296	
  5297				if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  5298					// ANDL $~31, reg
  5299					ab.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  5300					// ADDQ R15, reg
  5301					ab.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  5302				}
  5303	
  5304			case AINT:
  5305				ab.Put1(0xf4)
  5306				return
  5307	
  5308			case ASCASB,
  5309				ASCASW,
  5310				ASCASL,
  5311				ASCASQ,
  5312				ASTOSB,
  5313				ASTOSW,
  5314				ASTOSL,
  5315				ASTOSQ:
  5316				ab.Put(naclstos)
  5317	
  5318			case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  5319				ab.Put(naclmovs)
  5320			}
  5321	
  5322			if ab.rep {
  5323				ab.Put1(0xf3)
  5324				ab.rep = false
  5325			}
  5326	
  5327			if ab.repn {
  5328				ab.Put1(0xf2)
  5329				ab.repn = false
  5330			}
  5331	
  5332			if ab.lock {
  5333				ab.Put1(0xf0)
  5334				ab.lock = false
  5335			}
  5336		}
  5337	
  5338		ab.rexflag = 0
  5339		ab.vexflag = false
  5340		ab.evexflag = false
  5341		mark := ab.Len()
  5342		ab.doasm(ctxt, cursym, p)
  5343		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5344			// as befits the whole approach of the architecture,
  5345			// the rex prefix must appear before the first opcode byte
  5346			// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5347			// before the 0f opcode escape!), or it might be ignored.
  5348			// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5349			if ctxt.Arch.Family != sys.AMD64 {
  5350				ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5351			}
  5352			n := ab.Len()
  5353			var np int
  5354			for np = mark; np < n; np++ {
  5355				c := ab.At(np)
  5356				if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5357					break
  5358				}
  5359			}
  5360			ab.Insert(np, byte(0x40|ab.rexflag))
  5361		}
  5362	
  5363		n := ab.Len()
  5364		for i := len(cursym.R) - 1; i >= 0; i-- {
  5365			r := &cursym.R[i]
  5366			if int64(r.Off) < p.Pc {
  5367				break
  5368			}
  5369			if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5370				r.Off++
  5371			}
  5372			if r.Type == objabi.R_PCREL {
  5373				if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5374					// PC-relative addressing is relative to the end of the instruction,
  5375					// but the relocations applied by the linker are relative to the end
  5376					// of the relocation. Because immediate instruction
  5377					// arguments can follow the PC-relative memory reference in the
  5378					// instruction encoding, the two may not coincide. In this case,
  5379					// adjust addend so that linker can keep relocating relative to the
  5380					// end of the relocation.
  5381					r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5382				} else if ctxt.Arch.Family == sys.I386 {
  5383					// On 386 PC-relative addressing (for non-call/jmp instructions)
  5384					// assumes that the previous instruction loaded the PC of the end
  5385					// of that instruction into CX, so the adjustment is relative to
  5386					// that.
  5387					r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5388				}
  5389			}
  5390			if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5391				// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5392				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5393			}
  5394	
  5395		}
  5396	
  5397		if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  5398			switch p.To.Reg {
  5399			case REG_SP:
  5400				ab.Put(naclspfix)
  5401			case REG_BP:
  5402				ab.Put(naclbpfix)
  5403			}
  5404		}
  5405	}
  5406	
  5407	// unpackOps4 extracts 4 operands from p.
  5408	func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5409		return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  5410	}
  5411	
  5412	// unpackOps5 extracts 5 operands from p.
  5413	func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5414		return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To
  5415	}
  5416	

View as plain text