// src/runtime/asm_amd64.s

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	// Tail-call the common runtime entry point with argc/argv in DI/SI.
	JMP	runtime·rt0_go(SB)

// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
TEXT main(SB),NOSPLIT,$-8
	// DI/SI already hold argc/argv (C ABI), exactly what rt0_go expects.
	JMP	runtime·rt0_go(SB)
    24	
// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
	// Align stack per ELF ABI requirements.
	MOVQ	SP, AX	// remember original SP so it can be restored on exit
	ANDQ	$~15, SP
	// Save C ABI callee-saved registers, as caller may need them.
	MOVQ	BX, 0x10(SP)
	MOVQ	BP, 0x18(SP)
	MOVQ	R12, 0x20(SP)
	MOVQ	R13, 0x28(SP)
	MOVQ	R14, 0x30(SP)
	MOVQ	R15, 0x38(SP)
	MOVQ	AX, 0x40(SP)	// stash the pre-alignment SP

	// Record argc/argv for _rt0_amd64_lib_go, which runs on another thread.
	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Create a new thread to finish Go runtime initialization.
	MOVQ	_cgo_sys_thread_create(SB), AX
	TESTQ	AX, AX
	JZ	nocgo	// no cgo thread-creation helper; fall back to newosproc0
	MOVQ	$_rt0_amd64_lib_go(SB), DI	// thread entry point
	MOVQ	$0, SI	// no thread argument
	CALL	AX
	JMP	restore

nocgo:
	MOVQ	$0x800000, 0(SP)		// stacksize
	MOVQ	$_rt0_amd64_lib_go(SB), AX
	MOVQ	AX, 8(SP)			// fn
	CALL	runtime·newosproc0(SB)

restore:
	// Restore the callee-saved registers and the original SP.
	MOVQ	0x10(SP), BX
	MOVQ	0x18(SP), BP
	MOVQ	0x20(SP), R12
	MOVQ	0x28(SP), R13
	MOVQ	0x30(SP), R14
	MOVQ	0x38(SP), R15
	MOVQ	0x40(SP), SP
	RET

// _rt0_amd64_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)

// Saved argc/argv, written by _rt0_amd64_lib and read by
// _rt0_amd64_lib_go on the new thread.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86	
// rt0_go is the common bootstrap shared by all the entry points above.
// On entry DI = argc and SI = argv. It sets up the bootstrap g0 stack,
// probes the CPU, initializes TLS/cgo as needed, wires up g0<->m0, and
// runs the standard runtime start-up sequence. It never returns.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP		// 2args 2auto
	ANDQ	$~15, SP	// 16-byte align the stack
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX	// carve 64 KB of the OS stack for g0
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI	// SI = highest supported CPUID leaf
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	// CPUID leaf 0 leaves the vendor string in BX:DX:CX.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#else
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#endif
#ifdef GOOS_windows
	// Adjust for the Win64 calling convention.
	MOVQ	CX, R9 // arg 4
	MOVQ	DX, R8 // arg 3
	MOVQ	SI, DX // arg 2
	MOVQ	DI, CX // arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)	// TLS store did not round-trip
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

	// Prevent dead-code elimination of debugCallV1, which is
	// intended to be called by debuggers.
	// (Never executed; exists only to keep a reference alive.)
	MOVQ	$runtime·debugCallV1(SB), AX
	RET

// mainPC is a funcval for runtime·main, passed to newproc above.
DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8
   237	
// breakpoint executes a hardware breakpoint trap.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc	// INT3
	RET

// asminit performs architecture-specific assembly initialization.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET
   245	
   246	/*
   247	 *  go-routine
   248	 */
   249	
   250	// func gosave(buf *gobuf)
   251	// save state in Gobuf; setjmp
   252	TEXT runtime·gosave(SB), NOSPLIT, $0-8
   253		MOVQ	buf+0(FP), AX		// gobuf
   254		LEAQ	buf+0(FP), BX		// caller's SP
   255		MOVQ	BX, gobuf_sp(AX)
   256		MOVQ	0(SP), BX		// caller's PC
   257		MOVQ	BX, gobuf_pc(AX)
   258		MOVQ	$0, gobuf_ret(AX)
   259		MOVQ	BP, gobuf_bp(AX)
   260		// Assert ctxt is zero. See func save.
   261		MOVQ	gobuf_ctxt(AX), BX
   262		TESTQ	BX, BX
   263		JZ	2(PC)
   264		CALL	runtime·badctxt(SB)
   265		get_tls(CX)
   266		MOVQ	g(CX), BX
   267		MOVQ	BX, gobuf_g(AX)
   268		RET
   269	
   270	// func gogo(buf *gobuf)
   271	// restore state from Gobuf; longjmp
   272	TEXT runtime·gogo(SB), NOSPLIT, $16-8
   273		MOVQ	buf+0(FP), BX		// gobuf
   274		MOVQ	gobuf_g(BX), DX
   275		MOVQ	0(DX), CX		// make sure g != nil
   276		get_tls(CX)
   277		MOVQ	DX, g(CX)
   278		MOVQ	gobuf_sp(BX), SP	// restore SP
   279		MOVQ	gobuf_ret(BX), AX
   280		MOVQ	gobuf_ctxt(BX), DX
   281		MOVQ	gobuf_bp(BX), BP
   282		MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   283		MOVQ	$0, gobuf_ret(BX)
   284		MOVQ	$0, gobuf_ctxt(BX)
   285		MOVQ	$0, gobuf_bp(BX)
   286		MOVQ	gobuf_pc(BX), BX
   287		JMP	BX
   288	
// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVQ	$runtime·badmcall(SB), AX
	JMP	AX
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX	// argument to fn: the g we switched away from
	MOVQ	DI, DX
	MOVQ	0(DI), DI	// code pointer out of the funcval
	CALL	DI
	POPQ	AX
	// fn must not return; if it does, report it.
	MOVQ	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET
   331	
// func systemstack(fn func())
// Run fn on the system (g0) stack, then switch back to the caller's g.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	// Already on the signal stack? No switch needed.
	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch	// already on g0

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVQ	$runtime·systemstack_switch(SB), SI
	MOVQ	SI, (g_sched+gobuf_pc)(AX)
	MOVQ	SP, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBQ	$8, BX
	MOVQ	$runtime·mstart(SB), DX
	MOVQ	DX, 0(BX)	// fake return address into mstart
	MOVQ	BX, SP

	// call target function
	MOVQ	DI, DX
	MOVQ	0(DI), DI	// code pointer out of the funcval
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3
   395	
   396	
   397	/*
   398	 * support for morestack
   399	 */
   400	
   401	// Called during function prolog when more stack is needed.
   402	//
   403	// The traceback routines see morestack on a g0 as being
   404	// the top of a stack (for example, morestack calling newstack
   405	// calling the scheduler calling newm calling gc), so we must
   406	// record an argument size. For that purpose, it has no arguments.
   407	TEXT runtime·morestack(SB),NOSPLIT,$0-0
   408		// Cannot grow scheduler stack (m->g0).
   409		get_tls(CX)
   410		MOVQ	g(CX), BX
   411		MOVQ	g_m(BX), BX
   412		MOVQ	m_g0(BX), SI
   413		CMPQ	g(CX), SI
   414		JNE	3(PC)
   415		CALL	runtime·badmorestackg0(SB)
   416		CALL	runtime·abort(SB)
   417	
   418		// Cannot grow signal stack (m->gsignal).
   419		MOVQ	m_gsignal(BX), SI
   420		CMPQ	g(CX), SI
   421		JNE	3(PC)
   422		CALL	runtime·badmorestackgsignal(SB)
   423		CALL	runtime·abort(SB)
   424	
   425		// Called from f.
   426		// Set m->morebuf to f's caller.
   427		NOP	SP	// tell vet SP changed - stop checking offsets
   428		MOVQ	8(SP), AX	// f's caller's PC
   429		MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   430		LEAQ	16(SP), AX	// f's caller's SP
   431		MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   432		get_tls(CX)
   433		MOVQ	g(CX), SI
   434		MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   435	
   436		// Set g->sched to context in f.
   437		MOVQ	0(SP), AX // f's PC
   438		MOVQ	AX, (g_sched+gobuf_pc)(SI)
   439		MOVQ	SI, (g_sched+gobuf_g)(SI)
   440		LEAQ	8(SP), AX // f's SP
   441		MOVQ	AX, (g_sched+gobuf_sp)(SI)
   442		MOVQ	BP, (g_sched+gobuf_bp)(SI)
   443		MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
   444	
   445		// Call newstack on m->g0's stack.
   446		MOVQ	m_g0(BX), BX
   447		MOVQ	BX, g(CX)
   448		MOVQ	(g_sched+gobuf_sp)(BX), SP
   449		CALL	runtime·newstack(SB)
   450		CALL	runtime·abort(SB)	// crash if newstack returns
   451		RET
   452	
   453	// morestack but not preserving ctxt.
   454	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   455		MOVL	$0, DX
   456		JMP	runtime·morestack(SB)
   457	
// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is <= MAXSIZE;
// otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT ·reflectcall(SB), NOSPLIT, $0-32
	MOVLQZX argsize+24(FP), CX	// zero-extended argument size selects a frame
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// Argument size exceeds the largest fixed frame: fatal error.
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX
   501	
// CALLFN expands to one fixed-frame-size call* function used by
// reflectcall: copy the arguments in, call f, then hand the results
// back via callRet.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	argptr+16(FP), SI;		\
	MOVLQZX argsize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	(DX);				\
	/* copy return values back */		\
	MOVQ	argtype+0(FP), DX;		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX	retoffset+28(FP), BX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)	// argtype
	MOVQ	DI, 8(SP)	// destination (argptr+retoffset)
	MOVQ	SI, 16(SP)	// source (results on the call* frame)
	MOVQ	CX, 24(SP)	// size in bytes (argsize-retoffset)
	CALL	runtime·reflectcallmove(SB)
	RET

// Instantiate call* for every supported power-of-two frame size.
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
   565	
// func procyield(cycles uint32)
// Busy-spin for the given number of PAUSE iterations.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE	// spin-wait hint to the CPU
	SUBL	$1, AX
	JNZ	again
	RET


TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET
   579	
// func jmpdefer(fv *funcval, argp uintptr)
// argp is a caller SP.
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ	fv+0(FP), DX	// fn
	MOVQ	argp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
	SUBQ	$5, (SP)	// return to CALL again (back up over the 5-byte CALL)
	MOVQ	0(DX), BX	// code pointer out of the funcval
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
	get_tls(R8)
	MOVQ	g(R8), R8	// R8 = current g
	MOVQ	0(SP), R9	// caller's PC
	MOVQ	R9, (g_sched+gobuf_pc)(R8)
	LEAQ	8(SP), R9	// caller's SP
	MOVQ	R9, (g_sched+gobuf_sp)(R8)
	MOVQ	$0, (g_sched+gobuf_ret)(R8)
	MOVQ	BP, (g_sched+gobuf_bp)(R8)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
	TESTQ	R9, R9
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	RET
   611	
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX

	MOVQ	SP, DX	// remember the current (Go) stack pointer

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	g(CX), R8
	CMPQ	R8, $0
	JEQ	nosave	// no g at all: already on a system stack
	MOVQ	g_m(R8), R8
	MOVQ	m_g0(R8), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	nosave	// already on g0
	MOVQ	m_gsignal(R8), SI
	CMPQ	SI, DI
	JEQ	nosave	// on the signal stack

	// Switch to system stack.
	MOVQ	m_g0(R8), SI
	CALL	gosave<>(SB)	// record caller state in g->sched
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI	// reload g (its stack may have moved)
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	40(SP), SI	// recompute the Go SP from stack.hi minus the saved depth
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)	// fn's int32 result
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 40(SP)	// save original stack pointer
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	40(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)	// fn's int32 result
	RET
   690	
// func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
	LEAQ	fn+0(FP), AX	// &fn serves as the Go func value
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	ctxt+24(FP), AX
	MOVQ	AX, 24(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)	// TLS not set up yet: treat as g == nil
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, R8 // holds oldm until end of function
	JMP	havem
needm:
	MOVQ	$0, 0(SP)	// oldm is nil on this path
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	MOVQ	0(SP), R8	// R8 = oldm (nil), kept until the end
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 8(SP) holds the saved R8.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)	// push curg's saved PC onto its stack
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	MOVQ	ctxt+24(FP), BX
	LEAQ	fv+0(FP), AX	// AX = frame size (distance from SP to fv's slot)
	SUBQ	SP, AX
	SUBQ	AX, DI	// carve an equally-sized frame on curg's stack
	MOVQ	DI, SP

	MOVQ	R8, 8(SP)	// save oldm across the Go callback
	MOVQ	BX, 0(SP)	// argument to cgocallbackg: ctxt
	CALL	runtime·cgocallbackg(SB)
	MOVQ	8(SP), R8	// restore oldm

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI	// DI = curg's original sched.sp
	MOVQ	-8(DI), BX	// the PC pushed above
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX	// the sched.sp value saved at the havem label
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPQ	R8, $0
	JNE 3(PC)	// skip dropm if we did not borrow the m
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET
   831	
// func setg(gg *g)
// set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
#ifdef GOOS_windows
	// On Windows, also maintain the TLS pointer at 0x28(GS).
	CMPQ	BX, $0
	JNE	settls
	MOVQ	$0, 0x28(GS)	// clearing g clears the TLS slot too
	RET
settls:
	MOVQ	g_m(BX), AX
	LEAQ	m_tls(AX), AX
	MOVQ	AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET

// void setg_gcc(G*); set g called from gcc.
// The new g arrives in DI per the C calling convention.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	RET
   855	
// abort crashes the program with a breakpoint trap, then spins forever
// so it can never return.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)	// unsigned compare: require stack.hi > SP
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)	// unsigned compare: require SP > stack.lo
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
// Read the time-stamp counter, serialized with LFENCE on Intel or
// MFENCE otherwise (selection made in rt0_go's CPU detection).
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	LFENCE
	JMP	done
mfence:
	MFENCE
done:
	RDTSC	// result in DX:AX (high:low 32 bits)
	SHLQ	$32, DX
	ADDQ	DX, AX	// assemble the 64-bit tick count in AX
	MOVQ	AX, ret+0(FP)
	RET
   887	
// func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-32
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	s+16(FP), CX	// size
	LEAQ	ret+24(FP), DX	// address for aeshashbody to store the result
	JMP	aeshashbody<>(SB)

// func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to string struct
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	LEAQ	ret+16(FP), DX	// address for aeshashbody to store the result
	JMP	aeshashbody<>(SB)
   903	
// aeshashbody is the common tail of the AES-based hash functions.
// It hashes CX bytes starting at AX, mixing in the per-table seed
// (h+8(FP)) and the per-process key schedule, and stores the 64-bit
// result at the address in DX. Inputs:
// AX: data
// CX: length
// DX: address to put return value
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on length; each size class hashes the whole input
	// with a fixed number of 16-byte lanes.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	// Check whether a 16-byte load at the data pointer could cross a
	// page boundary: after ADDQ, low 12 bits of AX < 16 means the
	// original pointer is within 16 bytes of a page end.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	// Mask off the bytes beyond the input: index masks<> at 16*length
	// (CX was doubled, then scaled by 8 in the addressing mode).
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	// shifts<> is indexed at 16*length, same scheme as masks<> above.
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed; the two 16-byte loads may overlap
	// when 17 <= length < 32.
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load first 32 and last 32 bytes (possibly overlapping)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// xor with seeds
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// scramble 3 times
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine results
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, (DX)
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 and last 64 bytes (possibly overlapping)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	// CX = (length-1)/128; length > 128, so CX >= 1.
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results into a single 64-bit value in X8
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET
  1234	
// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
// Hashes the 4 bytes at p with seed h using three AES rounds keyed by
// the per-process key schedule.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRD	$2, (AX), X0	// data (inserted into lane 2, above the seed)
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// low 64 bits of the scrambled state
	RET
  1245	
// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
// Hashes the 8 bytes at p with seed h using three AES rounds keyed by
// the per-process key schedule.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRQ	$1, (AX), X0	// data (inserted into the high qword, above the seed)
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// low 64 bits of the scrambled state
	RET
  1256	
// simple mask to get rid of data in the high part of the register.
// Sixteen 16-byte entries; entry i (at offset i*16, indexed by length
// in aeshashbody) keeps the low i bytes and zeroes the rest.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
  1291	
// func checkASM() bool
// Reports whether assembly-level invariants hold; used at runtime
// startup to validate this file's assumptions.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
	// (aeshashbody indexes them with 16-byte SSE loads).
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)	// true iff both addresses have low 4 bits clear
	RET
  1301	
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Sixteen 16-byte entries (entry i at offset i*16); byte value 0xff
// in a PSHUFB control zeroes the corresponding destination byte.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
  1338	
// return0 sets the machine return register (AX) to zero and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
  1342	
  1343	
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX			// AX = g
	MOVQ	g_m(AX), AX			// AX = g.m
	MOVQ	m_curg(AX), AX			// AX = g.m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi (C return value)
	RET
  1353	
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP, so the return address lands inside goexit
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
  1361	
// This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the new moduledata pointer; it is appended to the
// runtime·lastmoduledatap linked list.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)		// lastmoduledatap.next = DI
	MOVQ	DI, runtime·lastmoduledatap(SB)	// lastmoduledatap = DI
	POPQ	R15
	RET
  1370	
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_p(R13), R13			// R13 = g.m.p
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14			// reserve two 8-byte slots
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	// Flags set here are consumed by the JEQ below; the intervening
	// MOVQ instructions do not modify flags.
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
	// Record the write.
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	// Restore everything and retry the fast-path exit.
	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret
  1460	
// Error string reported by debugCallV1 when the requested argument
// frame exceeds the largest DEBUG_CALL_FN size (65536 bytes).
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1463	
  1464	// debugCallV1 is the entry point for debugger-injected function
  1465	// calls on running goroutines. It informs the runtime that a
  1466	// debug call has been injected and creates a call frame for the
  1467	// debugger to fill in.
  1468	//
  1469	// To inject a function call, a debugger should:
  1470	// 1. Check that the goroutine is in state _Grunning and that
  1471	//    there are at least 256 bytes free on the stack.
  1472	// 2. Push the current PC on the stack (updating SP).
  1473	// 3. Write the desired argument frame size at SP-16 (using the SP
  1474	//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
  1476	//    so they can be restored later by the debugger.
  1477	// 5. Set the PC to debugCallV1 and resume execution.
  1478	//
  1479	// If the goroutine is in state _Grunnable, then it's not generally
  1480	// safe to inject a call because it may return out via other runtime
  1481	// operations. Instead, the debugger should unwind the stack to find
  1482	// the return to non-runtime code, add a temporary breakpoint there,
  1483	// and inject the call once that breakpoint is hit.
  1484	//
  1485	// If the goroutine is in any other state, it's not safe to inject a call.
  1486	//
  1487	// This function communicates back to the debugger by setting RAX and
  1488	// invoking INT3 to raise a breakpoint signal. See the comments in the
  1489	// implementation for the protocol the debugger is expected to
  1490	// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
  1491	//
  1492	// The debugger must ensure that any pointers passed to the function
  1493	// obey escape analysis requirements. Specifically, it must not pass
  1494	// a stack pointer to an escaping argument. debugCallV1 cannot check
  1495	// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers in GC register
	// map order (see ssa.registersAMD64). This makes it possible
	// to copy the stack while updating pointers currently held in
	// registers, and for the GC to find roots in registers.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX	// nil means the injection point is safe
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3 breakpoint for the debugger
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// Dispatch to the smallest debugCall* frame that fits the
	// requested argument frame size.
	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3 breakpoint for the debugger
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc	// INT3 breakpoint for the debugger
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
  1619	
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Each instance provides a MAXSIZE-byte frame for debugger-written
// arguments: AX=0/INT3 asks the debugger to perform the call,
// AX=1/INT3 reports that the call returned (protocol described at
// debugCallV1 above).
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, AX;				\
	BYTE	$0xcc;				\
	MOVQ	$1, AX;				\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
  1642	
// func debugCallPanicked(val interface{})
// Reports a panic from a debugger-injected call: copies the panic
// value to the top of the stack and raises INT3 with AX=2 so the
// debugger can inspect it (see the protocol at debugCallV1).
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX	// interface type word
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX	// interface data word
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX
	BYTE	$0xcc	// INT3 breakpoint for the debugger
	RET
  1653	
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
// The register pair used by each stub (AX/CX, CX/DX, or DX/BX) is
// part of the compiler's code-generation contract for that panic kind.
TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3CU(SB)
  1723	
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// $16 presumably = slot index 2 * 8-byte slot size — matches the
// comment above; confirm against the Bionic TLS layout.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif

View as plain text