...

Text file src/runtime/asm_386.s

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	// _rt0_386 is common startup code for most 386 systems when using
    11	// internal linking. This is the entry point for the program from the
    12	// kernel for an ordinary -buildmode=exe program. The stack holds the
    13	// number of arguments and the C-style argv.
    14	TEXT _rt0_386(SB),NOSPLIT,$8
      		// The 8-byte frame holds the two words that rt0_go reads from
      		// 0(SP) and 4(SP) when it is entered.
    15		MOVL	8(SP), AX	// argc
    16		LEAL	12(SP), BX	// argv
    17		MOVL	AX, 0(SP)	// first frame slot = argc
    18		MOVL	BX, 4(SP)	// second frame slot = argv
    19		JMP	runtime·rt0_go(SB)	// tail jump: rt0_go is jumped to, not called
    20	
    21	// _rt0_386_lib is common startup code for most 386 systems when
    22	// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    23	// arrange to invoke this function as a global constructor (for
    24	// c-archive) or when the shared library is loaded (for c-shared).
    25	// We expect argc and argv to be passed on the stack following the
    26	// usual C ABI.
    27	TEXT _rt0_386_lib(SB),NOSPLIT,$0
      		// Saves argc/argv into the file-local globals, runs libpreinit, then
      		// starts _rt0_386_lib_go on a new thread, either through
      		// _cgo_sys_thread_create when that pointer is non-zero or through
      		// runtime·newosproc0 otherwise. BP, BX, SI and DI are saved on entry
      		// and restored before returning.
    28		PUSHL	BP
    29		MOVL	SP, BP	// BP = frame base: 4(BP) is the return address, 8(BP) the first argument
    30		PUSHL	BX
    31		PUSHL	SI
    32		PUSHL	DI
    33	
    34		MOVL	8(BP), AX	// first stack argument (argc)
    35		MOVL	AX, _rt0_386_lib_argc<>(SB)
    36		MOVL	12(BP), AX	// second stack argument (argv)
    37		MOVL	AX, _rt0_386_lib_argv<>(SB)
    38	
    39		// Synchronous initialization.
    40		CALL	runtime·libpreinit(SB)
    41	
    42		SUBL	$8, SP	// room for the two argument words used by either call below
    43	
    44		// Create a new thread to do the runtime initialization.
    45		MOVL	_cgo_sys_thread_create(SB), AX
    46		TESTL	AX, AX
    47		JZ	nocgo	// pointer is zero: fall back to newosproc0
    48	
    49		// Align stack to call C function.
    50		// We moved SP to BP above, but BP was clobbered by the libpreinit call.
    51		MOVL	SP, BP	// remember the unaligned SP so it can be restored after the call
    52		ANDL	$~15, SP	// round SP down to a 16-byte boundary
    53	
    54		MOVL	$_rt0_386_lib_go(SB), BX
    55		MOVL	BX, 0(SP)	// arg 1: address of _rt0_386_lib_go
    56		MOVL	$0, 4(SP)	// arg 2: zero
    57	
    58		CALL	AX	// call through the _cgo_sys_thread_create pointer
    59	
    60		MOVL	BP, SP	// undo the alignment adjustment
    61	
    62		JMP	restore
    63	
    64	nocgo:
    65		MOVL	$0x800000, 0(SP)                    // stacksize = 8192KB
    66		MOVL	$_rt0_386_lib_go(SB), AX
    67		MOVL	AX, 4(SP)                           // fn
    68		CALL	runtime·newosproc0(SB)
    69	
    70	restore:
    71		ADDL	$8, SP	// release the two argument words
    72		POPL	DI
    73		POPL	SI
    74		POPL	BX
    75		POPL	BP
    76		RET
    77	
    78	// _rt0_386_lib_go initializes the Go runtime.
    79	// This is started in a separate thread by _rt0_386_lib.
    80	TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
      		// Reloads the argc/argv values that _rt0_386_lib stored in the
      		// file-local globals and places them where rt0_go expects them.
    81		MOVL	_rt0_386_lib_argc<>(SB), AX
    82		MOVL	AX, 0(SP)	// first frame slot = saved argc
    83		MOVL	_rt0_386_lib_argv<>(SB), AX
    84		MOVL	AX, 4(SP)	// second frame slot = saved argv
    85		JMP	runtime·rt0_go(SB)	// tail jump: rt0_go is jumped to, not called
    86	
    87	DATA _rt0_386_lib_argc<>(SB)/4, $0	// written by _rt0_386_lib, read by _rt0_386_lib_go
    88	GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4	// 4-byte file-local word, initially zero
    89	DATA _rt0_386_lib_argv<>(SB)/4, $0	// written by _rt0_386_lib, read by _rt0_386_lib_go
    90	GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4	// 4-byte file-local word, initially zero
    91	
    92	TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME,$0
      		// Bootstrap sequence, in order: carve out an aligned scratch area,
      		// give g0 provisional stack bounds, probe the CPU with CPUID
      		// (printing bad_proc_msg and exiting if MMX is missing), set up
      		// thread-local storage via _cgo_init or ldt0setup, link g0 and m0,
      		// then call check, args, osinit, schedinit, newproc(mainPC) and
      		// finally mstart.
    93		// Copy arguments forward on an even stack.
    94		// Users of this function jump to it, they don't call it.
    95		MOVL	0(SP), AX	// argc, as stored by the _rt0_386* entry points
    96		MOVL	4(SP), BX	// argv, as stored by the _rt0_386* entry points
    97		SUBL	$128, SP		// plenty of scratch
    98		ANDL	$~15, SP	// round SP down to a 16-byte boundary
    99		MOVL	AX, 120(SP)		// save argc, argv away
   100		MOVL	BX, 124(SP)
   101	
   102		// set default stack bounds.
   103		// _cgo_init may update stackguard.
   104		MOVL	$runtime·g0(SB), BP	// BP = &g0
   105		LEAL	(-64*1024+104)(SP), BX	// provisional low bound: 64 KB below SP, plus 104 bytes
   106		MOVL	BX, g_stackguard0(BP)
   107		MOVL	BX, g_stackguard1(BP)
   108		MOVL	BX, (g_stack+stack_lo)(BP)
   109		MOVL	SP, (g_stack+stack_hi)(BP)	// current SP becomes the high bound
   110	
   111		// find out information about the processor we're on
   112	#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
   113		JMP 	has_cpuid
   114	#else
   115		// first see if CPUID instruction is supported.
   116		PUSHFL
   117		PUSHFL
   118		XORL	$(1<<21), 0(SP) // flip ID bit
   119		POPFL
   120		PUSHFL
   121		POPL	AX	// AX = EFLAGS after the attempted flip
   122		XORL	0(SP), AX	// AX = bits that differ from the original EFLAGS still on the stack
   123		POPFL	// restore EFLAGS
   124		TESTL	$(1<<21), AX
   125		JNE 	has_cpuid	// the ID bit could be toggled
   126	#endif
   127	
   128	bad_proc: // show that the program requires MMX.
   129		MOVL	$2, 0(SP)	// first argument to write: 2
   130		MOVL	$bad_proc_msg<>(SB), 4(SP)	// second argument: address of the message
   131		MOVL	$0x3d, 8(SP)	// third argument: 0x3d = 61, the declared size of bad_proc_msg
   132		CALL	runtime·write(SB)
   133		MOVL	$1, 0(SP)	// argument to exit: 1
   134		CALL	runtime·exit(SB)
   135		CALL	runtime·abort(SB)	// reached only if exit returns
   136	
   137	has_cpuid:
   138		MOVL	$0, AX	// select CPUID leaf 0
   139		CPUID
   140		MOVL	AX, SI	// keep the AX result of leaf 0 in SI
   141		CMPL	AX, $0
   142		JE	nocpuinfo	// AX == 0: skip the vendor and feature checks
   143	
   144		// Figure out how to serialize RDTSC.
   145		// On Intel processors LFENCE is enough. AMD requires MFENCE.
   146		// Don't know about the rest, so let's do MFENCE.
   147		CMPL	BX, $0x756E6547  // "Genu"
   148		JNE	notintel
   149		CMPL	DX, $0x49656E69  // "ineI"
   150		JNE	notintel
   151		CMPL	CX, $0x6C65746E  // "ntel"
   152		JNE	notintel
   153		MOVB	$1, runtime·isIntel(SB)
   154		MOVB	$1, runtime·lfenceBeforeRdtsc(SB)	// cputicks tests this flag to choose LFENCE over MFENCE
   155	notintel:
   156	
   157		// Load EAX=1 cpuid flags
   158		MOVL	$1, AX
   159		CPUID
   160		MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
   161		MOVL	AX, runtime·processorVersionInfo(SB)
   162	
   163		// Check for MMX support
   164		TESTL	$(1<<23), DX // MMX
   165		JZ	bad_proc	// bit clear: report the problem and exit
   166	
   167	nocpuinfo:
   168		// if there is an _cgo_init, call it to let it
   169		// initialize and to set up GS.  if not,
   170		// we set up GS ourselves.
   171		MOVL	_cgo_init(SB), AX
   172		TESTL	AX, AX
   173		JZ	needtls
   174	#ifdef GOOS_android
   175		// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   176		// Compensate for tls_g (+8).
   177		MOVL	-8(TLS), BX
   178		MOVL	BX, 12(SP)
   179		MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
   180	#else
   181		MOVL	$0, BX
   182		MOVL	BX, 12(SP)	// arg 3,4: not used when using platform's TLS
   183		MOVL	BX, 8(SP)
   184	#endif
   185		MOVL	$setg_gcc<>(SB), BX
   186		MOVL	BX, 4(SP)	// arg 2: setg_gcc
   187		MOVL	BP, 0(SP)	// arg 1: g0
   188		CALL	AX	// call through the _cgo_init pointer
   189	
   190		// update stackguard after _cgo_init
   191		MOVL	$runtime·g0(SB), CX
   192		MOVL	(g_stack+stack_lo)(CX), AX
   193		ADDL	$const__StackGuard, AX	// guard = stack.lo + _StackGuard
   194		MOVL	AX, g_stackguard0(CX)
   195		MOVL	AX, g_stackguard1(CX)
   196	
   197	#ifndef GOOS_windows
   198		// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   199		JMP ok
   200	#endif
   201	needtls:
   202	#ifdef GOOS_plan9
   203		// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   204		JMP	ok
   205	#endif
   206	#ifdef GOOS_darwin
   207		// skip runtime·ldt0setup(SB) on Darwin
   208		JMP	ok
   209	#endif
   210	
   211		// set up %gs
   212		CALL	ldt0setup<>(SB)
   213	
   214		// store through it, to make sure it works
   215		get_tls(BX)
   216		MOVL	$0x123, g(BX)	// write a marker through the TLS slot
   217		MOVL	runtime·m0+m_tls(SB), AX	// read m0.tls directly
   218		CMPL	AX, $0x123
   219		JEQ	ok	// the marker landed in m0.tls
   220		MOVL	AX, 0	// abort
   221	ok:
   222		// set up m and g "registers"
   223		get_tls(BX)
   224		LEAL	runtime·g0(SB), DX
   225		MOVL	DX, g(BX)	// current g = g0
   226		LEAL	runtime·m0(SB), AX
   227	
   228		// save m->g0 = g0
   229		MOVL	DX, m_g0(AX)
   230		// save g0->m = m0
   231		MOVL	AX, g_m(DX)
   232	
   233		CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   234	
   235		// convention is D is always cleared
   236		CLD
   237	
   238		CALL	runtime·check(SB)
   239	
   240		// saved argc, argv
   241		MOVL	120(SP), AX
   242		MOVL	AX, 0(SP)	// args argument 1: argc
   243		MOVL	124(SP), AX
   244		MOVL	AX, 4(SP)	// args argument 2: argv
   245		CALL	runtime·args(SB)
   246		CALL	runtime·osinit(SB)
   247		CALL	runtime·schedinit(SB)
   248	
   249		// create a new goroutine to start program
   250		PUSHL	$runtime·mainPC(SB)	// entry
   251		PUSHL	$0	// arg size
   252		CALL	runtime·newproc(SB)
   253		POPL	AX	// discard the two pushed argument words
   254		POPL	AX
   255	
   256		// start this M
   257		CALL	runtime·mstart(SB)
   258	
   259		CALL	runtime·abort(SB)	// reached only if mstart returns
   260		RET
   261	
   262	DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
   263	GLOBL	bad_proc_msg<>(SB), RODATA, $61	// 61 bytes; rt0_go passes the same length (0x3d) to write
   264	
   265	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// word holding the address of runtime·main
   266	GLOBL	runtime·mainPC(SB),RODATA,$4	// rt0_go pushes &mainPC as the entry argument to newproc
   267	
   268	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   269		INT $3	// raise the breakpoint interrupt
   270		RET	// returns normally if execution is resumed after the interrupt
   271	
   272	TEXT runtime·asminit(SB),NOSPLIT,$0-0
   273		// Linux and MinGW start the FPU in extended double precision.
   274		// Other operating systems use double precision.
   275		// Change to double precision to match them,
   276		// and to match other hardware that only has double.
   277		FLDCW	runtime·controlWord64(SB)	// load the x87 control word from runtime·controlWord64
   278		RET
   279	
   280	/*
   281	 *  go-routine
   282	 */
   283	
   284	// void gosave(Gobuf*)
   285	// save state in Gobuf; setjmp
   286	TEXT runtime·gosave(SB), NOSPLIT, $0-4
      		// Records the caller's SP and PC and the current g in *buf, clears
      		// buf.ret, and calls badctxt if buf.ctxt is not already zero.
   287		MOVL	buf+0(FP), AX		// gobuf
   288		LEAL	buf+0(FP), BX		// caller's SP
   289		MOVL	BX, gobuf_sp(AX)
   290		MOVL	0(SP), BX		// caller's PC
   291		MOVL	BX, gobuf_pc(AX)
   292		MOVL	$0, gobuf_ret(AX)	// buf.ret = 0
   293		// Assert ctxt is zero. See func save.
   294		MOVL	gobuf_ctxt(AX), BX
   295		TESTL	BX, BX
   296		JZ	2(PC)	// ctxt == 0: skip the badctxt call
   297		CALL	runtime·badctxt(SB)
   298		get_tls(CX)
   299		MOVL	g(CX), BX	// BX = current g
   300		MOVL	BX, gobuf_g(AX)	// buf.g = g
   301		RET
   302	
   303	// void gogo(Gobuf*)
   304	// restore state from Gobuf; longjmp
   305	TEXT runtime·gogo(SB), NOSPLIT, $8-4
      		// Makes buf.g the current g, loads SP, AX (ret) and DX (ctxt) from
      		// the Gobuf, zeroes those three fields, and jumps to buf.pc.
      		// Does not return to its caller.
   306		MOVL	buf+0(FP), BX		// gobuf
   307		MOVL	gobuf_g(BX), DX	// DX = buf.g
   308		MOVL	0(DX), CX		// make sure g != nil
   309		get_tls(CX)
   310		MOVL	DX, g(CX)	// current g = buf.g
   311		MOVL	gobuf_sp(BX), SP	// restore SP
   312		MOVL	gobuf_ret(BX), AX	// AX = buf.ret
   313		MOVL	gobuf_ctxt(BX), DX	// DX = buf.ctxt
   314		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   315		MOVL	$0, gobuf_ret(BX)
   316		MOVL	$0, gobuf_ctxt(BX)
   317		MOVL	gobuf_pc(BX), BX	// BX = buf.pc (the Gobuf pointer is no longer needed)
   318		JMP	BX
   319	
   320	// func mcall(fn func(*g))
   321	// Switch to m->g0's stack, call fn(g).
   322	// Fn must never return. It should gogo(&g->sched)
   323	// to keep running g.
   324	TEXT runtime·mcall(SB), NOSPLIT, $0-4
      		// Saves the caller's PC and SP in g->sched, switches to m->g0 and
      		// its saved stack pointer, and calls fn with the old g pushed as
      		// its argument. Jumps to badmcall when already on g0, and to
      		// badmcall2 if fn returns.
   325		MOVL	fn+0(FP), DI	// DI = fn (pointer whose first word is the code address)
   326	
   327		get_tls(DX)
   328		MOVL	g(DX), AX	// save state in g->sched
   329		MOVL	0(SP), BX	// caller's PC
   330		MOVL	BX, (g_sched+gobuf_pc)(AX)
   331		LEAL	fn+0(FP), BX	// caller's SP
   332		MOVL	BX, (g_sched+gobuf_sp)(AX)
   333		MOVL	AX, (g_sched+gobuf_g)(AX)
   334	
   335		// switch to m->g0 & its stack, call fn
   336		MOVL	g(DX), BX
   337		MOVL	g_m(BX), BX	// BX = g.m
   338		MOVL	m_g0(BX), SI	// SI = m.g0
   339		CMPL	SI, AX	// if g == m->g0 call badmcall
   340		JNE	3(PC)	// g != g0: skip the badmcall jump
   341		MOVL	$runtime·badmcall(SB), AX
   342		JMP	AX
   343		MOVL	SI, g(DX)	// g = m->g0
   344		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   345		PUSHL	AX	// argument to fn: the g that called mcall
   346		MOVL	DI, DX	// DX = fn, the closure pointer
   347		MOVL	0(DI), DI	// DI = code address stored in the first word of fn
   348		CALL	DI
   349		POPL	AX	// reached only if fn returns, which it must not
   350		MOVL	$runtime·badmcall2(SB), AX
   351		JMP	AX
   352		RET
   353	
   354	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   355	// of the G stack. We need to distinguish the routine that
   356	// lives at the bottom of the G stack from the one that lives
   357	// at the top of the system stack because the one at the top of
   358	// the system stack terminates the stack walk (see topofstack()).
   359	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   360		RET	// no body: systemstack only stores this function's address as the saved PC in g->sched
   361	
   362	// func systemstack(fn func())
   363	TEXT runtime·systemstack(SB), NOSPLIT, $0-4
      		// Runs fn on the g0 stack. If the current g is already gsignal or
      		// g0, fn is tail-called in place. If it is m->curg, the state is
      		// saved in g->sched, execution moves to g0's saved stack pointer,
      		// fn is called, and the original stack is restored. Any other g
      		// goes to the bad label.
   364		MOVL	fn+0(FP), DI	// DI = fn
   365		get_tls(CX)
   366		MOVL	g(CX), AX	// AX = g
   367		MOVL	g_m(AX), BX	// BX = m
   368	
   369		CMPL	AX, m_gsignal(BX)
   370		JEQ	noswitch	// running on gsignal: call fn in place
   371	
   372		MOVL	m_g0(BX), DX	// DX = g0
   373		CMPL	AX, DX
   374		JEQ	noswitch	// running on g0: call fn in place
   375	
   376		CMPL	AX, m_curg(BX)
   377		JNE	bad
   378	
   379		// switch stacks
   380		// save our state in g->sched. Pretend to
   381		// be systemstack_switch if the G stack is scanned.
   382		MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   383		MOVL	SP, (g_sched+gobuf_sp)(AX)
   384		MOVL	AX, (g_sched+gobuf_g)(AX)
   385	
   386		// switch to g0
   387		get_tls(CX)
   388		MOVL	DX, g(CX)
   389		MOVL	(g_sched+gobuf_sp)(DX), BX	// BX = g0's saved stack pointer
   390		// make it look like mstart called systemstack on g0, to stop traceback
   391		SUBL	$4, BX	// make room for one word
   392		MOVL	$runtime·mstart(SB), DX
   393		MOVL	DX, 0(BX)	// store mstart's address where a return address would sit
   394		MOVL	BX, SP
   395	
   396		// call target function
   397		MOVL	DI, DX	// DX = fn, the closure pointer
   398		MOVL	0(DI), DI	// DI = code address stored in the first word of fn
   399		CALL	DI
   400	
   401		// switch back to g
   402		get_tls(CX)
   403		MOVL	g(CX), AX
   404		MOVL	g_m(AX), BX
   405		MOVL	m_curg(BX), AX	// AX = m.curg, the g saved before the switch
   406		MOVL	AX, g(CX)
   407		MOVL	(g_sched+gobuf_sp)(AX), SP	// back on the original stack
   408		MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
   409		RET
   410	
   411	noswitch:
   412		// already on system stack; tail call the function
   413		// Using a tail call here cleans up tracebacks since we won't stop
   414		// at an intermediate systemstack.
   415		MOVL	DI, DX	// DX = fn, the closure pointer
   416		MOVL	0(DI), DI	// DI = code address stored in the first word of fn
   417		JMP	DI
   418	
   419	bad:
   420		// Bad: g is not gsignal, not g0, not curg. What is it?
   421		// Hide call from linker nosplit analysis.
   422		MOVL	$runtime·badsystemstack(SB), AX
   423		CALL	AX
   424		INT	$3	// trap if badsystemstack returns
   425	
   426	/*
   427	 * support for morestack
   428	 */
   429	
   430	// Called during function prolog when more stack is needed.
   431	//
   432	// The traceback routines see morestack on a g0 as being
   433	// the top of a stack (for example, morestack calling newstack
   434	// calling the scheduler calling newm calling gc), so we must
   435	// record an argument size. For that purpose, it has no arguments.
   436	TEXT runtime·morestack(SB),NOSPLIT,$0-0
      		// Refuses to run on g0 or gsignal, records f's caller in m->morebuf
      		// and f's own context (including DX as ctxt) in g->sched, then
      		// switches to g0's saved stack pointer and calls newstack.
   437		// Cannot grow scheduler stack (m->g0).
   438		get_tls(CX)
   439		MOVL	g(CX), BX
   440		MOVL	g_m(BX), BX	// BX = m
   441		MOVL	m_g0(BX), SI
   442		CMPL	g(CX), SI
   443		JNE	3(PC)	// not on g0: skip the two calls below
   444		CALL	runtime·badmorestackg0(SB)
   445		CALL	runtime·abort(SB)
   446	
   447		// Cannot grow signal stack.
   448		MOVL	m_gsignal(BX), SI
   449		CMPL	g(CX), SI
   450		JNE	3(PC)	// not on gsignal: skip the two calls below
   451		CALL	runtime·badmorestackgsignal(SB)
   452		CALL	runtime·abort(SB)
   453	
   454		// Called from f.
   455		// Set m->morebuf to f's caller.
   456		NOP	SP	// tell vet SP changed - stop checking offsets
   457		MOVL	4(SP), DI	// f's caller's PC
   458		MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   459		LEAL	8(SP), CX	// f's caller's SP
   460		MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   461		get_tls(CX)
   462		MOVL	g(CX), SI	// SI = current g
   463		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   464	
   465		// Set g->sched to context in f.
   466		MOVL	0(SP), AX	// f's PC
   467		MOVL	AX, (g_sched+gobuf_pc)(SI)
   468		MOVL	SI, (g_sched+gobuf_g)(SI)
   469		LEAL	4(SP), AX	// f's SP
   470		MOVL	AX, (g_sched+gobuf_sp)(SI)
   471		MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX on entry is saved as ctxt (morestack_noctxt zeroes it first)
   472	
   473		// Call newstack on m->g0's stack.
   474		MOVL	m_g0(BX), BP
   475		MOVL	BP, g(CX)	// current g = g0
   476		MOVL	(g_sched+gobuf_sp)(BP), AX
   477		MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   478		MOVL	AX, SP
   479		CALL	runtime·newstack(SB)
   480		CALL	runtime·abort(SB)	// crash if newstack returns
   481		RET
   482	
   483	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
   484		MOVL	$0, DX	// morestack stores DX into g->sched.ctxt, so zero it here
   485		JMP runtime·morestack(SB)	// tail jump: the stack layout morestack inspects is left unchanged
   486	
   487	// reflectcall: call a function with the given argument list
   488	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   489	// we don't have variable-sized frames, so we use a small number
   490	// of constant-sized-frame functions to encode a few bits of size in the pc.
   491	// Caution: ugly multiline assembly macros in your future!
   492	
      	// DISPATCH(NAME, MAXSIZE) expects the argument size in CX. If CX is at
      	// most MAXSIZE (unsigned compare) it jumps to NAME through AX; otherwise
      	// JA 3(PC) skips the MOVL/JMP pair and falls through to whatever
      	// follows the macro use.
   493	#define DISPATCH(NAME,MAXSIZE)		\
   494		CMPL	CX, $MAXSIZE;		\
   495		JA	3(PC);			\
   496		MOVL	$NAME(SB), AX;		\
   497		JMP	AX
   498	// Note: can't just "JMP NAME(SB)" - bad inlining results.
   499	
   500	TEXT ·reflectcall(SB), NOSPLIT, $0-20
      		// Loads argsize into CX and tries each DISPATCH in increasing size
      		// order; the first callN whose N is at least argsize is jumped to
      		// with this function's arguments still in place. Sizes above
      		// 1073741824 reach badreflectcall.
   501		MOVL	argsize+12(FP), CX	// CX = argsize, the value every DISPATCH compares against
   502		DISPATCH(runtime·call16, 16)
   503		DISPATCH(runtime·call32, 32)
   504		DISPATCH(runtime·call64, 64)
   505		DISPATCH(runtime·call128, 128)
   506		DISPATCH(runtime·call256, 256)
   507		DISPATCH(runtime·call512, 512)
   508		DISPATCH(runtime·call1024, 1024)
   509		DISPATCH(runtime·call2048, 2048)
   510		DISPATCH(runtime·call4096, 4096)
   511		DISPATCH(runtime·call8192, 8192)
   512		DISPATCH(runtime·call16384, 16384)
   513		DISPATCH(runtime·call32768, 32768)
   514		DISPATCH(runtime·call65536, 65536)
   515		DISPATCH(runtime·call131072, 131072)
   516		DISPATCH(runtime·call262144, 262144)
   517		DISPATCH(runtime·call524288, 524288)
   518		DISPATCH(runtime·call1048576, 1048576)
   519		DISPATCH(runtime·call2097152, 2097152)
   520		DISPATCH(runtime·call4194304, 4194304)
   521		DISPATCH(runtime·call8388608, 8388608)
   522		DISPATCH(runtime·call16777216, 16777216)
   523		DISPATCH(runtime·call33554432, 33554432)
   524		DISPATCH(runtime·call67108864, 67108864)
   525		DISPATCH(runtime·call134217728, 134217728)
   526		DISPATCH(runtime·call268435456, 268435456)
   527		DISPATCH(runtime·call536870912, 536870912)
   528		DISPATCH(runtime·call1073741824, 1073741824)
   529		MOVL	$runtime·badreflectcall(SB), AX	// no frame size was large enough
   530		JMP	AX
   531	
   532	#define CALLFN(NAME,MAXSIZE)			\
   533	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   534		NO_LOCAL_POINTERS;			\
   535		/* copy argsize bytes from argptr to the bottom of this frame (SP) */	\
   536		MOVL	argptr+8(FP), SI;		\
   537		MOVL	argsize+12(FP), CX;		\
   538		MOVL	SP, DI;				\
   539		REP;MOVSB;				\
   540		/* call the code address in the first word of f, with DX = f */	\
   541		MOVL	f+4(FP), DX;			\
   542		MOVL	(DX), AX; 			\
   543		PCDATA  $PCDATA_StackMapIndex, $0;	\
   544		CALL	AX;				\
   545		/* copy results back: DX = argtype, DI = argptr+retoffset, SI = SP+retoffset, CX = argsize-retoffset, then callRet */	\
   546		MOVL	argtype+0(FP), DX;		\
   547		MOVL	argptr+8(FP), DI;		\
   548		MOVL	argsize+12(FP), CX;		\
   549		MOVL	retoffset+16(FP), BX;		\
   550		MOVL	SP, SI;				\
   551		ADDL	BX, DI;				\
   552		ADDL	BX, SI;				\
   553		SUBL	BX, CX;				\
   554		CALL	callRet<>(SB);			\
   555		RET
   556	
   557	// callRet copies return values back at the end of call*. This is a
   558	// separate function so it can allocate stack space for the arguments
   559	// to reflectcallmove. It does not follow the Go ABI; it expects its
   560	// arguments in registers.
   561	TEXT callRet<>(SB), NOSPLIT, $16-0
      		// Register inputs, as set up by CALLFN: DX = argtype,
      		// DI = argptr+retoffset, SI = frame copy+retoffset,
      		// CX = argsize-retoffset. They are stored as the four stack
      		// arguments of reflectcallmove, in that order.
   562		MOVL	DX, 0(SP)	// argument 1: argtype
   563		MOVL	DI, 4(SP)	// argument 2: destination inside the caller's argument block
   564		MOVL	SI, 8(SP)	// argument 3: source inside the callN frame
   565		MOVL	CX, 12(SP)	// argument 4: number of bytes
   566		CALL	runtime·reflectcallmove(SB)
   567		RET
   568	
   569	CALLFN(·call16, 16)	// one CALLFN expansion per frame size that reflectcall dispatches to
   570	CALLFN(·call32, 32)
   571	CALLFN(·call64, 64)
   572	CALLFN(·call128, 128)
   573	CALLFN(·call256, 256)
   574	CALLFN(·call512, 512)
   575	CALLFN(·call1024, 1024)
   576	CALLFN(·call2048, 2048)
   577	CALLFN(·call4096, 4096)
   578	CALLFN(·call8192, 8192)
   579	CALLFN(·call16384, 16384)
   580	CALLFN(·call32768, 32768)
   581	CALLFN(·call65536, 65536)
   582	CALLFN(·call131072, 131072)
   583	CALLFN(·call262144, 262144)
   584	CALLFN(·call524288, 524288)
   585	CALLFN(·call1048576, 1048576)
   586	CALLFN(·call2097152, 2097152)
   587	CALLFN(·call4194304, 4194304)
   588	CALLFN(·call8388608, 8388608)
   589	CALLFN(·call16777216, 16777216)
   590	CALLFN(·call33554432, 33554432)
   591	CALLFN(·call67108864, 67108864)
   592	CALLFN(·call134217728, 134217728)
   593	CALLFN(·call268435456, 268435456)
   594	CALLFN(·call536870912, 536870912)
   595	CALLFN(·call1073741824, 1073741824)
   596	
   597	TEXT runtime·procyield(SB),NOSPLIT,$0-4
      		// Spins for `cycles` iterations, executing PAUSE on each one.
      		// The argument size is declared as 4 because the single 4-byte
      		// argument is read through cycles+0(FP) below; this matches the
      		// other one-argument functions in this file (gosave, mcall, setg).
      		// NOTE: the counter is decremented before it is tested, so a
      		// count of 0 wraps around and spins 2^32 times.
   598		MOVL	cycles+0(FP), AX	// AX = remaining iterations
   599	again:
   600		PAUSE
   601		SUBL	$1, AX
   602		JNZ	again	// keep spinning until the counter reaches zero
   603		RET
   604	
   605	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   606		// Stores are already ordered on x86, so this is just a
   607		// compile barrier.
   608		RET	// no instructions are needed; the call itself is the barrier
   609	
   610	// void jmpdefer(fn, sp);
   611	// called from deferreturn.
   612	// 1. pop the caller
   613	// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   614	//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   615	//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   616	//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   617	// 3. jmp to the argument
   618	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
      		// Rewinds SP to the caller's frame, moves the caller's return
      		// address back so that returning re-executes the CALL that got
      		// here, then jumps to the deferred function with DX = fv.
   619		MOVL	fv+0(FP), DX	// fn
   620		MOVL	argp+4(FP), BX	// caller sp
   621		LEAL	-4(BX), SP	// caller sp after CALL
   622	#ifdef GOBUILDMODE_shared
   623		SUBL	$16, (SP)	// return to CALL again
   624	#else
   625		SUBL	$5, (SP)	// return to CALL again
   626	#endif
   627		MOVL	0(DX), BX	// BX = code address stored in the first word of fv
   628		JMP	BX	// but first run the deferred function
   629	
   630	// Save state of caller into g->sched.
   631	TEXT gosave<>(SB),NOSPLIT,$0
      		// File-local variant of gosave used by asmcgocall: it writes the
      		// caller's SP and PC into the current g's sched, clears sched.ret,
      		// and calls badctxt if sched.ctxt is non-zero. AX and BX are
      		// preserved for the caller.
   632		PUSHL	AX
   633		PUSHL	BX
   634		get_tls(BX)
   635		MOVL	g(BX), BX	// BX = current g
   636		LEAL	arg+0(FP), AX	// AX = caller's SP
   637		MOVL	AX, (g_sched+gobuf_sp)(BX)
   638		MOVL	-4(AX), AX	// AX = return address, the word just below the caller's SP
   639		MOVL	AX, (g_sched+gobuf_pc)(BX)
   640		MOVL	$0, (g_sched+gobuf_ret)(BX)
   641		// Assert ctxt is zero. See func save.
   642		MOVL	(g_sched+gobuf_ctxt)(BX), AX
   643		TESTL	AX, AX
   644		JZ	2(PC)	// ctxt == 0: skip the badctxt call
   645		CALL	runtime·badctxt(SB)
   646		POPL	BX
   647		POPL	AX
   648		RET
   649	
   650	// func asmcgocall(fn, arg unsafe.Pointer) int32
   651	// Call fn(arg) on the scheduler stack,
   652	// aligned appropriately for the gcc ABI.
   653	// See cgocall.go for more details.
   654	TEXT ·asmcgocall(SB),NOSPLIT,$0-12
      		// Three paths:
      		//   - no g yet (nosave): call fn on the current stack, keeping the
      		//     original SP in a stack slot;
      		//   - already on g0 or gsignal: go straight to noswitch;
      		//   - otherwise: save state with gosave<>, switch to g0's saved
      		//     stack pointer, then fall into noswitch.
      		// The value fn leaves in AX is stored as the int32 result.
   655		MOVL	fn+0(FP), AX
   656		MOVL	arg+4(FP), BX
   657	
   658		MOVL	SP, DX	// DX = SP on entry; used for the depth computation and the nosave restore
   659	
   660		// Figure out if we need to switch to m->g0 stack.
   661		// We get called to create new OS threads too, and those
   662		// come in on the m->g0 stack already.
   663		get_tls(CX)
   664		MOVL	g(CX), BP
   665		CMPL	BP, $0
   666		JEQ	nosave	// Don't even have a G yet.
   667		MOVL	g_m(BP), BP	// BP = m
   668		MOVL	m_g0(BP), SI	// SI = g0
   669		MOVL	g(CX), DI	// DI = current g
   670		CMPL	SI, DI
   671		JEQ	noswitch
   672		CMPL	DI, m_gsignal(BP)
   673		JEQ	noswitch
   674		CALL	gosave<>(SB)
   675		get_tls(CX)
   676		MOVL	SI, g(CX)	// current g = g0
   677		MOVL	(g_sched+gobuf_sp)(SI), SP
   678	
   679	noswitch:
   680		// Now on a scheduling stack (a pthread-created stack).
   681		SUBL	$32, SP
   682		ANDL	$~15, SP	// alignment, perhaps unnecessary
   683		MOVL	DI, 8(SP)	// save g
   684		MOVL	(g_stack+stack_hi)(DI), DI
   685		SUBL	DX, DI	// DI = g.stack.hi - entry SP
   686		MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   687		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   688		CALL	AX
   689	
   690		// Restore registers, g, stack pointer.
   691		get_tls(CX)
   692		MOVL	8(SP), DI	// DI = saved g
   693		MOVL	(g_stack+stack_hi)(DI), SI
   694		SUBL	4(SP), SI	// SI = g.stack.hi - saved depth = the original SP
   695		MOVL	DI, g(CX)
   696		MOVL	SI, SP
   697	
   698		MOVL	AX, ret+8(FP)	// result = value fn left in AX
   699		RET
   700	nosave:
   701		// Now on a scheduling stack (a pthread-created stack).
   702		SUBL	$32, SP
   703		ANDL	$~15, SP	// alignment, perhaps unnecessary
   704		MOVL	DX, 4(SP)	// save original stack pointer
   705		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   706		CALL	AX
   707	
   708		MOVL	4(SP), CX	// restore original stack pointer
   709		MOVL	CX, SP
   710		MOVL	AX, ret+8(FP)	// result = value fn left in AX
   711		RET
   712	
   713	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   714	// Turn the fn into a Go func (by taking its address) and call
   715	// cgocallback_gofunc.
   716	TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
      		// Copies the four arguments into this frame, replacing fn with the
      		// address of the fn argument slot, and calls cgocallback_gofunc
      		// indirectly through AX.
   717		LEAL	fn+0(FP), AX	// address of the fn slot, not its value
   718		MOVL	AX, 0(SP)
   719		MOVL	frame+4(FP), AX
   720		MOVL	AX, 4(SP)
   721		MOVL	framesize+8(FP), AX
   722		MOVL	AX, 8(SP)
   723		MOVL	ctxt+12(FP), AX
   724		MOVL	AX, 12(SP)
   725		MOVL	$runtime·cgocallback_gofunc(SB), AX
   726		CALL	AX
   727		RET
   728	
   729	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   730	// See cgocall.go for more details.
   731	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   732		NO_LOCAL_POINTERS
   733	
   734		// If g is nil, Go did not create the current thread.
   735		// Call needm to obtain one for temporary use.
   736		// In this case, we're running on the thread stack, so there's
   737		// lots of space, but the linker doesn't know. Hide the call from
   738		// the linker analysis by using an indirect call through AX.
   739		get_tls(CX)
   740	#ifdef GOOS_windows
   741		MOVL	$0, BP
   742		CMPL	CX, $0
   743		JEQ	2(PC) // TODO
   744	#endif
   745		MOVL	g(CX), BP
   746		CMPL	BP, $0
   747		JEQ	needm
   748		MOVL	g_m(BP), BP	// BP = m
   749		MOVL	BP, DX // saved copy of oldm
   750		JMP	havem
   751	needm:
   752		MOVL	$0, 0(SP)	// zero the first frame slot before calling needm
   753		MOVL	$runtime·needm(SB), AX
   754		CALL	AX
   755		MOVL	0(SP), DX	// DX (oldm) = that slot's contents; presumably still 0 - confirm against needm
   756		get_tls(CX)
   757		MOVL	g(CX), BP
   758		MOVL	g_m(BP), BP	// BP = the m obtained by needm
   759	
   760		// Set m->sched.sp = SP, so that if a panic happens
   761		// during the function we are about to execute, it will
   762		// have a valid SP to run on the g0 stack.
   763		// The next few lines (after the havem label)
   764		// will save this SP onto the stack and then write
   765		// the same SP back to m->sched.sp. That seems redundant,
   766		// but if an unrecovered panic happens, unwindm will
   767		// restore the g->sched.sp from the stack location
   768		// and then systemstack will try to use it. If we don't set it here,
   769		// that restored SP will be uninitialized (typically 0) and
   770		// will not be usable.
   771		MOVL	m_g0(BP), SI
   772		MOVL	SP, (g_sched+gobuf_sp)(SI)
   773	
   774	havem:
   775		// Now there's a valid m, and we're running on its m->g0.
   776		// Save current m->g0->sched.sp on stack and then set it to SP.
   777		// Save current sp in m->g0->sched.sp in preparation for
   778		// switch back to m->curg stack.
   779		// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   780		MOVL	m_g0(BP), SI
   781		MOVL	(g_sched+gobuf_sp)(SI), AX
   782		MOVL	AX, 0(SP)	// old g0 sched.sp kept at 0(SP)
   783		MOVL	SP, (g_sched+gobuf_sp)(SI)
   784	
   785		// Switch to m->curg stack and call runtime.cgocallbackg.
   786		// Because we are taking over the execution of m->curg
   787		// but *not* resuming what had been running, we need to
   788		// save that information (m->curg->sched) so we can restore it.
   789		// We can restore m->curg->sched.sp easily, because calling
   790		// runtime.cgocallbackg leaves SP unchanged upon return.
   791		// To save m->curg->sched.pc, we push it onto the stack.
   792		// This has the added benefit that it looks to the traceback
   793		// routine like cgocallbackg is going to return to that
   794		// PC (because the frame we allocate below has the same
   795		// size as cgocallback_gofunc's frame declared above)
   796		// so that the traceback will seamlessly trace back into
   797		// the earlier calls.
   798		//
   799		// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
   800		// 8(SP) is unused.
   801		MOVL	m_curg(BP), SI
   802		MOVL	SI, g(CX)	// current g = m.curg
   803		MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   804		MOVL	(g_sched+gobuf_pc)(SI), BP
   805		MOVL	BP, -4(DI)	// saved sched.pc sits where a return address would
   806		MOVL	ctxt+12(FP), CX
   807		LEAL	-(4+12)(DI), SP	// 4 bytes for the saved pc plus a 12-byte frame
   808		MOVL	DX, 4(SP)
   809		MOVL	CX, 0(SP)	// argument to cgocallbackg: ctxt
   810		CALL	runtime·cgocallbackg(SB)
   811		MOVL	4(SP), DX	// reload oldm
   812	
   813		// Restore g->sched (== m->curg->sched) from saved values.
   814		get_tls(CX)
   815		MOVL	g(CX), SI
   816		MOVL	12(SP), BP	// the sched.pc stored at -4(DI) before the call
   817		MOVL	BP, (g_sched+gobuf_pc)(SI)
   818		LEAL	(12+4)(SP), DI	// the sched.sp value the frame was carved from
   819		MOVL	DI, (g_sched+gobuf_sp)(SI)
   820	
   821		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   822		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   823		// so we do not have to restore it.)
   824		MOVL	g(CX), BP
   825		MOVL	g_m(BP), BP
   826		MOVL	m_g0(BP), SI
   827		MOVL	SI, g(CX)
   828		MOVL	(g_sched+gobuf_sp)(SI), SP
   829		MOVL	0(SP), AX	// old g0 sched.sp saved at havem
   830		MOVL	AX, (g_sched+gobuf_sp)(SI)
   831	
   832		// If the m on entry was nil, we called needm above to borrow an m
   833		// for the duration of the call. Since the call is over, return it with dropm.
   834		CMPL	DX, $0
   835		JNE 3(PC)	// oldm != nil: skip the dropm call
   836		MOVL	$runtime·dropm(SB), AX
   837		CALL	AX
   838	
   839		// Done!
   840		RET
   841	
   842	// void setg(G*); set g. for use by needm.
   843	TEXT runtime·setg(SB), NOSPLIT, $0-4
   844		MOVL	gg+0(FP), BX	// BX = new g
   845	#ifdef GOOS_windows
   846		CMPL	BX, $0
   847		JNE	settls
   848		MOVL	$0, 0x14(FS)	// g == nil: clear the word at 0x14(FS) and return
   849		RET
   850	settls:
   851		MOVL	g_m(BX), AX
   852		LEAL	m_tls(AX), AX	// AX = &g.m.tls
   853		MOVL	AX, 0x14(FS)	// point the word at 0x14(FS) at that m's tls array
   854	#endif
   855		get_tls(CX)
   856		MOVL	BX, g(CX)	// store the new g in the TLS slot
   857		RET
   858	
   859	// void setg_gcc(G*); set g. for use by gcc
   860	TEXT setg_gcc<>(SB), NOSPLIT, $0
      		// rt0_go passes this function's address to _cgo_init as argument 2.
   861		get_tls(AX)
   862		MOVL	gg+0(FP), DX	// DX = the g passed as the first stack argument
   863		MOVL	DX, g(AX)	// store it in the TLS slot
   864		RET
   865	
   866	TEXT runtime·abort(SB),NOSPLIT,$0-0
   867		INT	$3	// raise the breakpoint interrupt
   868	loop:
   869		JMP	loop	// never return: spin forever if execution continues past the interrupt
   870	
   871	// check that SP is within the current g's stack: abort unless g->stack.lo < SP < g->stack.hi (both compares are strict)
   872	TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   873		get_tls(CX)
   874		MOVL	g(CX), AX	// AX = current g
   875		CMPL	(g_stack+stack_hi)(AX), SP
   876		JHI	2(PC)	// stack.hi > SP (unsigned): skip the abort
   877		CALL	runtime·abort(SB)
   878		CMPL	SP, (g_stack+stack_lo)(AX)
   879		JHI	2(PC)	// SP > stack.lo (unsigned): skip the abort
   880		CALL	runtime·abort(SB)
   881		RET
   882	
   883	// func cputicks() int64
   884	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
      		// Returns the RDTSC counter as a 64-bit value. When the HasSSE2
      		// flag is set, a fence is executed first: LFENCE if
      		// lfenceBeforeRdtsc is 1 (rt0_go sets it for "GenuineIntel"),
      		// otherwise MFENCE.
   885		CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
   886		JNE	done	// HasSSE2 flag not set: read the counter without a fence
   887		CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   888		JNE	mfence
   889		LFENCE
   890		JMP	done
   891	mfence:
   892		MFENCE
   893	done:
   894		RDTSC
   895		MOVL	AX, ret_lo+0(FP)	// low 32 bits of the counter
   896		MOVL	DX, ret_hi+4(FP)	// high 32 bits of the counter
   897		RET
   898	
   899	TEXT ldt0setup<>(SB),NOSPLIT,$16-0
   900		// set up ldt 7 to point at m0.tls
   901		// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   902		// the entry number is just a hint.  setldt will set up GS with what it used.
   903		MOVL	$7, 0(SP)	// setldt argument 1: entry number (a hint, per the note above)
   904		LEAL	runtime·m0+m_tls(SB), AX
   905		MOVL	AX, 4(SP)	// setldt argument 2: address of m0.tls
   906		MOVL	$32, 8(SP)	// sizeof(tls array)
   907		CALL	runtime·setldt(SB)
   908		RET
   909	
   910	TEXT runtime·emptyfunc(SB),0,$0-0
   911		RET	// no body; the flags field is 0 (not NOSPLIT), and rt0_go calls this with the note "fault if stack check is wrong"
   912	
   913	// hash function using AES hardware instructions
   914	TEXT runtime·aeshash(SB),NOSPLIT,$0-16
      		// Loads the register inputs aeshashbody documents (AX = data,
      		// BX = length, DX = result address) and tail-jumps to it. The
      		// argument at 4(FP) is not loaded here; aeshashbody reads it
      		// itself as h+4(FP).
   915		MOVL	p+0(FP), AX	// ptr to data
   916		MOVL	s+8(FP), BX	// size
   917		LEAL	ret+12(FP), DX	// DX = address of the result slot
   918		JMP	aeshashbody<>(SB)
   919	
   920	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
      		// String variant of aeshash: unpacks the data pointer and length
      		// from the string header p points to, then tail-jumps to
      		// aeshashbody with AX = data, BX = length, DX = result address.
   921		MOVL	p+0(FP), AX	// ptr to string object
   922		MOVL	4(AX), BX	// length of string
   923		MOVL	(AX), AX	// string data
   924		LEAL	ret+8(FP), DX	// DX = address of the result slot
   925		JMP	aeshashbody<>(SB)
   926	
   927	// AX: data (address of the first byte to hash)
   928	// BX: length (number of bytes to hash)
   929	// DX: address to put return value (the low 32 bits of the final state are stored there)
   930	TEXT aeshashbody<>(SB),NOSPLIT,$0-0	// entered by JMP; h+4(FP) below is the seed argument of the function that jumped here
   931		MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   932		PINSRW	$4, BX, X0	            // 16 bits of length
   933		PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   934		MOVO	X0, X1                      // save unscrambled seed
   935		PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   936		AESENC	X0, X0                      // scramble seed
   937	
   938		CMPL	BX, $16	// dispatch on length; JB/JBE make the compares unsigned
   939		JB	aes0to15
   940		JE	aes16
   941		CMPL	BX, $32
   942		JBE	aes17to32
   943		CMPL	BX, $64
   944		JBE	aes33to64
   945		JMP	aes65plus
   946	
   947	aes0to15:
   948		TESTL	BX, BX
   949		JE	aes0	// nothing to load for an empty input
   950	
   951		ADDL	$16, AX	// AX = data+16; the loads below use negative offsets to compensate
   952		TESTW	$0xff0, AX	// ZF set iff bits 4-11 of data+16 are all zero
   953		JE	endofpage
   954	
   955		// 16 bytes loaded at this address won't cross
   956		// a page boundary, so we can load it directly.
   957		MOVOU	-16(AX), X1	// the 16 bytes starting at data (may include bytes past the input)
   958		ADDL	BX, BX	// BX = 2*length, so (BX*8) below steps in 16-byte table entries
   959		PAND	masks<>(SB)(BX*8), X1	// zero everything above the low `length` bytes
   960	
   961	final1:
   962		AESENC	X0, X1  // scramble input, xor in seed
   963		AESENC	X1, X1  // scramble combo 2 times
   964		AESENC	X1, X1
   965		MOVL	X1, (DX)	// low 32 bits of the state are the hash
   966		RET
   967	
   968	endofpage:
   969		// data+16 has bits 4-11 clear, i.e. the data address ends in
   970		// 1111 1111 xxxx (last 16 bytes of a 4096-byte page). Might be up against
   971		// a page boundary, so load ending at last byte. Then shift bytes down using pshufb.
   972		MOVOU	-32(AX)(BX*1), X1	// AX is data+16 here, so this is the 16 bytes ending at data+length
   973		ADDL	BX, BX	// BX = 2*length, so (BX*8) below steps in 16-byte table entries
   974		PSHUFB	shifts<>(SB)(BX*8), X1	// move the top `length` bytes to the bottom, zero the rest
   975		JMP	final1
   976	
   977	aes0:
   978		// Return scrambled input seed
   979		AESENC	X0, X0
   980		MOVL	X0, (DX)
   981		RET
   982	
   983	aes16:
   984		MOVOU	(AX), X1	// exactly 16 bytes: no masking or shifting needed
   985		JMP	final1
   986	
   987	aes17to32:
   988		// make second starting seed
   989		PXOR	runtime·aeskeysched+16(SB), X1
   990		AESENC	X1, X1
   991	
   992		// load data to be hashed
   993		MOVOU	(AX), X2	// first 16 bytes
   994		MOVOU	-16(AX)(BX*1), X3	// last 16 bytes; overlaps X2 when length < 32
   995	
   996		// scramble 3 times
   997		AESENC	X0, X2
   998		AESENC	X1, X3
   999		AESENC	X2, X2
  1000		AESENC	X3, X3
  1001		AESENC	X2, X2
  1002		AESENC	X3, X3
  1003	
  1004		// combine results
  1005		PXOR	X3, X2
  1006		MOVL	X2, (DX)
  1007		RET
  1008	
  1009	aes33to64:
  1010		// make 3 more starting seeds
  1011		MOVO	X1, X2
  1012		MOVO	X1, X3
  1013		PXOR	runtime·aeskeysched+16(SB), X1
  1014		PXOR	runtime·aeskeysched+32(SB), X2
  1015		PXOR	runtime·aeskeysched+48(SB), X3
  1016		AESENC	X1, X1
  1017		AESENC	X2, X2
  1018		AESENC	X3, X3
  1019	
  1020		MOVOU	(AX), X4	// first 32 bytes in X4, X5
  1021		MOVOU	16(AX), X5
  1022		MOVOU	-32(AX)(BX*1), X6	// last 32 bytes in X6, X7; overlap the first 32 when length < 64
  1023		MOVOU	-16(AX)(BX*1), X7
  1024	
  1025		AESENC	X0, X4	// round 1: each data lane is keyed by its own seed
  1026		AESENC	X1, X5
  1027		AESENC	X2, X6
  1028		AESENC	X3, X7
  1029	
  1030		AESENC	X4, X4	// round 2: each lane scrambles itself
  1031		AESENC	X5, X5
  1032		AESENC	X6, X6
  1033		AESENC	X7, X7
  1034	
  1035		AESENC	X4, X4	// round 3
  1036		AESENC	X5, X5
  1037		AESENC	X6, X6
  1038		AESENC	X7, X7
  1039	
  1040		PXOR	X6, X4	// fold the four lanes into X4
  1041		PXOR	X7, X5
  1042		PXOR	X5, X4
  1043		MOVL	X4, (DX)
  1044		RET
  1045	
  1046	aes65plus:
  1047		// make 3 more starting seeds
  1048		MOVO	X1, X2
  1049		MOVO	X1, X3
  1050		PXOR	runtime·aeskeysched+16(SB), X1
  1051		PXOR	runtime·aeskeysched+32(SB), X2
  1052		PXOR	runtime·aeskeysched+48(SB), X3
  1053		AESENC	X1, X1
  1054		AESENC	X2, X2
  1055		AESENC	X3, X3
  1056	
  1057		// start with last (possibly overlapping) block
  1058		MOVOU	-64(AX)(BX*1), X4
  1059		MOVOU	-48(AX)(BX*1), X5
  1060		MOVOU	-32(AX)(BX*1), X6
  1061		MOVOU	-16(AX)(BX*1), X7
  1062	
  1063		// scramble state once
  1064		AESENC	X0, X4
  1065		AESENC	X1, X5
  1066		AESENC	X2, X6
  1067		AESENC	X3, X7
  1068	
  1069		// compute number of remaining 64-byte blocks
  1070		DECL	BX
  1071		SHRL	$6, BX	// BX = (length-1)/64, which is at least 1 because length > 64 here
  1072	
  1073	aesloop:
  1074		// scramble state, xor in a block
  1075		MOVOU	(AX), X0	// the seeds in X0-X3 were consumed above; X0-X3 now hold the next 64 input bytes
  1076		MOVOU	16(AX), X1
  1077		MOVOU	32(AX), X2
  1078		MOVOU	48(AX), X3
  1079		AESENC	X0, X4
  1080		AESENC	X1, X5
  1081		AESENC	X2, X6
  1082		AESENC	X3, X7
  1083	
  1084		// scramble state
  1085		AESENC	X4, X4
  1086		AESENC	X5, X5
  1087		AESENC	X6, X6
  1088		AESENC	X7, X7
  1089	
  1090		ADDL	$64, AX	// advance to the next 64-byte block
  1091		DECL	BX	// sets ZF for the JNE below
  1092		JNE	aesloop
  1093	
  1094		// 2 more scrambles to finish
  1095		AESENC	X4, X4
  1096		AESENC	X5, X5
  1097		AESENC	X6, X6
  1098		AESENC	X7, X7
  1099	
  1100		AESENC	X4, X4
  1101		AESENC	X5, X5
  1102		AESENC	X6, X6
  1103		AESENC	X7, X7
  1104	
  1105		PXOR	X6, X4	// fold the four lanes into X4
  1106		PXOR	X7, X5
  1107		PXOR	X5, X4
  1108		MOVL	X4, (DX)
  1109		RET
  1110	
  1111	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12	// AES-based hash of the 4 bytes at p, seeded with h
  1112		MOVL	p+0(FP), AX	// ptr to data
  1113		MOVL	h+4(FP), X0	// seed
  1114		PINSRD	$1, (AX), X0	// data
  1115		AESENC	runtime·aeskeysched+0(SB), X0	// three rounds, each keyed by 16 bytes of aeskeysched
  1116		AESENC	runtime·aeskeysched+16(SB), X0
  1117		AESENC	runtime·aeskeysched+32(SB), X0
  1118		MOVL	X0, ret+8(FP)	// low 32 bits of the state are the hash
  1119		RET
  1120	
  1121	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12	// AES-based hash of the 8 bytes at p, seeded with h
  1122		MOVL	p+0(FP), AX	// ptr to data
  1123		MOVQ	(AX), X0	// data
  1124		PINSRD	$2, h+4(FP), X0	// seed
  1125		AESENC	runtime·aeskeysched+0(SB), X0	// three rounds, each keyed by 16 bytes of aeskeysched
  1126		AESENC	runtime·aeskeysched+16(SB), X0
  1127		AESENC	runtime·aeskeysched+32(SB), X0
  1128		MOVL	X0, ret+8(FP)	// low 32 bits of the state are the hash
  1129		RET
  1130	
  1131	// simple mask to get rid of data in the high part of the register: 16 entries of 16 bytes, entry i has its low i bytes set to 0xff and the rest zero.
  1132	DATA masks<>+0x00(SB)/4, $0x00000000	// entry 0: keep nothing
  1133	DATA masks<>+0x04(SB)/4, $0x00000000
  1134	DATA masks<>+0x08(SB)/4, $0x00000000
  1135	DATA masks<>+0x0c(SB)/4, $0x00000000
  1136	
  1137	DATA masks<>+0x10(SB)/4, $0x000000ff	// entry 1: keep low 1 byte
  1138	DATA masks<>+0x14(SB)/4, $0x00000000
  1139	DATA masks<>+0x18(SB)/4, $0x00000000
  1140	DATA masks<>+0x1c(SB)/4, $0x00000000
  1141	
  1142	DATA masks<>+0x20(SB)/4, $0x0000ffff	// entry 2: keep low 2 bytes
  1143	DATA masks<>+0x24(SB)/4, $0x00000000
  1144	DATA masks<>+0x28(SB)/4, $0x00000000
  1145	DATA masks<>+0x2c(SB)/4, $0x00000000
  1146	
  1147	DATA masks<>+0x30(SB)/4, $0x00ffffff	// entry 3: keep low 3 bytes
  1148	DATA masks<>+0x34(SB)/4, $0x00000000
  1149	DATA masks<>+0x38(SB)/4, $0x00000000
  1150	DATA masks<>+0x3c(SB)/4, $0x00000000
  1151	
  1152	DATA masks<>+0x40(SB)/4, $0xffffffff	// entry 4: keep low 4 bytes
  1153	DATA masks<>+0x44(SB)/4, $0x00000000
  1154	DATA masks<>+0x48(SB)/4, $0x00000000
  1155	DATA masks<>+0x4c(SB)/4, $0x00000000
  1156	
  1157	DATA masks<>+0x50(SB)/4, $0xffffffff	// entry 5: keep low 5 bytes
  1158	DATA masks<>+0x54(SB)/4, $0x000000ff
  1159	DATA masks<>+0x58(SB)/4, $0x00000000
  1160	DATA masks<>+0x5c(SB)/4, $0x00000000
  1161	
  1162	DATA masks<>+0x60(SB)/4, $0xffffffff	// entry 6: keep low 6 bytes
  1163	DATA masks<>+0x64(SB)/4, $0x0000ffff
  1164	DATA masks<>+0x68(SB)/4, $0x00000000
  1165	DATA masks<>+0x6c(SB)/4, $0x00000000
  1166	
  1167	DATA masks<>+0x70(SB)/4, $0xffffffff	// entry 7: keep low 7 bytes
  1168	DATA masks<>+0x74(SB)/4, $0x00ffffff
  1169	DATA masks<>+0x78(SB)/4, $0x00000000
  1170	DATA masks<>+0x7c(SB)/4, $0x00000000
  1171	
  1172	DATA masks<>+0x80(SB)/4, $0xffffffff	// entry 8: keep low 8 bytes
  1173	DATA masks<>+0x84(SB)/4, $0xffffffff
  1174	DATA masks<>+0x88(SB)/4, $0x00000000
  1175	DATA masks<>+0x8c(SB)/4, $0x00000000
  1176	
  1177	DATA masks<>+0x90(SB)/4, $0xffffffff	// entry 9: keep low 9 bytes
  1178	DATA masks<>+0x94(SB)/4, $0xffffffff
  1179	DATA masks<>+0x98(SB)/4, $0x000000ff
  1180	DATA masks<>+0x9c(SB)/4, $0x00000000
  1181	
  1182	DATA masks<>+0xa0(SB)/4, $0xffffffff	// entry 10: keep low 10 bytes
  1183	DATA masks<>+0xa4(SB)/4, $0xffffffff
  1184	DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1185	DATA masks<>+0xac(SB)/4, $0x00000000
  1186	
  1187	DATA masks<>+0xb0(SB)/4, $0xffffffff	// entry 11: keep low 11 bytes
  1188	DATA masks<>+0xb4(SB)/4, $0xffffffff
  1189	DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1190	DATA masks<>+0xbc(SB)/4, $0x00000000
  1191	
  1192	DATA masks<>+0xc0(SB)/4, $0xffffffff	// entry 12: keep low 12 bytes
  1193	DATA masks<>+0xc4(SB)/4, $0xffffffff
  1194	DATA masks<>+0xc8(SB)/4, $0xffffffff
  1195	DATA masks<>+0xcc(SB)/4, $0x00000000
  1196	
  1197	DATA masks<>+0xd0(SB)/4, $0xffffffff	// entry 13: keep low 13 bytes
  1198	DATA masks<>+0xd4(SB)/4, $0xffffffff
  1199	DATA masks<>+0xd8(SB)/4, $0xffffffff
  1200	DATA masks<>+0xdc(SB)/4, $0x000000ff
  1201	
  1202	DATA masks<>+0xe0(SB)/4, $0xffffffff	// entry 14: keep low 14 bytes
  1203	DATA masks<>+0xe4(SB)/4, $0xffffffff
  1204	DATA masks<>+0xe8(SB)/4, $0xffffffff
  1205	DATA masks<>+0xec(SB)/4, $0x0000ffff
  1206	
  1207	DATA masks<>+0xf0(SB)/4, $0xffffffff	// entry 15: keep low 15 bytes
  1208	DATA masks<>+0xf4(SB)/4, $0xffffffff
  1209	DATA masks<>+0xf8(SB)/4, $0xffffffff
  1210	DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1211	
  1212	GLOBL masks<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only
  1213	
  1214	// these are arguments to pshufb (16 control vectors of 16 bytes each). Entry i (1..15) moves data down from
  1215	// the high i bytes of the register to the low i bytes of the register; its other control bytes are 0xff, which pshufb turns into zero bytes.
  1216	// index is how many bytes to move.
  1217	DATA shifts<>+0x00(SB)/4, $0x00000000	// entry 0: placeholder; aeshashbody sends length 0 to aes0 and never indexes it
  1218	DATA shifts<>+0x04(SB)/4, $0x00000000
  1219	DATA shifts<>+0x08(SB)/4, $0x00000000
  1220	DATA shifts<>+0x0c(SB)/4, $0x00000000
  1221	
  1222	DATA shifts<>+0x10(SB)/4, $0xffffff0f	// entry 1: source byte 15 -> byte 0
  1223	DATA shifts<>+0x14(SB)/4, $0xffffffff
  1224	DATA shifts<>+0x18(SB)/4, $0xffffffff
  1225	DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1226	
  1227	DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// entry 2: source bytes 14-15 -> bytes 0-1
  1228	DATA shifts<>+0x24(SB)/4, $0xffffffff
  1229	DATA shifts<>+0x28(SB)/4, $0xffffffff
  1230	DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1231	
  1232	DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// entry 3: source bytes 13-15 -> bytes 0-2
  1233	DATA shifts<>+0x34(SB)/4, $0xffffffff
  1234	DATA shifts<>+0x38(SB)/4, $0xffffffff
  1235	DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1236	
  1237	DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// entry 4: source bytes 12-15 -> bytes 0-3
  1238	DATA shifts<>+0x44(SB)/4, $0xffffffff
  1239	DATA shifts<>+0x48(SB)/4, $0xffffffff
  1240	DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1241	
  1242	DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// entry 5: source bytes 11-15 -> bytes 0-4
  1243	DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1244	DATA shifts<>+0x58(SB)/4, $0xffffffff
  1245	DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1246	
  1247	DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// entry 6: source bytes 10-15 -> bytes 0-5
  1248	DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1249	DATA shifts<>+0x68(SB)/4, $0xffffffff
  1250	DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1251	
  1252	DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// entry 7: source bytes 9-15 -> bytes 0-6
  1253	DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1254	DATA shifts<>+0x78(SB)/4, $0xffffffff
  1255	DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1256	
  1257	DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// entry 8: source bytes 8-15 -> bytes 0-7
  1258	DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1259	DATA shifts<>+0x88(SB)/4, $0xffffffff
  1260	DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1261	
  1262	DATA shifts<>+0x90(SB)/4, $0x0a090807	// entry 9: source bytes 7-15 -> bytes 0-8
  1263	DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1264	DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1265	DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1266	
  1267	DATA shifts<>+0xa0(SB)/4, $0x09080706	// entry 10: source bytes 6-15 -> bytes 0-9
  1268	DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1269	DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1270	DATA shifts<>+0xac(SB)/4, $0xffffffff
  1271	
  1272	DATA shifts<>+0xb0(SB)/4, $0x08070605	// entry 11: source bytes 5-15 -> bytes 0-10
  1273	DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1274	DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1275	DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1276	
  1277	DATA shifts<>+0xc0(SB)/4, $0x07060504	// entry 12: source bytes 4-15 -> bytes 0-11
  1278	DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1279	DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1280	DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1281	
  1282	DATA shifts<>+0xd0(SB)/4, $0x06050403	// entry 13: source bytes 3-15 -> bytes 0-12
  1283	DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1284	DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1285	DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1286	
  1287	DATA shifts<>+0xe0(SB)/4, $0x05040302	// entry 14: source bytes 2-15 -> bytes 0-13
  1288	DATA shifts<>+0xe4(SB)/4, $0x09080706
  1289	DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1290	DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1291	
  1292	DATA shifts<>+0xf0(SB)/4, $0x04030201	// entry 15: source bytes 1-15 -> bytes 0-14
  1293	DATA shifts<>+0xf4(SB)/4, $0x08070605
  1294	DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1295	DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1296	
  1297	GLOBL shifts<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only
  1298	
  1299	TEXT ·checkASM(SB),NOSPLIT,$0-1	// stores a 1-byte boolean in ret+0(FP): are both lookup tables 16-byte aligned?
  1300		// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned (aeshashbody uses them as PAND/PSHUFB memory operands)
  1301		MOVL	$masks<>(SB), AX
  1302		MOVL	$shifts<>(SB), BX
  1303		ORL	BX, AX	// a low bit set in either address survives the OR
  1304		TESTL	$15, AX	// ZF set iff the low 4 bits of both addresses are zero
  1305		SETEQ	ret+0(FP)	// ret = 1 when both are aligned, 0 otherwise
  1306		RET
  1307	
  1308	TEXT runtime·return0(SB), NOSPLIT, $0	// returns to the caller with AX set to 0
  1309		MOVL	$0, AX
  1310		RET
  1311	
  1312	// Called from cgo wrappers, this function returns g->m->curg.stack.hi in AX.
  1313	// Must obey the gcc calling convention; only AX and CX are written here.
  1314	TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1315		get_tls(CX)	// macro; makes g(CX) below address the current g
  1316		MOVL	g(CX), AX	// AX = g
  1317		MOVL	g_m(AX), AX	// AX = g.m
  1318		MOVL	m_curg(AX), AX	// AX = g.m.curg
  1319		MOVL	(g_stack+stack_hi)(AX), AX	// AX = g.m.curg.stack.hi
  1320		RET
  1321	
  1322	// The top-most function running on a goroutine
  1323	// returns to goexit+PCQuantum.
  1324	TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1325		BYTE	$0x90	// NOP; presumably the padding that goexit+PCQuantum skips over -- confirm PCQuantum for 386
  1326		CALL	runtime·goexit1(SB)	// does not return
  1327		// traceback from goexit1 must hit code range of goexit
  1328		BYTE	$0x90	// NOP; keeps the CALL's return address inside goexit's code range
  1329	
  1330	// Add a module's moduledata to the linked list of moduledata objects. This
  1331	// is called from .init_array by a function generated in the linker and so
  1332	// follows the platform ABI wrt register preservation -- it only touches AX,
  1333	// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1334	// instead the pointer to the moduledata is passed in AX.
  1335	TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1336		MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1337		MOVL	AX, moduledata_next(DX)	// tail.next = new moduledata
  1338		MOVL	AX, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
  1339		RET
  1340	
  1341	TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12	// converts uint32 a to float64 via the x87 unit; 8-byte frame holds a 64-bit integer
  1342		MOVL	a+0(FP), AX
  1343		MOVL	AX, 0(SP)	// low word = a
  1344		MOVL	$0, 4(SP)	// high word = 0, so the 64-bit value is a zero-extended
  1345		FMOVV	0(SP), F0	// load the 64-bit integer onto the x87 stack
  1346		FMOVDP	F0, ret+4(FP)	// store it as a float64 and pop
  1347		RET
  1348	
  1349	TEXT runtime·float64touint32(SB),NOSPLIT,$12-12	// converts float64 a to uint32 via the x87 unit; frame: 0(SP) saved control word, 4(SP) 64-bit integer
  1350		FMOVD	a+0(FP), F0	// load a onto the x87 stack
  1351		FSTCW	0(SP)	// save the caller's x87 control word
  1352		FLDCW	runtime·controlWord64trunc(SB)	// switch control word (truncating rounding, judging by the name -- confirm at its definition)
  1353		FMOVVP	F0, 4(SP)	// store as a 64-bit integer and pop
  1354		FLDCW	0(SP)	// restore the saved control word
  1355		MOVL	4(SP), AX	// low 32 bits of the 64-bit integer
  1356		MOVL	AX, ret+8(FP)
  1357		RET
  1358	
  1359	// gcWriteBarrier performs a heap pointer write and informs the GC.
  1360	//
  1361	// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1362	// - DI is the destination of the write
  1363	// - AX is the value being written at DI
  1364	// It clobbers FLAGS. It does not clobber any general-purpose registers,
  1365	// but may clobber others (e.g., SSE registers).
  1366	TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28	// frame slots: 0 DI, 4 AX, 8 DX, 12 BP, 16 SI, 20 CX, 24 BX
  1367		// Save the registers clobbered by the fast path. This is slightly
  1368		// faster than having the caller spill these.
  1369		MOVL	CX, 20(SP)
  1370		MOVL	BX, 24(SP)
  1371		// TODO: Consider passing g.m.p in as an argument so they can be shared
  1372		// across a sequence of write barriers.
  1373		get_tls(BX)
  1374		MOVL	g(BX), BX	// BX = g
  1375		MOVL	g_m(BX), BX	// BX = g.m
  1376		MOVL	m_p(BX), BX	// BX = g.m.p
  1377		MOVL	(p_wbBuf+wbBuf_next)(BX), CX	// CX = p.wbBuf.next
  1378		// Increment wbBuf.next position.
  1379		LEAL	8(CX), CX	// one record is 8 bytes: the value and the old *slot
  1380		MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
  1381		CMPL	CX, (p_wbBuf+wbBuf_end)(BX)	// flags are consumed by JEQ below; the MOVLs in between leave them intact
  1382		// Record the write.
  1383		MOVL	AX, -8(CX)	// Record value
  1384		MOVL	(DI), BX	// TODO: This turns bad writes into bad reads.
  1385		MOVL	BX, -4(CX)	// Record *slot
  1386		// Is the buffer full? (flags set in CMPL above)
  1387		JEQ	flush
  1388	ret:
  1389		MOVL	20(SP), CX	// restore the two registers the fast path used
  1390		MOVL	24(SP), BX
  1391		// Do the write.
  1392		MOVL	AX, (DI)
  1393		RET
  1394	
  1395	flush:
  1396		// Save all general purpose registers since these could be
  1397		// clobbered by wbBufFlush and were not saved by the caller.
  1398		MOVL	DI, 0(SP)	// Also first argument to wbBufFlush
  1399		MOVL	AX, 4(SP)	// Also second argument to wbBufFlush
  1400		// BX already saved
  1401		// CX already saved
  1402		MOVL	DX, 8(SP)
  1403		MOVL	BP, 12(SP)
  1404		MOVL	SI, 16(SP)
  1405		// DI already saved
  1406	
  1407		// This takes arguments DI and AX
  1408		CALL	runtime·wbBufFlush(SB)
  1409	
  1410		MOVL	0(SP), DI	// reload everything saved above; BX and CX are restored at ret
  1411		MOVL	4(SP), AX
  1412		MOVL	8(SP), DX
  1413		MOVL	12(SP), BP
  1414		MOVL	16(SP), SI
  1415		JMP	ret
  1416	
  1417	// Note: these functions use a special calling convention to save generated code space.
  1418	// Arguments are passed in registers, but the space for those arguments is allocated
  1419	// in the caller's stack frame. These stubs write the args into that stack space and
  1420	// then tail call (JMP) to the corresponding runtime handler.
  1421	// The tail call makes these stubs disappear in backtraces.
  1422	TEXT runtime·panicIndex(SB),NOSPLIT,$0-8	// x in AX, y in CX
  1423		MOVL	AX, x+0(FP)
  1424		MOVL	CX, y+4(FP)
  1425		JMP	runtime·goPanicIndex(SB)
  1426	TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8	// x in AX, y in CX
  1427		MOVL	AX, x+0(FP)
  1428		MOVL	CX, y+4(FP)
  1429		JMP	runtime·goPanicIndexU(SB)
  1430	TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8	// x in CX, y in DX
  1431		MOVL	CX, x+0(FP)
  1432		MOVL	DX, y+4(FP)
  1433		JMP	runtime·goPanicSliceAlen(SB)
  1434	TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8	// x in CX, y in DX
  1435		MOVL	CX, x+0(FP)
  1436		MOVL	DX, y+4(FP)
  1437		JMP	runtime·goPanicSliceAlenU(SB)
  1438	TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8	// x in CX, y in DX
  1439		MOVL	CX, x+0(FP)
  1440		MOVL	DX, y+4(FP)
  1441		JMP	runtime·goPanicSliceAcap(SB)
  1442	TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8	// x in CX, y in DX
  1443		MOVL	CX, x+0(FP)
  1444		MOVL	DX, y+4(FP)
  1445		JMP	runtime·goPanicSliceAcapU(SB)
  1446	TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8	// x in AX, y in CX
  1447		MOVL	AX, x+0(FP)
  1448		MOVL	CX, y+4(FP)
  1449		JMP	runtime·goPanicSliceB(SB)
  1450	TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8	// x in AX, y in CX
  1451		MOVL	AX, x+0(FP)
  1452		MOVL	CX, y+4(FP)
  1453		JMP	runtime·goPanicSliceBU(SB)
  1454	TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8	// x in DX, y in BX
  1455		MOVL	DX, x+0(FP)
  1456		MOVL	BX, y+4(FP)
  1457		JMP	runtime·goPanicSlice3Alen(SB)
  1458	TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8	// x in DX, y in BX
  1459		MOVL	DX, x+0(FP)
  1460		MOVL	BX, y+4(FP)
  1461		JMP	runtime·goPanicSlice3AlenU(SB)
  1462	TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8	// x in DX, y in BX
  1463		MOVL	DX, x+0(FP)
  1464		MOVL	BX, y+4(FP)
  1465		JMP	runtime·goPanicSlice3Acap(SB)
  1466	TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8	// x in DX, y in BX
  1467		MOVL	DX, x+0(FP)
  1468		MOVL	BX, y+4(FP)
  1469		JMP	runtime·goPanicSlice3AcapU(SB)
  1470	TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8	// x in CX, y in DX
  1471		MOVL	CX, x+0(FP)
  1472		MOVL	DX, y+4(FP)
  1473		JMP	runtime·goPanicSlice3B(SB)
  1474	TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8	// x in CX, y in DX
  1475		MOVL	CX, x+0(FP)
  1476		MOVL	DX, y+4(FP)
  1477		JMP	runtime·goPanicSlice3BU(SB)
  1478	TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8	// x in AX, y in CX
  1479		MOVL	AX, x+0(FP)
  1480		MOVL	CX, y+4(FP)
  1481		JMP	runtime·goPanicSlice3C(SB)
  1482	TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8	// x in AX, y in CX
  1483		MOVL	AX, x+0(FP)
  1484		MOVL	CX, y+4(FP)
  1485		JMP	runtime·goPanicSlice3CU(SB)
  1486	
  1487	// Extended versions for 64-bit indexes: the index arrives as a hi/lo register pair (hi always in SI), written to hi+0(FP) and lo+4(FP), followed by y at y+8(FP).
  1488	TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12	// hi in SI, lo in AX, y in CX
  1489		MOVL	SI, hi+0(FP)
  1490		MOVL	AX, lo+4(FP)
  1491		MOVL	CX, y+8(FP)
  1492		JMP	runtime·goPanicExtendIndex(SB)
  1493	TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12	// hi in SI, lo in AX, y in CX
  1494		MOVL	SI, hi+0(FP)
  1495		MOVL	AX, lo+4(FP)
  1496		MOVL	CX, y+8(FP)
  1497		JMP	runtime·goPanicExtendIndexU(SB)
  1498	TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12	// hi in SI, lo in CX, y in DX
  1499		MOVL	SI, hi+0(FP)
  1500		MOVL	CX, lo+4(FP)
  1501		MOVL	DX, y+8(FP)
  1502		JMP	runtime·goPanicExtendSliceAlen(SB)
  1503	TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12	// hi in SI, lo in CX, y in DX
  1504		MOVL	SI, hi+0(FP)
  1505		MOVL	CX, lo+4(FP)
  1506		MOVL	DX, y+8(FP)
  1507		JMP	runtime·goPanicExtendSliceAlenU(SB)
  1508	TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12	// hi in SI, lo in CX, y in DX
  1509		MOVL	SI, hi+0(FP)
  1510		MOVL	CX, lo+4(FP)
  1511		MOVL	DX, y+8(FP)
  1512		JMP	runtime·goPanicExtendSliceAcap(SB)
  1513	TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12	// hi in SI, lo in CX, y in DX
  1514		MOVL	SI, hi+0(FP)
  1515		MOVL	CX, lo+4(FP)
  1516		MOVL	DX, y+8(FP)
  1517		JMP	runtime·goPanicExtendSliceAcapU(SB)
  1518	TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12	// hi in SI, lo in AX, y in CX
  1519		MOVL	SI, hi+0(FP)
  1520		MOVL	AX, lo+4(FP)
  1521		MOVL	CX, y+8(FP)
  1522		JMP	runtime·goPanicExtendSliceB(SB)
  1523	TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12	// hi in SI, lo in AX, y in CX
  1524		MOVL	SI, hi+0(FP)
  1525		MOVL	AX, lo+4(FP)
  1526		MOVL	CX, y+8(FP)
  1527		JMP	runtime·goPanicExtendSliceBU(SB)
  1528	TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12	// hi in SI, lo in DX, y in BX
  1529		MOVL	SI, hi+0(FP)
  1530		MOVL	DX, lo+4(FP)
  1531		MOVL	BX, y+8(FP)
  1532		JMP	runtime·goPanicExtendSlice3Alen(SB)
  1533	TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12	// hi in SI, lo in DX, y in BX
  1534		MOVL	SI, hi+0(FP)
  1535		MOVL	DX, lo+4(FP)
  1536		MOVL	BX, y+8(FP)
  1537		JMP	runtime·goPanicExtendSlice3AlenU(SB)
  1538	TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12	// hi in SI, lo in DX, y in BX
  1539		MOVL	SI, hi+0(FP)
  1540		MOVL	DX, lo+4(FP)
  1541		MOVL	BX, y+8(FP)
  1542		JMP	runtime·goPanicExtendSlice3Acap(SB)
  1543	TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12	// hi in SI, lo in DX, y in BX
  1544		MOVL	SI, hi+0(FP)
  1545		MOVL	DX, lo+4(FP)
  1546		MOVL	BX, y+8(FP)
  1547		JMP	runtime·goPanicExtendSlice3AcapU(SB)
  1548	TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12	// hi in SI, lo in CX, y in DX
  1549		MOVL	SI, hi+0(FP)
  1550		MOVL	CX, lo+4(FP)
  1551		MOVL	DX, y+8(FP)
  1552		JMP	runtime·goPanicExtendSlice3B(SB)
  1553	TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12	// hi in SI, lo in CX, y in DX
  1554		MOVL	SI, hi+0(FP)
  1555		MOVL	CX, lo+4(FP)
  1556		MOVL	DX, y+8(FP)
  1557		JMP	runtime·goPanicExtendSlice3BU(SB)
  1558	TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12	// hi in SI, lo in AX, y in CX
  1559		MOVL	SI, hi+0(FP)
  1560		MOVL	AX, lo+4(FP)
  1561		MOVL	CX, y+8(FP)
  1562		JMP	runtime·goPanicExtendSlice3C(SB)
  1563	TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12	// hi in SI, lo in AX, y in CX
  1564		MOVL	SI, hi+0(FP)
  1565		MOVL	AX, lo+4(FP)
  1566		MOVL	CX, y+8(FP)
  1567		JMP	runtime·goPanicExtendSlice3CU(SB)
  1568	
  1569	#ifdef GOOS_android
  1570	// Use the free TLS_SLOT_APP slot #2 on Android Q.
  1571	// Earlier androids are set up in gcc_android.c.
  1572	DATA runtime·tls_g+0(SB)/4, $8	// initial value 8; presumably slot #2 * 4-byte slots -- confirm against the Android TLS layout
  1573	GLOBL runtime·tls_g+0(SB), NOPTR, $4	// one 4-byte word holding no Go pointer
  1574	#endif

View as plain text