
Text file src/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are passed on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif
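
// For example, a call to
//	void __tsan_read(ThreadState *thr, void *addr, void *pc);
// passes thr in RARG0, addr in RARG1 and pc in RARG2, i.e. in DI/SI/DX on
// SysV targets and in CX/DX/R8 on Windows.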

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)
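
// For reference, -race instrumentation conceptually turns each load into
// something like the following hypothetical Go sketch (the compiler emits
// these calls itself; users never write them):
//	func load(p *int32) int32 {
//		raceread(uintptr(unsafe.Pointer(p)))	// inserted by the compiler
//		return *p
//	}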

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)
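
// A plausible reading of the +1 above (not an authoritative statement of tsan
// internals): tsan treats each pc as a return address and subtracts 1 before
// symbolizing it, so bumping the function-start pc keeps the symbolized
// address inside the function.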

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)
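
// Range accesses cover operations that touch a block of memory at once, e.g.
// bulk copies. An illustrative Go-level view of what the instrumentation
// conceptually records (not literal emitted code):
//	var dst, src [64]byte
//	copy(dst[:], src[:])	// ~ racereadrange(&src[0], 64) + racewriterange(&dst[0], 64)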

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
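
// In Go-like pseudocode, the check above is (illustrative):
//	if (racearenastart <= addr && addr < racearenaend) ||
//		(racedatastart <= addr && addr < racedataend) {
//		racecall(fn, racectx, ...)	// fn is already in AX
//	}
//	// otherwise return without entering tsan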

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11
	JMP	racefuncenter<>(SB)
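
// The frame layout assumed here places the caller's return address 8 bytes
// below the passed frame pointer, so the -8(R11) load above recovers the pc
// that racefuncenter would have received directly.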

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15		// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)
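
// Under -race these TEXT symbols are the sync/atomic entry points: a call
// such as atomic.LoadInt32(&x) lands in LoadInt32 above, and racecallatomic<>
// forwards the whole Go argument frame to the __tsan_go_atomic32_* hook,
// which performs the access and writes the result back into the frame.
// Illustrative use (ordinary Go code; nothing special is written by the user):
//	var x int32
//	v := atomic.LoadInt32(&x)	// routed through __tsan_go_atomic32_load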

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET
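
// tsan implements fetch_add (returns the old value) while Go's Add returns
// the new value, hence the fix-up above. Worked example (illustrative):
//	x := int32(5)
//	v := atomic.AddInt32(&x, 3)
//	// tsan stores 8 to x and writes the old value 5 into ret+16(FP);
//	// the ADDL then turns the result into 5+3 = 8, so v == 8.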

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)
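
// Stack layout at entry to racecallatomic<> (it is CALLed from the thunks
// above, which are in turn CALLed by user code and have zero-size frames):
//	(SP)	return address into the sync/atomic thunk (passed as "pc")
//	8(SP)	return address into the user's code (passed as "caller pc")
//	16(SP)	start of the Go argument frame (addr, operands, result slot)
// Dereferencing 16(SP) up front makes a bad address fault here rather than
// deep inside tsan.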

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)
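
// On the Go side this is declared in race.go, roughly as (signature recalled
// from the runtime sources; treat as illustrative):
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
// with call sites such as racecall(&__tsan_map_shadow, addr, size, 0, 0).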

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET
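
// Equivalent logic in Go-like pseudocode (illustrative):
//	sp := SP
//	if g != g.m.g0 {
//		SP = g.m.g0.sched.sp	// run the C call on the system stack
//	}
//	SP &^= 15	// the C ABI requires 16-byte stack alignment
//	fn()	// the function in AX
//	SP = sp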

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call only switches SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code won't respect that).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12)	// g = m->g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET
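
// On the Go side, racecallback dispatches on the command code. A hedged
// sketch of its shape (command names other than raceGetProcCmd are recalled
// from runtime/race.go and should be checked there):
//	func racecallback(cmd uintptr, ctx unsafe.Pointer) {
//		switch cmd {
//		case raceGetProcCmd:
//			// handled by the assembly fast path above
//		case raceSymbolizeCodeCmd:
//			raceSymbolizeCode((*symbolizeCodeContext)(ctx))
//		case raceSymbolizeDataCmd:
//			raceSymbolizeData((*symbolizeDataContext)(ctx))
//		}
//	}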
