...

Text file src/runtime/race_arm64.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build race
     6	
     7	#include "go_asm.h"
     8	#include "funcdata.h"
     9	#include "textflag.h"
    10	#include "tls_arm64.h"
    11	
    12	// The following thunks allow calling the gcc-compiled race runtime directly
    13	// from Go code without going all the way through cgo.
    14	// First, it's much faster (up to 50% speedup for real Go programs).
    15	// Second, it eliminates race-related special cases from cgocall and scheduler.
    16	// Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
    17	
    18	// A brief recap of the arm64 calling convention.
    19	// Arguments are passed in R0...R7, the rest is on stack.
    20	// Callee-saved registers are: R19...R28.
    21	// Temporary registers are: R9...R15
    22	// SP must be 16-byte aligned.
    23	
    24	// When calling racecalladdr, R9 is the call target address.
    25	
    26	// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.
    27	
// load_g loads the current goroutine pointer into the g register from
// thread-local storage: it reads TPIDR into R0, adds the offset stored
// in runtime·tls_g, and dereferences the resulting slot.
// Clobbers R0 and R11.
#define load_g \
	MRS_TPIDR_R0 \
	MOVD    runtime·tls_g(SB), R11 \
	ADD     R11, R0 \
	MOVD    0(R0), g
    33	
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Reports a read of addr to the race runtime, using the caller's
// return address (LR) as the pc of the access.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1	// address being read
	MOVD	LR, R2		// pc of the access
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    42	
// func runtime·RaceRead(addr uintptr)
// Exported wrapper around raceread.
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)
    47	
// func runtime·racereadpc(void *addr, void *callpc, void *pc)
// Like raceread, but the caller supplies both the call pc and the pc
// of the access explicitly instead of using LR.
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    56	
// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Reports a write of addr to the race runtime, using the caller's
// return address (LR) as the pc of the access.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1	// address being written
	MOVD	LR, R2		// pc of the access
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    65	
// func runtime·RaceWrite(addr uintptr)
// Exported wrapper around racewrite.
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)
    70	
// func runtime·racewritepc(void *addr, void *callpc, void *pc)
// Like racewrite, but the caller supplies both the call pc and the pc
// of the access explicitly instead of using LR.
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    79	
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Reports a read of the [addr, addr+size) range, using the caller's
// return address (LR) as the pc of the access.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3		// pc of the access
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
    89	
// func runtime·RaceReadRange(addr, size uintptr)
// Exported wrapper around racereadrange.
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)
    94	
// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
// Range-read report with an explicit pc. The supplied pc is a function
// start address, so it is bumped by one instruction (4 bytes on arm64)
// to look like a return address, which is what tsan expects.
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
   104	
// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Reports a write of the [addr, addr+size) range, using the caller's
// return address (LR) as the pc of the access.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3		// pc of the access
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
   114	
// func runtime·RaceWriteRange(addr, size uintptr)
// Exported wrapper around racewriterange.
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)
   119	
// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
// Range-write report with an explicit pc. The supplied pc is a function
// start address, so it is bumped by one instruction (4 bytes on arm64)
// to look like a return address, which is what tsan expects.
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)	// tail call; racecalladdr loads thr into R0
   129	
   130	// If addr (R1) is out of range, do nothing.
   131	// Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
   132	TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
   133		load_g
   134		MOVD	g_racectx(g), R0
   135		// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
   136		MOVD	runtime·racearenastart(SB), R10
   137		CMP	R10, R1
   138		BLT	data
   139		MOVD	runtime·racearenaend(SB), R10
   140		CMP	R10, R1
   141		BLT	call
   142	data:
   143		MOVD	runtime·racedatastart(SB), R10
   144		CMP	R10, R1
   145		BLT	ret
   146		MOVD	runtime·racedataend(SB), R10
   147		CMP	R10, R1
   148		BGT	ret
   149	call:
   150		JMP	racecall<>(SB)
   151	ret:
   152		RET
   153	
// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but doesn't pass an arg; uses the caller pc
// from the first slot on the stack.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-0
	MOVD	0(RSP), R9	// caller pc from the first stack slot
	JMP	racefuncenter<>(SB)
   161	
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
// Records function entry with the given caller pc.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)
   167	
// Common code for racefuncenter/racefuncenterfp
// R9 = caller's return address
// Calls __tsan_func_enter for the current goroutine and returns.
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1	// pc of the function entry
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET
   178	
// func runtime·racefuncexit()
// Called from instrumented code.
// Records function exit for the current goroutine.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)	// tail call; racecall returns to our caller
   187	
// Atomic operations for sync/atomic package.
// Each Int variant loads the tsan handler address into R9 and lets
// racecallatomic pick its Go arguments straight off the stack; the
// unsigned/uintptr/pointer variants are tail-call aliases of the Int
// variants of the same width (the race report doesn't care about sign).
// R3 = addr of arguments passed to this function, it can
// be fetched at 40(RSP) in racecallatomic after two times BL
// R0, R1, R2 set in racecallatomic

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)	// uintptr is 64-bit on arm64

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)	// pointers are 64-bit on arm64
   221	
// Store
// Same pattern as Load: Int variants select the tsan handler, the
// unsigned/uintptr variants tail-call the Int variant of the same width.
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)	// uintptr is 64-bit on arm64
   246	
// Swap
// Same pattern as Load: Int variants select the tsan handler, the
// unsigned/uintptr variants tail-call the Int variant of the same width.
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)	// uintptr is 64-bit on arm64
   271	
// Add
// tsan's fetch_add writes the OLD value into the result slot, but Go's
// Add returns the NEW value, so after the call the delta is added back
// into the stored result.
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1	// old value stored by tsan
	ADD	R0, R1, R0	// new value = old + delta
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1	// old value stored by tsan
	ADD	R0, R1, R0	// new value = old + delta
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)	// uintptr is 64-bit on arm64
   304	
// CompareAndSwap
// Same pattern as Load: Int variants select the tsan handler, the
// unsigned/uintptr variants tail-call the Int variant of the same width.
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)	// uintptr is 64-bit on arm64
   329	
// Generic atomic operation implementation.
// R9 = addr of target function
// Called via BL from the sync∕atomic thunks above, which were in turn
// called via BL from user code, so the Go arguments of the thunk sit at
// fixed offsets from RSP here (caller pc at 16(RSP), args from 40(RSP)).
// NOTE(review): those offsets are hard-coded to that exact call depth —
// do not add frames or pushes on this path.
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr. after two times BL, get it at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	// Same filter as racecalladdr: tsan only has shadow for these ranges.
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data	// addr < arenastart: try the data range
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok	// arenastart <= addr < arenaend
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore	// below both ranges
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore	// at or above racedataend
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arg list of the sync∕atomic thunk
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOVD	R9, R20	// remember the original function (R20 is callee-saved)
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g)
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET
   387	
// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9	// C function to call
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)	// tail call; racecall returns to our caller
   398	
// Switches SP to g0 stack and calls (R9). Arguments already set.
// Saves the current SP in R19 (callee-saved, so it survives the C call)
// and restores it afterwards; g itself is left untouched here.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12	// g0's saved stack pointer
	MOVD	R12, RSP
call:
	BL	R9	// invoke the C race-runtime function
	MOVD	R19, RSP	// switch back to the original stack
	RET
   413	
// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// R0 contains command code. R1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CMP	$0, R0
	BNE	rest
	MOVD	g, R13		// save current g; load_g will overwrite it
	load_g
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)	// *out = p.raceprocctx
	MOVD	R13, g		// restore the original g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for args passed through racecallback
	SUB	$96, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	MOVD	R27, 88(RSP)
	// Set g = g0.
	// load_g will clobber R0, Save R0
	MOVD	R0, R13
	load_g
	// restore R0
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), g	// finish the g0 switch the C side began

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	MOVD	88(RSP), R27
	ADD	$96, RSP
	JMP	(LR)
   469	
// tls_g, g value for each thread in TLS
// 8-byte slot whose value load_g adds to TPIDR to locate g.
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8

View as plain text