// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers: R19...R28.
// Temporary registers: R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

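// For orientation (a sketch, not part of the runtime): each thunk below
// behaves roughly like the following Go pseudocode, with the race ctx in R0,
// the remaining C arguments in R1..., and the target function in R9:
//
//	func raceread(addr uintptr) {
//		racecalladdr(&__tsan_read, g.racectx, addr, getcallerpc())
//	}
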
#define load_g \
	MRS_TPIDR_R0 \
	MOVD	runtime·tls_g(SB), R11 \
	ADD	R11, R0 \
	MOVD	0(R0), g

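// In effect load_g performs the following (a sketch; runtime·tls_g holds the
// offset of the g slot from the thread pointer TPIDR_EL0):
//
//	g = *(*g)(unsafe.Pointer(uintptr(TPIDR_EL0) + tls_g))
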
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

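// For reference, a hypothetical use of the exported annotation from Go code
// (at the Go level RaceRead takes an unsafe.Pointer):
//
//	var x int
//	runtime.RaceRead(unsafe.Pointer(&x)) // report a read of x to the detector
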
// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVD	addr+0(FP), R1
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants a return address (arm64 instructions are 4 bytes)
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants a return address (arm64 instructions are 4 bytes)
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// racedataend is exclusive, so addr == racedataend is out of range
call:
	JMP	racecall<>(SB)
ret:
	RET

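// The range check above corresponds to this Go-like sketch, using the
// runtime variables referenced in the code:
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(fn, racectx, addr, ...)
//	}
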
// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but doesn't pass an arg; it uses the caller pc
// from the first slot on the stack.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-0
	MOVD	0(RSP), R9
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVD	callpc+0(FP), R9
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

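// For context (a sketch of compiler output, not code from this file): with
// -race the compiler brackets each instrumented function roughly like
//
//	func f(p *int) int {
//		racefuncenter(getcallerpc())
//		raceread(uintptr(unsafe.Pointer(p)))
//		v := *p
//		racefuncexit()
//		return v
//	}
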
// Atomic operations for the sync/atomic package.
// R3 = addr of the arguments passed to this function; it can
// be fetched at 40(RSP) in racecallatomic after two nested BLs.
// R0, R1, R2 are set in racecallatomic.

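// All __tsan_go_atomic* entry points share one shape; as a C prototype
// (for reference, matching the style of the signatures quoted above):
//
// void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
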
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

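// The fixup above converts tsan's fetch_add (which returns the old value)
// into Go's add-and-fetch semantics; in Go-like pseudocode (tsanFetchAdd is
// a hypothetical name for the call made through racecallatomic):
//
//	old := tsanFetchAdd(addr, delta) // tsan writes the old value to ret
//	ret = old + delta                // AddInt32/AddInt64 return the new value
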
TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after two nested BLs it is at 40(RSP)
	MOVD	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R20	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R20, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

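// The out-of-range path above amounts to, in Go-like pseudocode:
//
//	__tsan_go_ignore_sync_begin(racectx)
//	fn(racectx, callerpc, pc, args) // the original atomic entry point from R9
//	__tsan_go_ignore_sync_end(racectx)
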
// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

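// On the Go side this is declared along the lines of (see runtime/race.go):
//
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
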
// Switches SP to the g0 stack and calls (R9). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the BL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	RET

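// The stack switch above is, in Go-like pseudocode:
//
//	sp := RSP
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp // run the C function on the system stack
//	}
//	fn() // BL (R9)
//	RSP = sp
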
// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call only switches SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code, R1 contains the command-specific context.
// See racecallback for the command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMP	$0, R0
	BNE	rest
	MOVD	g, R13
	load_g
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$96, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	MOVD	R27, 88(RSP)
	// Set g = g0.
	// load_g will clobber R0, so save R0 first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	MOVD	88(RSP), R27
	ADD	$96, RSP
	JMP	(LR)

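// On the Go side, commands other than raceGetProcCmd are dispatched roughly
// like this (a sketch of racecallback in runtime/race.go; exact names may
// differ by Go version):
//
//	func racecallback(cmd uintptr, ctx unsafe.Pointer) {
//		switch cmd {
//		case raceSymbolizeCodeCmd:
//			raceSymbolizeCode((*symbolizeCodeContext)(ctx))
//		case raceSymbolizeDataCmd:
//			raceSymbolizeData((*symbolizeDataContext)(ctx))
//		}
//	}
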
// tls_g, the g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8