// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.
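// For example, a call like __tsan_read(thr, addr, pc) therefore expects thr,
// addr and pc in RARG0, RARG1 and RARG2 (defined below): DI, SI, DX on the
// SysV ABI and CX, DX, R8 on Windows.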

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

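// Each thunk below loads the address of the corresponding __tsan_* function
// into AX and tail-jumps to racecalladdr<> or racecall<>, which fill in the
// goroutine race context and perform the actual call on the g0 stack.
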
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2	// pc of the instrumented caller (our return address)
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2	// pc of the instrumented caller (our return address)
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3	// pc of the instrumented caller (our return address)
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr size, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3	// pc of the instrumented caller (our return address)
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr size, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX	// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
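
// In Go terms, the check above is roughly (a sketch):
//	inRange := (racearenastart <= addr && addr < racearenaend) ||
//		(racedatastart <= addr && addr < racedataend)
//	if inRange { call the __tsan_* function already loaded into AX }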

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11	// load the caller's return address stored just below fp
	JMP	racefuncenter<>(SB)

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15	// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
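// Each wrapper below loads the matching __tsan_go_atomic* function into AX
// and lets racecallatomic<> forward the wrapper's Go argument frame to it;
// tsan performs the actual atomic operation and stores the result back into
// that frame.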

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)	// uintptr is 64-bit on amd64

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)	// pointers are 64-bit on amd64

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET
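
// Note for the two wrappers above: __tsan_go_atomic*_fetch_add returns the
// old value, while Go's Add* functions must return the new value, hence the
// extra addition of the delta into ret after the call.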

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early if the address is bad.
	MOVQ	16(SP), R12
	MOVL	(R12), R13	// fault here, in Go code, rather than inside tsan
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)
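
// Stack layout at racecallatomic<> entry: (SP) is the return address into the
// sync/atomic wrapper, 8(SP) is the wrapper caller's return address, and
// 16(SP) is the start of the wrapper's Go argument frame, whose address is
// passed to tsan in RARG3.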

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12	// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET
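
// In Go terms, roughly (a sketch):
//	sp := SP
//	if g != g.m.g0 {
//		SP = g.m.g0.sched.sp
//	}
//	SP &^= 15	// 16-byte alignment required by the C ABI
//	fn()
//	SP = sp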

// C->Go callback thunk that allows calling runtime·racecallback from C code.
// A direct Go->C race call has only switched SP, so finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
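// (Flow sketch: tsan invokes this thunk with (cmd, ctx); for any command other
// than raceGetProcCmd the thunk saves the C callee-saved registers, switches
// g to m->g0, and calls runtime·racecallback(cmd, ctx) on the g0 stack.)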
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code won't preserve them).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12)	// g = m->g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12	// discard the two pushed arguments
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET