Text file src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9
10 // _rt0_amd64 is common startup code for most amd64 systems when using
11 // internal linking. This is the entry point for the program from the
12 // kernel for an ordinary -buildmode=exe program. The stack holds the
13 // number of arguments and the C-style argv.
14 TEXT _rt0_amd64(SB),NOSPLIT,$-8
15 MOVQ 0(SP), DI // argc
16 LEAQ 8(SP), SI // argv
17 JMP runtime·rt0_go(SB) // tail call; rt0_go does not return
18
19 // main is common startup code for most amd64 systems when using
20 // external linking. The C startup code will call the symbol "main"
21 // passing argc and argv in the usual C ABI registers DI and SI.
22 TEXT main(SB),NOSPLIT,$-8
23 JMP runtime·rt0_go(SB) // argc/argv already in DI/SI per the C ABI; tail call
24
25 // _rt0_amd64_lib is common startup code for most amd64 systems when
26 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
27 // arrange to invoke this function as a global constructor (for
28 // c-archive) or when the shared library is loaded (for c-shared).
29 // We expect argc and argv to be passed in the usual C ABI registers
30 // DI and SI.
31 TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
32 // Align stack per ELF ABI requirements.
33 MOVQ SP, AX // remember unaligned SP so it can be restored on exit
34 ANDQ $~15, SP
35 // Save C ABI callee-saved registers, as caller may need them.
36 MOVQ BX, 0x10(SP)
37 MOVQ BP, 0x18(SP)
38 MOVQ R12, 0x20(SP)
39 MOVQ R13, 0x28(SP)
40 MOVQ R14, 0x30(SP)
41 MOVQ R15, 0x38(SP)
42 MOVQ AX, 0x40(SP) // saved pre-alignment SP
43 
44 // Stash argc/argv for _rt0_amd64_lib_go, which runs on another thread.
44 MOVQ DI, _rt0_amd64_lib_argc<>(SB)
45 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
46 
47 // Synchronous initialization.
48 CALL runtime·libpreinit(SB)
49 
50 // Create a new thread to finish Go runtime initialization.
51 MOVQ _cgo_sys_thread_create(SB), AX // non-nil only when cgo is in use
52 TESTQ AX, AX
53 JZ nocgo
54 MOVQ $_rt0_amd64_lib_go(SB), DI // thread entry point
55 MOVQ $0, SI
56 CALL AX
57 JMP restore
58 
59 nocgo:
60 MOVQ $0x800000, 0(SP) // stacksize
61 MOVQ $_rt0_amd64_lib_go(SB), AX
62 MOVQ AX, 8(SP) // fn
63 CALL runtime·newosproc0(SB)
64 
65 restore:
66 MOVQ 0x10(SP), BX
67 MOVQ 0x18(SP), BP
68 MOVQ 0x20(SP), R12
69 MOVQ 0x28(SP), R13
70 MOVQ 0x30(SP), R14
71 MOVQ 0x38(SP), R15
72 MOVQ 0x40(SP), SP // undo the alignment from entry
73 RET
74
75 // _rt0_amd64_lib_go initializes the Go runtime.
76 // This is started in a separate thread by _rt0_amd64_lib.
77 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
78 MOVQ _rt0_amd64_lib_argc<>(SB), DI // argc saved by _rt0_amd64_lib
79 MOVQ _rt0_amd64_lib_argv<>(SB), SI // argv saved by _rt0_amd64_lib
80 JMP runtime·rt0_go(SB) // tail call; does not return
81
82 // Storage for argc/argv handed from _rt0_amd64_lib to _rt0_amd64_lib_go.
82 DATA _rt0_amd64_lib_argc<>(SB)/8, $0
83 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
84 DATA _rt0_amd64_lib_argv<>(SB)/8, $0
85 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
86
87 // rt0_go bootstraps the Go runtime: sets up g0's stack bounds, probes the
87 // CPU, establishes TLS, wires up g0/m0, and runs schedinit before starting
87 // the main goroutine. It never returns.
87 TEXT runtime·rt0_go(SB),NOSPLIT,$0
88 // copy arguments forward on an even stack
89 MOVQ DI, AX // argc
90 MOVQ SI, BX // argv
91 SUBQ $(4*8+7), SP // 2args 2auto
92 ANDQ $~15, SP // 16-byte align
93 MOVQ AX, 16(SP)
94 MOVQ BX, 24(SP)
95 
96 // create istack out of the given (operating system) stack.
97 // _cgo_init may update stackguard.
98 MOVQ $runtime·g0(SB), DI
99 LEAQ (-64*1024+104)(SP), BX // assume 64 kB of OS stack is usable
100 MOVQ BX, g_stackguard0(DI)
101 MOVQ BX, g_stackguard1(DI)
102 MOVQ BX, (g_stack+stack_lo)(DI)
103 MOVQ SP, (g_stack+stack_hi)(DI)
104 
105 // find out information about the processor we're on
106 MOVL $0, AX
107 CPUID // leaf 0: max leaf in AX, vendor string in BX:DX:CX
108 MOVL AX, SI
109 CMPL AX, $0
110 JE nocpuinfo
111 
112 // Figure out how to serialize RDTSC.
113 // On Intel processors LFENCE is enough. AMD requires MFENCE.
114 // Don't know about the rest, so let's do MFENCE.
115 CMPL BX, $0x756E6547 // "Genu"
116 JNE notintel
117 CMPL DX, $0x49656E69 // "ineI"
118 JNE notintel
119 CMPL CX, $0x6C65746E // "ntel"
120 JNE notintel
121 MOVB $1, runtime·isIntel(SB)
122 MOVB $1, runtime·lfenceBeforeRdtsc(SB)
123 notintel:
124 
125 // Load EAX=1 cpuid flags
126 MOVL $1, AX
127 CPUID
128 MOVL AX, runtime·processorVersionInfo(SB)
129 
130 nocpuinfo:
131 // if there is an _cgo_init, call it.
132 MOVQ _cgo_init(SB), AX
133 TESTQ AX, AX
134 JZ needtls
135 // arg 1: g0, already in DI
136 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
137 #ifdef GOOS_android
138 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
139 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
140 // Compensate for tls_g (+16).
141 MOVQ -16(TLS), CX
142 #else
143 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
144 MOVQ $0, CX
145 #endif
146 #ifdef GOOS_windows
147 // Adjust for the Win64 calling convention.
148 MOVQ CX, R9 // arg 4
149 MOVQ DX, R8 // arg 3
150 MOVQ SI, DX // arg 2
151 MOVQ DI, CX // arg 1
152 #endif
153 CALL AX
154 
155 // update stackguard after _cgo_init
156 MOVQ $runtime·g0(SB), CX
157 MOVQ (g_stack+stack_lo)(CX), AX
158 ADDQ $const__StackGuard, AX
159 MOVQ AX, g_stackguard0(CX)
160 MOVQ AX, g_stackguard1(CX)
161 
162 #ifndef GOOS_windows
163 JMP ok // cgo path set up TLS already; Windows still needs it below
164 #endif
165 needtls:
166 #ifdef GOOS_plan9
167 // skip TLS setup on Plan 9
168 JMP ok
169 #endif
170 #ifdef GOOS_solaris
171 // skip TLS setup on Solaris
172 JMP ok
173 #endif
174 #ifdef GOOS_illumos
175 // skip TLS setup on illumos
176 JMP ok
177 #endif
178 #ifdef GOOS_darwin
179 // skip TLS setup on Darwin
180 JMP ok
181 #endif
182 
183 LEAQ runtime·m0+m_tls(SB), DI
184 CALL runtime·settls(SB)
185 
186 // store through it, to make sure it works
187 get_tls(BX)
188 MOVQ $0x123, g(BX)
189 MOVQ runtime·m0+m_tls(SB), AX
190 CMPQ AX, $0x123 // did the TLS write land in m0.tls?
191 JEQ 2(PC)
192 CALL runtime·abort(SB)
193 ok:
194 // set the per-goroutine and per-mach "registers"
195 get_tls(BX)
196 LEAQ runtime·g0(SB), CX
197 MOVQ CX, g(BX)
198 LEAQ runtime·m0(SB), AX
199 
200 // save m->g0 = g0
201 MOVQ CX, m_g0(AX)
202 // save m0 to g0->m
203 MOVQ AX, g_m(CX)
204 
205 CLD // convention is D is always left cleared
206 CALL runtime·check(SB)
207 
208 MOVL 16(SP), AX // copy argc
209 MOVL AX, 0(SP)
210 MOVQ 24(SP), AX // copy argv
211 MOVQ AX, 8(SP)
212 CALL runtime·args(SB)
213 CALL runtime·osinit(SB)
214 CALL runtime·schedinit(SB)
215 
216 // create a new goroutine to start program
217 MOVQ $runtime·mainPC(SB), AX // entry
218 PUSHQ AX
219 PUSHQ $0 // arg size
220 CALL runtime·newproc(SB)
221 POPQ AX
222 POPQ AX
223 
224 // start this M
225 CALL runtime·mstart(SB)
226 
227 CALL runtime·abort(SB) // mstart should never return
228 RET
229 
230 // Prevent dead-code elimination of debugCallV1, which is
231 // intended to be called by debuggers.
232 MOVQ $runtime·debugCallV1(SB), AX // unreachable; reference only
233 RET
234
235 // mainPC is the entry PC of runtime.main, pushed by rt0_go for newproc.
235 DATA runtime·mainPC+0(SB)/8,$runtime·main(SB)
236 GLOBL runtime·mainPC(SB),RODATA,$8
237
238 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
239 BYTE $0xcc // INT 3: hardware breakpoint trap
240 RET
241
242 TEXT runtime·asminit(SB),NOSPLIT,$0-0
243 // No per-thread init.
244 RET
245
246 /*
247 * go-routine
248 */
249
250 // func gosave(buf *gobuf)
251 // save state in Gobuf; setjmp
252 TEXT runtime·gosave(SB), NOSPLIT, $0-8
253 MOVQ buf+0(FP), AX // gobuf
254 LEAQ buf+0(FP), BX // caller's SP
255 MOVQ BX, gobuf_sp(AX)
256 MOVQ 0(SP), BX // caller's PC (our return address)
257 MOVQ BX, gobuf_pc(AX)
258 MOVQ $0, gobuf_ret(AX)
259 MOVQ BP, gobuf_bp(AX)
260 // Assert ctxt is zero. See func save.
261 MOVQ gobuf_ctxt(AX), BX
262 TESTQ BX, BX
263 JZ 2(PC)
264 CALL runtime·badctxt(SB)
265 get_tls(CX)
266 MOVQ g(CX), BX // record the current g in the buf
267 MOVQ BX, gobuf_g(AX)
268 RET
269
270 // func gogo(buf *gobuf)
271 // restore state from Gobuf; longjmp
272 TEXT runtime·gogo(SB), NOSPLIT, $16-8
273 MOVQ buf+0(FP), BX // gobuf
274 MOVQ gobuf_g(BX), DX
275 MOVQ 0(DX), CX // make sure g != nil
276 get_tls(CX)
277 MOVQ DX, g(CX) // switch current g to buf's g
278 MOVQ gobuf_sp(BX), SP // restore SP
279 MOVQ gobuf_ret(BX), AX
280 MOVQ gobuf_ctxt(BX), DX
281 MOVQ gobuf_bp(BX), BP
282 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
283 MOVQ $0, gobuf_ret(BX)
284 MOVQ $0, gobuf_ctxt(BX)
285 MOVQ $0, gobuf_bp(BX)
286 MOVQ gobuf_pc(BX), BX
287 JMP BX // resume at the saved PC
288
289 // func mcall(fn func(*g))
290 // Switch to m->g0's stack, call fn(g).
291 // Fn must never return. It should gogo(&g->sched)
292 // to keep running g.
293 TEXT runtime·mcall(SB), NOSPLIT, $0-8
294 MOVQ fn+0(FP), DI
295 
296 get_tls(CX)
297 MOVQ g(CX), AX // save state in g->sched
298 MOVQ 0(SP), BX // caller's PC
299 MOVQ BX, (g_sched+gobuf_pc)(AX)
300 LEAQ fn+0(FP), BX // caller's SP
301 MOVQ BX, (g_sched+gobuf_sp)(AX)
302 MOVQ AX, (g_sched+gobuf_g)(AX)
303 MOVQ BP, (g_sched+gobuf_bp)(AX)
304 
305 // switch to m->g0 & its stack, call fn
306 MOVQ g(CX), BX
307 MOVQ g_m(BX), BX
308 MOVQ m_g0(BX), SI
309 CMPQ SI, AX // if g == m->g0 call badmcall
310 JNE 3(PC)
311 MOVQ $runtime·badmcall(SB), AX
312 JMP AX
313 MOVQ SI, g(CX) // g = m->g0
314 MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
315 PUSHQ AX // arg: the g we came from
316 MOVQ DI, DX // DX = closure context, as required by Go func calls
317 MOVQ 0(DI), DI // load code pointer out of the funcval
318 CALL DI
319 POPQ AX
320 MOVQ $runtime·badmcall2(SB), AX // fn must not return; crash if it does
321 JMP AX
322 RET
323
324 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
325 // of the G stack. We need to distinguish the routine that
326 // lives at the bottom of the G stack from the one that lives
327 // at the top of the system stack because the one at the top of
328 // the system stack terminates the stack walk (see topofstack()).
329 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
330 RET // marker function only; the PC value is what matters to traceback
331
332 // func systemstack(fn func())
333 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
334 MOVQ fn+0(FP), DI // DI = fn
335 get_tls(CX)
336 MOVQ g(CX), AX // AX = g
337 MOVQ g_m(AX), BX // BX = m
338 
339 CMPQ AX, m_gsignal(BX) // already on the signal stack?
340 JEQ noswitch
341 
342 MOVQ m_g0(BX), DX // DX = g0
343 CMPQ AX, DX // already on g0?
344 JEQ noswitch
345 
346 CMPQ AX, m_curg(BX) // must be m->curg, otherwise state is corrupt
347 JNE bad
348 
349 // switch stacks
350 // save our state in g->sched. Pretend to
351 // be systemstack_switch if the G stack is scanned.
352 MOVQ $runtime·systemstack_switch(SB), SI
353 MOVQ SI, (g_sched+gobuf_pc)(AX)
354 MOVQ SP, (g_sched+gobuf_sp)(AX)
355 MOVQ AX, (g_sched+gobuf_g)(AX)
356 MOVQ BP, (g_sched+gobuf_bp)(AX)
357 
358 // switch to g0
359 MOVQ DX, g(CX)
360 MOVQ (g_sched+gobuf_sp)(DX), BX
361 // make it look like mstart called systemstack on g0, to stop traceback
362 SUBQ $8, BX
363 MOVQ $runtime·mstart(SB), DX
364 MOVQ DX, 0(BX) // fake return address
365 MOVQ BX, SP
366 
367 // call target function
368 MOVQ DI, DX // DX = closure context for the call
369 MOVQ 0(DI), DI // code pointer from funcval
370 CALL DI
371 
372 // switch back to g
373 get_tls(CX)
374 MOVQ g(CX), AX
375 MOVQ g_m(AX), BX
376 MOVQ m_curg(BX), AX
377 MOVQ AX, g(CX)
378 MOVQ (g_sched+gobuf_sp)(AX), SP
379 MOVQ $0, (g_sched+gobuf_sp)(AX) // clear saved SP to help GC
380 RET
381 
382 noswitch:
383 // already on m stack; tail call the function
384 // Using a tail call here cleans up tracebacks since we won't stop
385 // at an intermediate systemstack.
386 MOVQ DI, DX
387 MOVQ 0(DI), DI
388 JMP DI
389 
390 bad:
391 // Bad: g is not gsignal, not g0, not curg. What is it?
392 MOVQ $runtime·badsystemstack(SB), AX
393 CALL AX
394 INT $3 // badsystemstack should not return
395
396
397 /*
398 * support for morestack
399 */
400
401 // Called during function prolog when more stack is needed.
402 //
403 // The traceback routines see morestack on a g0 as being
404 // the top of a stack (for example, morestack calling newstack
405 // calling the scheduler calling newm calling gc), so we must
406 // record an argument size. For that purpose, it has no arguments.
407 TEXT runtime·morestack(SB),NOSPLIT,$0-0
408 // Cannot grow scheduler stack (m->g0).
409 get_tls(CX)
410 MOVQ g(CX), BX
411 MOVQ g_m(BX), BX
412 MOVQ m_g0(BX), SI
413 CMPQ g(CX), SI
414 JNE 3(PC)
415 CALL runtime·badmorestackg0(SB)
416 CALL runtime·abort(SB)
417 
418 // Cannot grow signal stack (m->gsignal).
419 MOVQ m_gsignal(BX), SI
420 CMPQ g(CX), SI
421 JNE 3(PC)
422 CALL runtime·badmorestackgsignal(SB)
423 CALL runtime·abort(SB)
424 
425 // Called from f.
426 // Set m->morebuf to f's caller.
427 NOP SP // tell vet SP changed - stop checking offsets
428 MOVQ 8(SP), AX // f's caller's PC
429 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
430 LEAQ 16(SP), AX // f's caller's SP
431 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
432 get_tls(CX)
433 MOVQ g(CX), SI
434 MOVQ SI, (m_morebuf+gobuf_g)(BX)
435 
436 // Set g->sched to context in f.
437 MOVQ 0(SP), AX // f's PC
438 MOVQ AX, (g_sched+gobuf_pc)(SI)
439 MOVQ SI, (g_sched+gobuf_g)(SI)
440 LEAQ 8(SP), AX // f's SP
441 MOVQ AX, (g_sched+gobuf_sp)(SI)
442 MOVQ BP, (g_sched+gobuf_bp)(SI)
443 MOVQ DX, (g_sched+gobuf_ctxt)(SI) // DX = f's closure context
444 
445 // Call newstack on m->g0's stack.
446 MOVQ m_g0(BX), BX
447 MOVQ BX, g(CX)
448 MOVQ (g_sched+gobuf_sp)(BX), SP
449 CALL runtime·newstack(SB)
450 CALL runtime·abort(SB) // crash if newstack returns
451 RET
452
453 // morestack but not preserving ctxt.
454 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
455 MOVL $0, DX // clear ctxt so morestack saves a zero gobuf_ctxt
456 JMP runtime·morestack(SB)
457
458 // reflectcall: call a function with the given argument list
459 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
460 // we don't have variable-sized frames, so we use a small number
461 // of constant-sized-frame functions to encode a few bits of size in the pc.
462 // Caution: ugly multiline assembly macros in your future!
463
464 // DISPATCH jumps to NAME if the argument size in CX is <= MAXSIZE.
464 #define DISPATCH(NAME,MAXSIZE) \
465 CMPQ CX, $MAXSIZE; \
466 JA 3(PC); \
467 MOVQ $NAME(SB), AX; \
468 JMP AX
469 // Note: can't just "JMP NAME(SB)" - bad inlining results.
470
471 // reflectcall dispatches to the smallest call* stub whose fixed frame
471 // can hold the argument block of the requested size.
471 TEXT ·reflectcall(SB), NOSPLIT, $0-32
472 MOVLQZX argsize+24(FP), CX
473 DISPATCH(runtime·call32, 32)
474 DISPATCH(runtime·call64, 64)
475 DISPATCH(runtime·call128, 128)
476 DISPATCH(runtime·call256, 256)
477 DISPATCH(runtime·call512, 512)
478 DISPATCH(runtime·call1024, 1024)
479 DISPATCH(runtime·call2048, 2048)
480 DISPATCH(runtime·call4096, 4096)
481 DISPATCH(runtime·call8192, 8192)
482 DISPATCH(runtime·call16384, 16384)
483 DISPATCH(runtime·call32768, 32768)
484 DISPATCH(runtime·call65536, 65536)
485 DISPATCH(runtime·call131072, 131072)
486 DISPATCH(runtime·call262144, 262144)
487 DISPATCH(runtime·call524288, 524288)
488 DISPATCH(runtime·call1048576, 1048576)
489 DISPATCH(runtime·call2097152, 2097152)
490 DISPATCH(runtime·call4194304, 4194304)
491 DISPATCH(runtime·call8388608, 8388608)
492 DISPATCH(runtime·call16777216, 16777216)
493 DISPATCH(runtime·call33554432, 33554432)
494 DISPATCH(runtime·call67108864, 67108864)
495 DISPATCH(runtime·call134217728, 134217728)
496 DISPATCH(runtime·call268435456, 268435456)
497 DISPATCH(runtime·call536870912, 536870912)
498 DISPATCH(runtime·call1073741824, 1073741824)
499 MOVQ $runtime·badreflectcall(SB), AX // argsize > 1 GB: fatal
500 JMP AX
501
502 // CALLFN defines one call* stub: copy the caller's argument block onto
502 // this frame, call f, then copy results (past retoffset) back out.
502 #define CALLFN(NAME,MAXSIZE) \
503 TEXT NAME(SB), WRAPPER, $MAXSIZE-32; \
504 NO_LOCAL_POINTERS; \
505 /* copy arguments to stack */ \
506 MOVQ argptr+16(FP), SI; \
507 MOVLQZX argsize+24(FP), CX; \
508 MOVQ SP, DI; \
509 REP;MOVSB; \
510 /* call function */ \
511 MOVQ f+8(FP), DX; \
512 PCDATA $PCDATA_StackMapIndex, $0; \
513 CALL (DX); \
514 /* copy return values back */ \
515 MOVQ argtype+0(FP), DX; \
516 MOVQ argptr+16(FP), DI; \
517 MOVLQZX argsize+24(FP), CX; \
518 MOVLQZX retoffset+28(FP), BX; \
519 MOVQ SP, SI; \
520 ADDQ BX, DI; \
521 ADDQ BX, SI; \
522 SUBQ BX, CX; \
523 CALL callRet<>(SB); \
524 RET
525
526 // callRet copies return values back at the end of call*. This is a
527 // separate function so it can allocate stack space for the arguments
528 // to reflectcallmove. It does not follow the Go ABI; it expects its
529 // arguments in registers.
530 TEXT callRet<>(SB), NOSPLIT, $32-0
531 NO_LOCAL_POINTERS
532 MOVQ DX, 0(SP) // argtype
533 MOVQ DI, 8(SP) // destination
534 MOVQ SI, 16(SP) // source
535 MOVQ CX, 24(SP) // byte count
536 CALL runtime·reflectcallmove(SB)
537 RET
538
539 // Instantiate the fixed-frame call stubs used by reflectcall above.
539 CALLFN(·call32, 32)
540 CALLFN(·call64, 64)
541 CALLFN(·call128, 128)
542 CALLFN(·call256, 256)
543 CALLFN(·call512, 512)
544 CALLFN(·call1024, 1024)
545 CALLFN(·call2048, 2048)
546 CALLFN(·call4096, 4096)
547 CALLFN(·call8192, 8192)
548 CALLFN(·call16384, 16384)
549 CALLFN(·call32768, 32768)
550 CALLFN(·call65536, 65536)
551 CALLFN(·call131072, 131072)
552 CALLFN(·call262144, 262144)
553 CALLFN(·call524288, 524288)
554 CALLFN(·call1048576, 1048576)
555 CALLFN(·call2097152, 2097152)
556 CALLFN(·call4194304, 4194304)
557 CALLFN(·call8388608, 8388608)
558 CALLFN(·call16777216, 16777216)
559 CALLFN(·call33554432, 33554432)
560 CALLFN(·call67108864, 67108864)
561 CALLFN(·call134217728, 134217728)
562 CALLFN(·call268435456, 268435456)
563 CALLFN(·call536870912, 536870912)
564 CALLFN(·call1073741824, 1073741824)
565
566 // procyield spins for the given cycle count using PAUSE,
566 // which hints spin-wait to the CPU.
566 TEXT runtime·procyield(SB),NOSPLIT,$0-0
567 MOVL cycles+0(FP), AX
568 again:
569 PAUSE
570 SUBL $1, AX
571 JNZ again
572 RET
573
574
575 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
576 // Stores are already ordered on x86, so this is just a
577 // compile barrier.
578 RET
579
580 // func jmpdefer(fv *funcval, argp uintptr)
581 // argp is a caller SP.
582 // called from deferreturn.
583 // 1. pop the caller
584 // 2. sub 5 bytes from the callers return
585 // 3. jmp to the argument
586 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
587 MOVQ fv+0(FP), DX // fn
588 MOVQ argp+8(FP), BX // caller sp
589 LEAQ -8(BX), SP // caller sp after CALL
590 MOVQ -8(SP), BP // restore BP as if deferreturn returned (harmless if framepointers not in use)
591 SUBQ $5, (SP) // return to CALL again (5 = length of the CALL instruction)
592 MOVQ 0(DX), BX // code pointer from funcval; DX stays as closure ctxt
593 JMP BX // but first run the deferred function
594
595 // Save state of caller into g->sched. Smashes R8, R9.
596 TEXT gosave<>(SB),NOSPLIT,$0
597 get_tls(R8)
598 MOVQ g(R8), R8
599 MOVQ 0(SP), R9 // caller's PC (our return address)
600 MOVQ R9, (g_sched+gobuf_pc)(R8)
601 LEAQ 8(SP), R9 // caller's SP
602 MOVQ R9, (g_sched+gobuf_sp)(R8)
603 MOVQ $0, (g_sched+gobuf_ret)(R8)
604 MOVQ BP, (g_sched+gobuf_bp)(R8)
605 // Assert ctxt is zero. See func save.
606 MOVQ (g_sched+gobuf_ctxt)(R8), R9
607 TESTQ R9, R9
608 JZ 2(PC)
609 CALL runtime·badctxt(SB)
610 RET
611
612 // func asmcgocall(fn, arg unsafe.Pointer) int32
613 // Call fn(arg) on the scheduler stack,
614 // aligned appropriately for the gcc ABI.
615 // See cgocall.go for more details.
616 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
617 MOVQ fn+0(FP), AX
618 MOVQ arg+8(FP), BX
619 
620 MOVQ SP, DX // remember entry SP for the stack-depth bookkeeping below
621 
622 // Figure out if we need to switch to m->g0 stack.
623 // We get called to create new OS threads too, and those
624 // come in on the m->g0 stack already.
625 get_tls(CX)
626 MOVQ g(CX), R8
627 CMPQ R8, $0 // no g at all (thread setup/teardown)
628 JEQ nosave
629 MOVQ g_m(R8), R8
630 MOVQ m_g0(R8), SI
631 MOVQ g(CX), DI
632 CMPQ SI, DI // already on g0?
633 JEQ nosave
634 MOVQ m_gsignal(R8), SI
635 CMPQ SI, DI // already on the signal stack?
636 JEQ nosave
637 
638 // Switch to system stack.
639 MOVQ m_g0(R8), SI
640 CALL gosave<>(SB)
641 MOVQ SI, g(CX)
642 MOVQ (g_sched+gobuf_sp)(SI), SP
643 
644 // Now on a scheduling stack (a pthread-created stack).
645 // Make sure we have enough room for 4 stack-backed fast-call
646 // registers as per windows amd64 calling convention.
647 SUBQ $64, SP
648 ANDQ $~15, SP // alignment for gcc ABI
649 MOVQ DI, 48(SP) // save g
650 MOVQ (g_stack+stack_hi)(DI), DI
651 SUBQ DX, DI
652 MOVQ DI, 40(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
653 MOVQ BX, DI // DI = first argument in AMD64 ABI
654 MOVQ BX, CX // CX = first argument in Win64
655 CALL AX
656 
657 // Restore registers, g, stack pointer.
658 get_tls(CX)
659 MOVQ 48(SP), DI
660 MOVQ (g_stack+stack_hi)(DI), SI
661 SUBQ 40(SP), SI // recompute SP from stack_hi minus saved depth
662 MOVQ DI, g(CX)
663 MOVQ SI, SP
664 
665 MOVL AX, ret+16(FP) // return fn's 32-bit result
666 RET
667 
668 nosave:
669 // Running on a system stack, perhaps even without a g.
670 // Having no g can happen during thread creation or thread teardown
671 // (see needm/dropm on Solaris, for example).
672 // This code is like the above sequence but without saving/restoring g
673 // and without worrying about the stack moving out from under us
674 // (because we're on a system stack, not a goroutine stack).
675 // The above code could be used directly if already on a system stack,
676 // but then the only path through this code would be a rare case on Solaris.
677 // Using this code for all "already on system stack" calls exercises it more,
678 // which should help keep it correct.
679 SUBQ $64, SP
680 ANDQ $~15, SP
681 MOVQ $0, 48(SP) // where above code stores g, in case someone looks during debugging
682 MOVQ DX, 40(SP) // save original stack pointer
683 MOVQ BX, DI // DI = first argument in AMD64 ABI
684 MOVQ BX, CX // CX = first argument in Win64
685 CALL AX
686 MOVQ 40(SP), SI // restore original stack pointer
687 MOVQ SI, SP
688 MOVL AX, ret+16(FP)
689 RET
690
691 // func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
692 // Turn the fn into a Go func (by taking its address) and call
693 // cgocallback_gofunc.
694 TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
695 LEAQ fn+0(FP), AX // address of fn arg == a Go funcval for it
696 MOVQ AX, 0(SP)
697 MOVQ frame+8(FP), AX
698 MOVQ AX, 8(SP)
699 MOVQ framesize+16(FP), AX
700 MOVQ AX, 16(SP)
701 MOVQ ctxt+24(FP), AX
702 MOVQ AX, 24(SP)
703 MOVQ $runtime·cgocallback_gofunc(SB), AX
704 CALL AX // indirect call keeps linker stack analysis happy
705 RET
706
707 // func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
708 // See cgocall.go for more details.
709 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
710 NO_LOCAL_POINTERS
711 
712 // If g is nil, Go did not create the current thread.
713 // Call needm to obtain one m for temporary use.
714 // In this case, we're running on the thread stack, so there's
715 // lots of space, but the linker doesn't know. Hide the call from
716 // the linker analysis by using an indirect call through AX.
717 get_tls(CX)
718 #ifdef GOOS_windows
719 MOVL $0, BX
720 CMPQ CX, $0 // TLS itself may be unset on Windows
721 JEQ 2(PC)
722 #endif
723 MOVQ g(CX), BX
724 CMPQ BX, $0
725 JEQ needm
726 MOVQ g_m(BX), BX
727 MOVQ BX, R8 // holds oldm until end of function
728 JMP havem
729 needm:
730 MOVQ $0, 0(SP) // R8 = nil marks "m was borrowed via needm"
731 MOVQ $runtime·needm(SB), AX
732 CALL AX
733 MOVQ 0(SP), R8
734 get_tls(CX)
735 MOVQ g(CX), BX
736 MOVQ g_m(BX), BX
737 
738 // Set m->sched.sp = SP, so that if a panic happens
739 // during the function we are about to execute, it will
740 // have a valid SP to run on the g0 stack.
741 // The next few lines (after the havem label)
742 // will save this SP onto the stack and then write
743 // the same SP back to m->sched.sp. That seems redundant,
744 // but if an unrecovered panic happens, unwindm will
745 // restore the g->sched.sp from the stack location
746 // and then systemstack will try to use it. If we don't set it here,
747 // that restored SP will be uninitialized (typically 0) and
748 // will not be usable.
749 MOVQ m_g0(BX), SI
750 MOVQ SP, (g_sched+gobuf_sp)(SI)
751 
752 havem:
753 // Now there's a valid m, and we're running on its m->g0.
754 // Save current m->g0->sched.sp on stack and then set it to SP.
755 // Save current sp in m->g0->sched.sp in preparation for
756 // switch back to m->curg stack.
757 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
758 MOVQ m_g0(BX), SI
759 MOVQ (g_sched+gobuf_sp)(SI), AX
760 MOVQ AX, 0(SP)
761 MOVQ SP, (g_sched+gobuf_sp)(SI)
762 
763 // Switch to m->curg stack and call runtime.cgocallbackg.
764 // Because we are taking over the execution of m->curg
765 // but *not* resuming what had been running, we need to
766 // save that information (m->curg->sched) so we can restore it.
767 // We can restore m->curg->sched.sp easily, because calling
768 // runtime.cgocallbackg leaves SP unchanged upon return.
769 // To save m->curg->sched.pc, we push it onto the stack.
770 // This has the added benefit that it looks to the traceback
771 // routine like cgocallbackg is going to return to that
772 // PC (because the frame we allocate below has the same
773 // size as cgocallback_gofunc's frame declared above)
774 // so that the traceback will seamlessly trace back into
775 // the earlier calls.
776 //
777 // In the new goroutine, 8(SP) holds the saved R8.
778 MOVQ m_curg(BX), SI
779 MOVQ SI, g(CX)
780 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
781 MOVQ (g_sched+gobuf_pc)(SI), BX
782 MOVQ BX, -8(DI) // push curg's saved PC as a fake return address
783 // Compute the size of the frame, including return PC and, if
784 // GOEXPERIMENT=framepointer, the saved base pointer
785 MOVQ ctxt+24(FP), BX
786 LEAQ fv+0(FP), AX
787 SUBQ SP, AX // AX = distance from SP to the fv arg = frame size
788 SUBQ AX, DI
789 MOVQ DI, SP
790 
791 MOVQ R8, 8(SP) // preserve oldm across cgocallbackg
792 MOVQ BX, 0(SP) // pass ctxt
793 CALL runtime·cgocallbackg(SB)
794 MOVQ 8(SP), R8
795 
796 // Compute the size of the frame again. FP and SP have
797 // completely different values here than they did above,
798 // but only their difference matters.
799 LEAQ fv+0(FP), AX
800 SUBQ SP, AX
801 
802 // Restore g->sched (== m->curg->sched) from saved values.
803 get_tls(CX)
804 MOVQ g(CX), SI
805 MOVQ SP, DI
806 ADDQ AX, DI
807 MOVQ -8(DI), BX // the PC we pushed before the switch
808 MOVQ BX, (g_sched+gobuf_pc)(SI)
809 MOVQ DI, (g_sched+gobuf_sp)(SI)
810 
811 // Switch back to m->g0's stack and restore m->g0->sched.sp.
812 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
813 // so we do not have to restore it.)
814 MOVQ g(CX), BX
815 MOVQ g_m(BX), BX
816 MOVQ m_g0(BX), SI
817 MOVQ SI, g(CX)
818 MOVQ (g_sched+gobuf_sp)(SI), SP
819 MOVQ 0(SP), AX // the sched.sp we saved at havem
820 MOVQ AX, (g_sched+gobuf_sp)(SI)
821 
822 // If the m on entry was nil, we called needm above to borrow an m
823 // for the duration of the call. Since the call is over, return it with dropm.
824 CMPQ R8, $0
825 JNE 3(PC)
826 MOVQ $runtime·dropm(SB), AX
827 CALL AX
828 
829 // Done!
830 RET
831
832 // func setg(gg *g)
833 // set g. for use by needm.
834 TEXT runtime·setg(SB), NOSPLIT, $0-8
835 MOVQ gg+0(FP), BX
836 #ifdef GOOS_windows
837 CMPQ BX, $0
838 JNE settls
839 MOVQ $0, 0x28(GS) // clear the TLS g slot when gg is nil
840 RET
841 settls:
842 MOVQ g_m(BX), AX
843 LEAQ m_tls(AX), AX
844 MOVQ AX, 0x28(GS) // point the TLS slot at this m's tls array
845 #endif
846 get_tls(CX)
847 MOVQ BX, g(CX)
848 RET
849
850 // void setg_gcc(G*); set g called from gcc.
851 TEXT setg_gcc<>(SB),NOSPLIT,$0
852 get_tls(AX)
853 MOVQ DI, g(AX) // g passed in DI per the C ABI
854 RET
855
856 TEXT runtime·abort(SB),NOSPLIT,$0-0
857 INT $3 // breakpoint trap; spin below if the trap is somehow ignored
858 loop:
859 JMP loop
860
861 // check that SP is in range [g->stack.lo, g->stack.hi)
862 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
863 get_tls(CX)
864 MOVQ g(CX), AX
865 CMPQ (g_stack+stack_hi)(AX), SP // require SP < stack.hi
866 JHI 2(PC)
867 CALL runtime·abort(SB)
868 CMPQ SP, (g_stack+stack_lo)(AX) // require SP > stack.lo
869 JHI 2(PC)
870 CALL runtime·abort(SB)
871 RET
872
873 // func cputicks() int64
874 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
875 CMPB runtime·lfenceBeforeRdtsc(SB), $1 // set for Intel in rt0_go
876 JNE mfence
877 LFENCE
878 JMP done
879 mfence:
880 MFENCE
881 done:
882 RDTSC // ticks in EDX:EAX
883 SHLQ $32, DX
884 ADDQ DX, AX // combine into a 64-bit count
885 MOVQ AX, ret+0(FP)
886 RET
887
888 // func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
889 // hash function using AES hardware instructions
890 TEXT runtime·aeshash(SB),NOSPLIT,$0-32
891 MOVQ p+0(FP), AX // ptr to data
892 MOVQ s+16(FP), CX // size
893 LEAQ ret+24(FP), DX // where aeshashbody writes the result
894 JMP aeshashbody<>(SB)
895
896 // func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
897 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
898 MOVQ p+0(FP), AX // ptr to string struct
899 MOVQ 8(AX), CX // length of string
900 MOVQ (AX), AX // string data
901 LEAQ ret+16(FP), DX // where aeshashbody writes the result
902 JMP aeshashbody<>(SB)
903
904 // AX: data
905 // CX: length
906 // DX: address to put return value
907 // AX: data
908 // CX: length
909 // DX: address to put return value
910 // Strategy: pick a code path by length, xor data blocks with per-process
911 // seeds from runtime·aeskeysched, scramble with AESENC rounds, fold the
912 // lanes together, and store 8 bytes of result at (DX).
907 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
908 // Fill an SSE register with our seeds.
909 MOVQ h+8(FP), X0 // 64 bits of per-table hash seed
910 PINSRW $4, CX, X0 // 16 bits of length
911 PSHUFHW $0, X0, X0 // repeat length 4 times total
912 MOVO X0, X1 // save unscrambled seed
913 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
914 AESENC X0, X0 // scramble seed
915 
916 CMPQ CX, $16
917 JB aes0to15
918 JE aes16
919 CMPQ CX, $32
920 JBE aes17to32
921 CMPQ CX, $64
922 JBE aes33to64
923 CMPQ CX, $128
924 JBE aes65to128
925 JMP aes129plus
926 
927 aes0to15:
928 TESTQ CX, CX
929 JE aes0
930 
931 ADDQ $16, AX
932 TESTW $0xff0, AX // low 12 bits >= 0xff0 means a 16-byte load may cross a page
933 JE endofpage
934 
935 // 16 bytes loaded at this address won't cross
936 // a page boundary, so we can load it directly.
937 MOVOU -16(AX), X1
938 ADDQ CX, CX // index the masks table in 16-byte entries
939 MOVQ $masks<>(SB), AX
940 PAND (AX)(CX*8), X1 // zero bytes beyond the data length
941 final1:
942 PXOR X0, X1 // xor data with seed
943 AESENC X1, X1 // scramble combo 3 times
944 AESENC X1, X1
945 AESENC X1, X1
946 MOVQ X1, (DX)
947 RET
948 
949 endofpage:
950 // address ends in 1111xxxx. Might be up against
951 // a page boundary, so load ending at last byte.
952 // Then shift bytes down using pshufb.
953 MOVOU -32(AX)(CX*1), X1
954 ADDQ CX, CX
955 MOVQ $shifts<>(SB), AX
956 PSHUFB (AX)(CX*8), X1
957 JMP final1
958 
959 aes0:
960 // Return scrambled input seed
961 AESENC X0, X0
962 MOVQ X0, (DX)
963 RET
964 
965 aes16:
966 MOVOU (AX), X1
967 JMP final1
968 
969 aes17to32:
970 // make second starting seed
971 PXOR runtime·aeskeysched+16(SB), X1
972 AESENC X1, X1
973 
974 // load data to be hashed (two possibly overlapping halves)
975 MOVOU (AX), X2
976 MOVOU -16(AX)(CX*1), X3
977 
978 // xor with seed
979 PXOR X0, X2
980 PXOR X1, X3
981 
982 // scramble 3 times
983 AESENC X2, X2
984 AESENC X3, X3
985 AESENC X2, X2
986 AESENC X3, X3
987 AESENC X2, X2
988 AESENC X3, X3
989 
990 // combine results
991 PXOR X3, X2
992 MOVQ X2, (DX)
993 RET
994 
995 aes33to64:
996 // make 3 more starting seeds
997 MOVO X1, X2
998 MOVO X1, X3
999 PXOR runtime·aeskeysched+16(SB), X1
1000 PXOR runtime·aeskeysched+32(SB), X2
1001 PXOR runtime·aeskeysched+48(SB), X3
1002 AESENC X1, X1
1003 AESENC X2, X2
1004 AESENC X3, X3
1005 
1006 MOVOU (AX), X4
1007 MOVOU 16(AX), X5
1008 MOVOU -32(AX)(CX*1), X6
1009 MOVOU -16(AX)(CX*1), X7
1010 
1011 PXOR X0, X4
1012 PXOR X1, X5
1013 PXOR X2, X6
1014 PXOR X3, X7
1015 
1016 AESENC X4, X4
1017 AESENC X5, X5
1018 AESENC X6, X6
1019 AESENC X7, X7
1020 
1021 AESENC X4, X4
1022 AESENC X5, X5
1023 AESENC X6, X6
1024 AESENC X7, X7
1025 
1026 AESENC X4, X4
1027 AESENC X5, X5
1028 AESENC X6, X6
1029 AESENC X7, X7
1030 
1031 PXOR X6, X4
1032 PXOR X7, X5
1033 PXOR X5, X4
1034 MOVQ X4, (DX)
1035 RET
1036 
1037 aes65to128:
1038 // make 7 more starting seeds
1039 MOVO X1, X2
1040 MOVO X1, X3
1041 MOVO X1, X4
1042 MOVO X1, X5
1043 MOVO X1, X6
1044 MOVO X1, X7
1045 PXOR runtime·aeskeysched+16(SB), X1
1046 PXOR runtime·aeskeysched+32(SB), X2
1047 PXOR runtime·aeskeysched+48(SB), X3
1048 PXOR runtime·aeskeysched+64(SB), X4
1049 PXOR runtime·aeskeysched+80(SB), X5
1050 PXOR runtime·aeskeysched+96(SB), X6
1051 PXOR runtime·aeskeysched+112(SB), X7
1052 AESENC X1, X1
1053 AESENC X2, X2
1054 AESENC X3, X3
1055 AESENC X4, X4
1056 AESENC X5, X5
1057 AESENC X6, X6
1058 AESENC X7, X7
1059 
1060 // load data (first 64 bytes plus last 64 bytes, possibly overlapping)
1061 MOVOU (AX), X8
1062 MOVOU 16(AX), X9
1063 MOVOU 32(AX), X10
1064 MOVOU 48(AX), X11
1065 MOVOU -64(AX)(CX*1), X12
1066 MOVOU -48(AX)(CX*1), X13
1067 MOVOU -32(AX)(CX*1), X14
1068 MOVOU -16(AX)(CX*1), X15
1069 
1070 // xor with seed
1071 PXOR X0, X8
1072 PXOR X1, X9
1073 PXOR X2, X10
1074 PXOR X3, X11
1075 PXOR X4, X12
1076 PXOR X5, X13
1077 PXOR X6, X14
1078 PXOR X7, X15
1079 
1080 // scramble 3 times
1081 AESENC X8, X8
1082 AESENC X9, X9
1083 AESENC X10, X10
1084 AESENC X11, X11
1085 AESENC X12, X12
1086 AESENC X13, X13
1087 AESENC X14, X14
1088 AESENC X15, X15
1089 
1090 AESENC X8, X8
1091 AESENC X9, X9
1092 AESENC X10, X10
1093 AESENC X11, X11
1094 AESENC X12, X12
1095 AESENC X13, X13
1096 AESENC X14, X14
1097 AESENC X15, X15
1098 
1099 AESENC X8, X8
1100 AESENC X9, X9
1101 AESENC X10, X10
1102 AESENC X11, X11
1103 AESENC X12, X12
1104 AESENC X13, X13
1105 AESENC X14, X14
1106 AESENC X15, X15
1107 
1108 // combine results
1109 PXOR X12, X8
1110 PXOR X13, X9
1111 PXOR X14, X10
1112 PXOR X15, X11
1113 PXOR X10, X8
1114 PXOR X11, X9
1115 PXOR X9, X8
1116 MOVQ X8, (DX)
1117 RET
1118 
1119 aes129plus:
1120 // make 7 more starting seeds
1121 MOVO X1, X2
1122 MOVO X1, X3
1123 MOVO X1, X4
1124 MOVO X1, X5
1125 MOVO X1, X6
1126 MOVO X1, X7
1127 PXOR runtime·aeskeysched+16(SB), X1
1128 PXOR runtime·aeskeysched+32(SB), X2
1129 PXOR runtime·aeskeysched+48(SB), X3
1130 PXOR runtime·aeskeysched+64(SB), X4
1131 PXOR runtime·aeskeysched+80(SB), X5
1132 PXOR runtime·aeskeysched+96(SB), X6
1133 PXOR runtime·aeskeysched+112(SB), X7
1134 AESENC X1, X1
1135 AESENC X2, X2
1136 AESENC X3, X3
1137 AESENC X4, X4
1138 AESENC X5, X5
1139 AESENC X6, X6
1140 AESENC X7, X7
1141 
1142 // start with last (possibly overlapping) block
1143 MOVOU -128(AX)(CX*1), X8
1144 MOVOU -112(AX)(CX*1), X9
1145 MOVOU -96(AX)(CX*1), X10
1146 MOVOU -80(AX)(CX*1), X11
1147 MOVOU -64(AX)(CX*1), X12
1148 MOVOU -48(AX)(CX*1), X13
1149 MOVOU -32(AX)(CX*1), X14
1150 MOVOU -16(AX)(CX*1), X15
1151 
1152 // xor in seed
1153 PXOR X0, X8
1154 PXOR X1, X9
1155 PXOR X2, X10
1156 PXOR X3, X11
1157 PXOR X4, X12
1158 PXOR X5, X13
1159 PXOR X6, X14
1160 PXOR X7, X15
1161 
1162 // compute number of remaining 128-byte blocks
1163 DECQ CX
1164 SHRQ $7, CX
1165 
1166 aesloop:
1167 // scramble state
1168 AESENC X8, X8
1169 AESENC X9, X9
1170 AESENC X10, X10
1171 AESENC X11, X11
1172 AESENC X12, X12
1173 AESENC X13, X13
1174 AESENC X14, X14
1175 AESENC X15, X15
1176 
1177 // scramble state, xor in a block
1178 MOVOU (AX), X0
1179 MOVOU 16(AX), X1
1180 MOVOU 32(AX), X2
1181 MOVOU 48(AX), X3
1182 AESENC X0, X8
1183 AESENC X1, X9
1184 AESENC X2, X10
1185 AESENC X3, X11
1186 MOVOU 64(AX), X4
1187 MOVOU 80(AX), X5
1188 MOVOU 96(AX), X6
1189 MOVOU 112(AX), X7
1190 AESENC X4, X12
1191 AESENC X5, X13
1192 AESENC X6, X14
1193 AESENC X7, X15
1194 
1195 ADDQ $128, AX
1196 DECQ CX
1197 JNE aesloop
1198 
1199 // 3 more scrambles to finish
1200 AESENC X8, X8
1201 AESENC X9, X9
1202 AESENC X10, X10
1203 AESENC X11, X11
1204 AESENC X12, X12
1205 AESENC X13, X13
1206 AESENC X14, X14
1207 AESENC X15, X15
1208 AESENC X8, X8
1209 AESENC X9, X9
1210 AESENC X10, X10
1211 AESENC X11, X11
1212 AESENC X12, X12
1213 AESENC X13, X13
1214 AESENC X14, X14
1215 AESENC X15, X15
1216 AESENC X8, X8
1217 AESENC X9, X9
1218 AESENC X10, X10
1219 AESENC X11, X11
1220 AESENC X12, X12
1221 AESENC X13, X13
1222 AESENC X14, X14
1223 AESENC X15, X15
1224 
1225 // fold the 8 lanes down to one 64-bit result
1225 PXOR X12, X8
1226 PXOR X13, X9
1227 PXOR X14, X10
1228 PXOR X15, X11
1229 PXOR X10, X8
1230 PXOR X11, X9
1231 PXOR X9, X8
1232 MOVQ X8, (DX)
1233 RET
1234
1235 // func aeshash32(p unsafe.Pointer, h uintptr) uintptr
// Hash 4 bytes at p with seed h using AES-round mixing.
// The seed occupies the low quadword of X0 and the 4 data bytes are
// inserted as dword lane 2, so seed and data sit in disjoint lanes
// before three AESENC rounds keyed from runtime·aeskeysched.
1236 TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
1237 MOVQ p+0(FP), AX // ptr to data
1238 MOVQ h+8(FP), X0 // seed
1239 PINSRD $2, (AX), X0 // data
1240 AESENC runtime·aeskeysched+0(SB), X0
1241 AESENC runtime·aeskeysched+16(SB), X0
1242 AESENC runtime·aeskeysched+32(SB), X0
1243 MOVQ X0, ret+16(FP) // low 64 bits of X0 are the hash
1244 RET
1245
1246 // func aeshash64(p unsafe.Pointer, h uintptr) uintptr
// Hash 8 bytes at p with seed h using AES-round mixing.
// The seed occupies the low quadword of X0 and the 8 data bytes the
// high quadword, then three AESENC rounds keyed from
// runtime·aeskeysched scramble them together.
1247 TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
1248 MOVQ p+0(FP), AX // ptr to data
1249 MOVQ h+8(FP), X0 // seed
1250 PINSRQ $1, (AX), X0 // data
1251 AESENC runtime·aeskeysched+0(SB), X0
1252 AESENC runtime·aeskeysched+16(SB), X0
1253 AESENC runtime·aeskeysched+32(SB), X0
1254 MOVQ X0, ret+16(FP) // low 64 bits of X0 are the hash
1255 RET
1256
1257 // simple mask to get rid of data in the high part of the register.
// masks<> is a table of sixteen 16-byte masks indexed by length n
// (0 <= n <= 15): entry n (at offset n*16) has its low n bytes set to
// 0xff and the remaining bytes zero, so ANDing a 16-byte load with
// masks<>[n] keeps only the first n bytes.
1258 DATA masks<>+0x00(SB)/8, $0x0000000000000000
1259 DATA masks<>+0x08(SB)/8, $0x0000000000000000
1260 DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1261 DATA masks<>+0x18(SB)/8, $0x0000000000000000
1262 DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1263 DATA masks<>+0x28(SB)/8, $0x0000000000000000
1264 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1265 DATA masks<>+0x38(SB)/8, $0x0000000000000000
1266 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1267 DATA masks<>+0x48(SB)/8, $0x0000000000000000
1268 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1269 DATA masks<>+0x58(SB)/8, $0x0000000000000000
1270 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1271 DATA masks<>+0x68(SB)/8, $0x0000000000000000
1272 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1273 DATA masks<>+0x78(SB)/8, $0x0000000000000000
1274 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1275 DATA masks<>+0x88(SB)/8, $0x0000000000000000
1276 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1277 DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1278 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1279 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1280 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1281 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1282 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1283 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1284 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1285 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1286 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1287 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1288 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1289 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
1290 GLOBL masks<>(SB),RODATA,$256
1291
1292 // func checkASM() bool
// Verifies assembly-level data invariants; returns true on success.
1293 TEXT ·checkASM(SB),NOSPLIT,$0-1
1294 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1295 MOVQ $masks<>(SB), AX
1296 MOVQ $shifts<>(SB), BX
1297 ORQ BX, AX // a set low bit in either address spoils the test
1298 TESTQ $15, AX
1299 SETEQ ret+0(FP) // true iff both addresses have their low 4 bits clear
1300 RET
1301
1302 // these are arguments to pshufb. They move data down from
1303 // the high bytes of the register to the low bytes of the register.
1304 // index is how many bytes to move.
// Each entry is 16 bytes (entry n at offset n*16). PSHUFB zeroes any
// destination byte whose mask byte has its high bit set, so the 0xff
// filler bytes clear the lanes beyond the moved data.
1305 DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1306 DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1307 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1308 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1309 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1310 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1311 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1312 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1313 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1314 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1315 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1316 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1317 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1318 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1319 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1320 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1321 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1322 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1323 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1324 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1325 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1326 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1327 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1328 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1329 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1330 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1331 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1332 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1333 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1334 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1335 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1336 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1337 GLOBL shifts<>(SB),RODATA,$256
1338
// return0 sets AX to 0 and returns; callers read the result directly
// from AX rather than from a Go result slot.
1339 TEXT runtime·return0(SB), NOSPLIT, $0
1340 MOVL $0, AX // 32-bit write zero-extends, clearing all of RAX
1341 RET
1342
1343
1344 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1345 // Must obey the gcc calling convention.
1346 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1347 get_tls(CX)
1348 MOVQ g(CX), AX // AX = g (current goroutine, from TLS)
1349 MOVQ g_m(AX), AX // AX = g.m
1350 MOVQ m_curg(AX), AX // AX = g.m.curg
1351 MOVQ (g_stack+stack_hi)(AX), AX // AX = curg.stack.hi (C return value in AX)
1352 RET
1353
1354 // The top-most function running on a goroutine
1355 // returns to goexit+PCQuantum.
1356 TEXT runtime·goexit(SB),NOSPLIT,$0-0
// The leading NOP ensures the return address goexit+PCQuantum falls
// inside this function's code range.
1357 BYTE $0x90 // NOP
1358 CALL runtime·goexit1(SB) // does not return
1359 // traceback from goexit1 must hit code range of goexit
1360 BYTE $0x90 // NOP
1361
1362 // This is called from .init_array and follows the platform, not Go, ABI.
// Appends the moduledata passed in DI (the first argument register in
// the C ABI used here) to the runtime's linked list of module data.
1363 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1364 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1365 MOVQ runtime·lastmoduledatap(SB), AX
1366 MOVQ DI, moduledata_next(AX) // lastmoduledatap.next = DI
1367 MOVQ DI, runtime·lastmoduledatap(SB) // lastmoduledatap = DI
1368 POPQ R15
1369 RET
1370
1371 // gcWriteBarrier performs a heap pointer write and informs the GC.
1372 //
1373 // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
1374 // - DI is the destination of the write
1375 // - AX is the value being written at DI
1376 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1377 // but may clobber others (e.g., SSE registers).
// Frame layout ($120): bytes 0-96 spill general-purpose registers on
// the slow (flush) path; offsets 104 and 112 save R14/R13 for the
// fast path.
1378 TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
1379 // Save the registers clobbered by the fast path. This is slightly
1380 // faster than having the caller spill these.
1381 MOVQ R14, 104(SP)
1382 MOVQ R13, 112(SP)
1383 // TODO: Consider passing g.m.p in as an argument so they can be shared
1384 // across a sequence of write barriers.
1385 get_tls(R13)
1386 MOVQ g(R13), R13
1387 MOVQ g_m(R13), R13
1388 MOVQ m_p(R13), R13 // R13 = g.m.p
1389 MOVQ (p_wbBuf+wbBuf_next)(R13), R14
1390 // Increment wbBuf.next position.
1391 LEAQ 16(R14), R14 // reserve two 8-byte slots: value and *slot
1392 MOVQ R14, (p_wbBuf+wbBuf_next)(R13)
1393 CMPQ R14, (p_wbBuf+wbBuf_end)(R13)
1394 // Record the write.
1395 MOVQ AX, -16(R14) // Record value
1396 // Note: This turns bad pointer writes into bad
1397 // pointer reads, which could be confusing. We could avoid
1398 // reading from obviously bad pointers, which would
1399 // take care of the vast majority of these. We could
1400 // patch this up in the signal handler, or use XCHG to
1401 // combine the read and the write.
1402 MOVQ (DI), R13
1403 MOVQ R13, -8(R14) // Record *slot
1404 // Is the buffer full? (flags set in CMPQ above)
1405 JEQ flush
1406 ret:
1407 MOVQ 104(SP), R14
1408 MOVQ 112(SP), R13
1409 // Do the write.
1410 MOVQ AX, (DI)
1411 RET
1412
1413 flush:
1414 // Save all general purpose registers since these could be
1415 // clobbered by wbBufFlush and were not saved by the caller.
1416 // It is possible for wbBufFlush to clobber other registers
1417 // (e.g., SSE registers), but the compiler takes care of saving
1418 // those in the caller if necessary. This strikes a balance
1419 // with registers that are likely to be used.
1420 //
1421 // We don't have type information for these, but all code under
1422 // here is NOSPLIT, so nothing will observe these.
1423 //
1424 // TODO: We could strike a different balance; e.g., saving X0
1425 // and not saving GP registers that are less likely to be used.
1426 MOVQ DI, 0(SP) // Also first argument to wbBufFlush
1427 MOVQ AX, 8(SP) // Also second argument to wbBufFlush
1428 MOVQ BX, 16(SP)
1429 MOVQ CX, 24(SP)
1430 MOVQ DX, 32(SP)
1431 // DI already saved
1432 MOVQ SI, 40(SP)
1433 MOVQ BP, 48(SP)
1434 MOVQ R8, 56(SP)
1435 MOVQ R9, 64(SP)
1436 MOVQ R10, 72(SP)
1437 MOVQ R11, 80(SP)
1438 MOVQ R12, 88(SP)
1439 // R13 already saved
1440 // R14 already saved
1441 MOVQ R15, 96(SP)
1442
1443 // This takes arguments DI and AX
1444 CALL runtime·wbBufFlush(SB)
1445
1446 MOVQ 0(SP), DI
1447 MOVQ 8(SP), AX
1448 MOVQ 16(SP), BX
1449 MOVQ 24(SP), CX
1450 MOVQ 32(SP), DX
1451 MOVQ 40(SP), SI
1452 MOVQ 48(SP), BP
1453 MOVQ 56(SP), R8
1454 MOVQ 64(SP), R9
1455 MOVQ 72(SP), R10
1456 MOVQ 80(SP), R11
1457 MOVQ 88(SP), R12
1458 MOVQ 96(SP), R15
1459 JMP ret
1460
// Error string reported by debugCallV1 when the requested argument
// frame exceeds the largest debugCall* stub. Not NUL-terminated; its
// length (20) is passed alongside the pointer.
1461 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1462 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1463
1464 // debugCallV1 is the entry point for debugger-injected function
1465 // calls on running goroutines. It informs the runtime that a
1466 // debug call has been injected and creates a call frame for the
1467 // debugger to fill in.
1468 //
1469 // To inject a function call, a debugger should:
1470 // 1. Check that the goroutine is in state _Grunning and that
1471 // there are at least 256 bytes free on the stack.
1472 // 2. Push the current PC on the stack (updating SP).
1473 // 3. Write the desired argument frame size at SP-16 (using the SP
1474 // after step 2).
1475 // 4. Save all machine registers (including flags and XMM registers)
1476 // so they can be restored later by the debugger.
1477 // 5. Set the PC to debugCallV1 and resume execution.
1478 //
1479 // If the goroutine is in state _Grunnable, then it's not generally
1480 // safe to inject a call because it may return out via other runtime
1481 // operations. Instead, the debugger should unwind the stack to find
1482 // the return to non-runtime code, add a temporary breakpoint there,
1483 // and inject the call once that breakpoint is hit.
1484 //
1485 // If the goroutine is in any other state, it's not safe to inject a call.
1486 //
1487 // This function communicates back to the debugger by setting RAX and
1488 // invoking INT3 to raise a breakpoint signal. See the comments in the
1489 // implementation for the protocol the debugger is expected to
1490 // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1491 //
1492 // The debugger must ensure that any pointers passed to the function
1493 // obey escape analysis requirements. Specifically, it must not pass
1494 // a stack pointer to an escaping argument. debugCallV1 cannot check
1495 // this invariant.
1496 TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
1497 // Save all registers that may contain pointers in GC register
1498 // map order (see ssa.registersAMD64). This makes it possible
1499 // to copy the stack while updating pointers currently held in
1500 // registers, and for the GC to find roots in registers.
1501 //
1502 // We can't do anything that might clobber any of these
1503 // registers before this.
1504 MOVQ R15, r15-(14*8+8)(SP)
1505 MOVQ R14, r14-(13*8+8)(SP)
1506 MOVQ R13, r13-(12*8+8)(SP)
1507 MOVQ R12, r12-(11*8+8)(SP)
1508 MOVQ R11, r11-(10*8+8)(SP)
1509 MOVQ R10, r10-(9*8+8)(SP)
1510 MOVQ R9, r9-(8*8+8)(SP)
1511 MOVQ R8, r8-(7*8+8)(SP)
1512 MOVQ DI, di-(6*8+8)(SP)
1513 MOVQ SI, si-(5*8+8)(SP)
1514 MOVQ BP, bp-(4*8+8)(SP)
1515 MOVQ BX, bx-(3*8+8)(SP)
1516 MOVQ DX, dx-(2*8+8)(SP)
1517 // Save the frame size before we clobber it. Either of the last
1518 // saves could clobber this depending on whether there's a saved BP.
1519 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1520 MOVQ CX, cx-(1*8+8)(SP)
1521 MOVQ AX, ax-(0*8+8)(SP)
1522
1523 // Save the argument frame size.
1524 MOVQ DX, frameSize-128(SP)
1525
1526 // Perform a safe-point check.
1527 MOVQ retpc-8(FP), AX // Caller's PC
1528 MOVQ AX, 0(SP)
1529 CALL runtime·debugCallCheck(SB)
1530 MOVQ 8(SP), AX
1531 TESTQ AX, AX
1532 JZ good
1533 // The safety check failed. Put the reason string at the top
1534 // of the stack.
1535 MOVQ AX, 0(SP)
1536 MOVQ 16(SP), AX
1537 MOVQ AX, 8(SP)
1538 // Set AX to 8 and invoke INT3. The debugger should get the
1539 // reason a call can't be injected from the top of the stack
1540 // and resume execution.
1541 MOVQ $8, AX
1542 BYTE $0xcc
1543 JMP restore
1544
1545 good:
1546 // Registers are saved and it's safe to make a call.
1547 // Open up a call frame, moving the stack if necessary.
1548 //
1549 // Once the frame is allocated, this will set AX to 0 and
1550 // invoke INT3. The debugger should write the argument
1551 // frame for the call at SP, push the trapping PC on the
1552 // stack, set the PC to the function to call, set RCX to point
1553 // to the closure (if a closure call), and resume execution.
1554 //
1555 // If the function returns, this will set AX to 1 and invoke
1556 // INT3. The debugger can then inspect any return value saved
1557 // on the stack at SP and resume execution again.
1558 //
1559 // If the function panics, this will set AX to 2 and invoke INT3.
1560 // The interface{} value of the panic will be at SP. The debugger
1561 // can inspect the panic value and resume execution again.
1562 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1563 CMPQ AX, $MAXSIZE; \
1564 JA 5(PC); \
1565 MOVQ $NAME(SB), AX; \
1566 MOVQ AX, 0(SP); \
1567 CALL runtime·debugCallWrap(SB); \
1568 JMP restore
1569
// Dispatch on the requested frame size, selecting the smallest
// debugCall* stub whose fixed frame can hold it.
1570 MOVQ frameSize-128(SP), AX
1571 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1572 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1573 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1574 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1575 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1576 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1577 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1578 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1579 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1580 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1581 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1582 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1583 // The frame size is too large. Report the error.
1584 MOVQ $debugCallFrameTooLarge<>(SB), AX
1585 MOVQ AX, 0(SP)
1586 MOVQ $20, 8(SP) // length of debugCallFrameTooLarge string
1587 MOVQ $8, AX
1588 BYTE $0xcc
1589 JMP restore
1590
1591 restore:
1592 // Calls and failures resume here.
1593 //
1594 // Set AX to 16 and invoke INT3. The debugger should restore
1595 // all registers except RIP and RSP and resume execution.
1596 MOVQ $16, AX
1597 BYTE $0xcc
1598 // We must not modify flags after this point.
1599
1600 // Restore pointer-containing registers, which may have been
1601 // modified from the debugger's copy by stack copying.
1602 MOVQ ax-(0*8+8)(SP), AX
1603 MOVQ cx-(1*8+8)(SP), CX
1604 MOVQ dx-(2*8+8)(SP), DX
1605 MOVQ bx-(3*8+8)(SP), BX
1606 MOVQ bp-(4*8+8)(SP), BP
1607 MOVQ si-(5*8+8)(SP), SI
1608 MOVQ di-(6*8+8)(SP), DI
1609 MOVQ r8-(7*8+8)(SP), R8
1610 MOVQ r9-(8*8+8)(SP), R9
1611 MOVQ r10-(9*8+8)(SP), R10
1612 MOVQ r11-(10*8+8)(SP), R11
1613 MOVQ r12-(11*8+8)(SP), R12
1614 MOVQ r13-(12*8+8)(SP), R13
1615 MOVQ r14-(13*8+8)(SP), R14
1616 MOVQ r15-(14*8+8)(SP), R15
1617
1618 RET
1619
1620 // runtime.debugCallCheck assumes that functions defined with the
1621 // DEBUG_CALL_FN macro are safe points to inject calls.
// Each stub sets AX to 0 and breaks (INT3) so the debugger can write
// the argument frame and redirect the PC; after the injected call
// returns, it sets AX to 1 and breaks again (see the protocol comments
// in debugCallV1). Comments cannot appear inside the macro body due to
// the \ line continuations.
1622 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
1623 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1624 NO_LOCAL_POINTERS; \
1625 MOVQ $0, AX; \
1626 BYTE $0xcc; \
1627 MOVQ $1, AX; \
1628 BYTE $0xcc; \
1629 RET
// One stub per power-of-two frame size, matching the dispatch table in
// debugCallV1.
1630 DEBUG_CALL_FN(debugCall32<>, 32)
1631 DEBUG_CALL_FN(debugCall64<>, 64)
1632 DEBUG_CALL_FN(debugCall128<>, 128)
1633 DEBUG_CALL_FN(debugCall256<>, 256)
1634 DEBUG_CALL_FN(debugCall512<>, 512)
1635 DEBUG_CALL_FN(debugCall1024<>, 1024)
1636 DEBUG_CALL_FN(debugCall2048<>, 2048)
1637 DEBUG_CALL_FN(debugCall4096<>, 4096)
1638 DEBUG_CALL_FN(debugCall8192<>, 8192)
1639 DEBUG_CALL_FN(debugCall16384<>, 16384)
1640 DEBUG_CALL_FN(debugCall32768<>, 32768)
1641 DEBUG_CALL_FN(debugCall65536<>, 65536)
1642
1643 // func debugCallPanicked(val interface{})
// Reports a panic from a debugger-injected call: copies the panic
// value's interface words (type, data) to the top of the stack, then
// sets AX to 2 and raises INT3 so the debugger can inspect the value
// (per the debugCallV1 protocol).
1644 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1645 // Copy the panic value to the top of stack.
1646 MOVQ val_type+0(FP), AX
1647 MOVQ AX, 0(SP)
1648 MOVQ val_data+8(FP), AX
1649 MOVQ AX, 8(SP)
1650 MOVQ $2, AX
1651 BYTE $0xcc
1652 RET
1653
1654 // Note: these functions use a special calling convention to save generated code space.
1655 // Arguments are passed in registers, but the space for those arguments are allocated
1656 // in the caller's stack frame. These stubs write the args into that stack space and
1657 // then tail call to the corresponding runtime handler.
1658 // The tail call makes these stubs disappear in backtraces.
// NOTE(review): the register pair written to x and y differs per stub;
// it presumably matches the registers the compiler's bounds-check code
// leaves the offending index and limit in — confirm against the
// goPanic* handlers and the compiler's panic-call lowering.
1659 TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
1660 MOVQ AX, x+0(FP)
1661 MOVQ CX, y+8(FP)
1662 JMP runtime·goPanicIndex(SB)
1663 TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
1664 MOVQ AX, x+0(FP)
1665 MOVQ CX, y+8(FP)
1666 JMP runtime·goPanicIndexU(SB)
1667 TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
1668 MOVQ CX, x+0(FP)
1669 MOVQ DX, y+8(FP)
1670 JMP runtime·goPanicSliceAlen(SB)
1671 TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
1672 MOVQ CX, x+0(FP)
1673 MOVQ DX, y+8(FP)
1674 JMP runtime·goPanicSliceAlenU(SB)
1675 TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
1676 MOVQ CX, x+0(FP)
1677 MOVQ DX, y+8(FP)
1678 JMP runtime·goPanicSliceAcap(SB)
1679 TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
1680 MOVQ CX, x+0(FP)
1681 MOVQ DX, y+8(FP)
1682 JMP runtime·goPanicSliceAcapU(SB)
1683 TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
1684 MOVQ AX, x+0(FP)
1685 MOVQ CX, y+8(FP)
1686 JMP runtime·goPanicSliceB(SB)
1687 TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
1688 MOVQ AX, x+0(FP)
1689 MOVQ CX, y+8(FP)
1690 JMP runtime·goPanicSliceBU(SB)
1691 TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
1692 MOVQ DX, x+0(FP)
1693 MOVQ BX, y+8(FP)
1694 JMP runtime·goPanicSlice3Alen(SB)
1695 TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
1696 MOVQ DX, x+0(FP)
1697 MOVQ BX, y+8(FP)
1698 JMP runtime·goPanicSlice3AlenU(SB)
1699 TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
1700 MOVQ DX, x+0(FP)
1701 MOVQ BX, y+8(FP)
1702 JMP runtime·goPanicSlice3Acap(SB)
1703 TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
1704 MOVQ DX, x+0(FP)
1705 MOVQ BX, y+8(FP)
1706 JMP runtime·goPanicSlice3AcapU(SB)
1707 TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
1708 MOVQ CX, x+0(FP)
1709 MOVQ DX, y+8(FP)
1710 JMP runtime·goPanicSlice3B(SB)
1711 TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
1712 MOVQ CX, x+0(FP)
1713 MOVQ DX, y+8(FP)
1714 JMP runtime·goPanicSlice3BU(SB)
1715 TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
1716 MOVQ AX, x+0(FP)
1717 MOVQ CX, y+8(FP)
1718 JMP runtime·goPanicSlice3C(SB)
1719 TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
1720 MOVQ AX, x+0(FP)
1721 MOVQ CX, y+8(FP)
1722 JMP runtime·goPanicSlice3CU(SB)
1723
1724 #ifdef GOOS_android
1725 // Use the free TLS_SLOT_APP slot #2 on Android Q.
1726 // Earlier androids are set up in gcc_android.c.
// tls_g holds the TLS offset used to locate g; 16 presumably being
// slot 2 * 8 bytes per slot — confirm against Bionic's TLS layout.
1727 DATA runtime·tls_g+0(SB)/8, $16
1728 GLOBL runtime·tls_g+0(SB), NOPTR, $8
1729 #endif
View as plain text