Text file src/runtime/asm_386.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9
10 // _rt0_386 is common startup code for most 386 systems when using
11 // internal linking. This is the entry point for the program from the
12 // kernel for an ordinary -buildmode=exe program. The stack holds the
13 // number of arguments and the C-style argv. Both are copied into this function's 8-byte frame, where runtime·rt0_go reads them.
14 TEXT _rt0_386(SB),NOSPLIT,$8
15 MOVL 8(SP), AX // argc (first word above the 8-byte frame)
16 LEAL 12(SP), BX // argv (address of the word that follows argc)
17 MOVL AX, 0(SP) // rt0_go loads argc from 0(SP)
18 MOVL BX, 4(SP) // rt0_go loads argv from 4(SP)
19 JMP runtime·rt0_go(SB) // rt0_go is entered by jump, not by call
20
21 // _rt0_386_lib is common startup code for most 386 systems when
22 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
23 // arrange to invoke this function as a global constructor (for
24 // c-archive) or when the shared library is loaded (for c-shared).
25 // We expect argc and argv to be passed on the stack following the
26 // usual C ABI. They are stashed in file-local globals and the runtime is started on a new thread running _rt0_386_lib_go.
27 TEXT _rt0_386_lib(SB),NOSPLIT,$0
28 PUSHL BP // save caller's BP; restored at "restore"
29 MOVL SP, BP // BP-relative addressing: 4(BP) = return address, 8(BP) = first argument
30 PUSHL BX // BX, SI and DI are saved here and popped again at "restore"
31 PUSHL SI
32 PUSHL DI
33
34 MOVL 8(BP), AX // first stack argument: argc
35 MOVL AX, _rt0_386_lib_argc<>(SB)
36 MOVL 12(BP), AX // second stack argument: argv
37 MOVL AX, _rt0_386_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
41
42 SUBL $8, SP // reserve two 4-byte outgoing-argument slots; released at "restore"
43
44 // Create a new thread to do the runtime initialization.
45 MOVL _cgo_sys_thread_create(SB), AX // AX = thread-creation hook, or 0 if none is installed
46 TESTL AX, AX
47 JZ nocgo
48
49 // Align stack to call C function.
50 // We moved SP to BP above, but BP was clobbered by the libpreinit call.
51 MOVL SP, BP // remember the unaligned SP so it can be restored after the call
52 ANDL $~15, SP // round SP down to a 16-byte boundary
53
54 MOVL $_rt0_386_lib_go(SB), BX
55 MOVL BX, 0(SP) // first argument: address of _rt0_386_lib_go
56 MOVL $0, 4(SP) // second argument: 0
57
58 CALL AX // call the hook loaded from _cgo_sys_thread_create
59
60 MOVL BP, SP // undo the alignment adjustment
61
62 JMP restore
63
64 nocgo:
65 MOVL $0x800000, 0(SP) // stacksize = 8192KB
66 MOVL $_rt0_386_lib_go(SB), AX
67 MOVL AX, 4(SP) // fn
68 CALL runtime·newosproc0(SB)
69
70 restore:
71 ADDL $8, SP // release the two argument slots reserved above
72 POPL DI // pop in reverse order of the pushes at function entry
73 POPL SI
74 POPL BX
75 POPL BP
76 RET
77
78 // _rt0_386_lib_go initializes the Go runtime.
79 // This is started in a separate thread by _rt0_386_lib. It reloads the argc/argv saved there and hands them to runtime·rt0_go.
80 TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
81 MOVL _rt0_386_lib_argc<>(SB), AX // argc saved by _rt0_386_lib
82 MOVL AX, 0(SP) // rt0_go loads argc from 0(SP)
83 MOVL _rt0_386_lib_argv<>(SB), AX // argv saved by _rt0_386_lib
84 MOVL AX, 4(SP) // rt0_go loads argv from 4(SP)
85 JMP runtime·rt0_go(SB) // rt0_go is entered by jump, not by call
86
87 DATA _rt0_386_lib_argc<>(SB)/4, $0 // file-local 4-byte slot, initially 0; written by _rt0_386_lib, read by _rt0_386_lib_go
88 GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
89 DATA _rt0_386_lib_argv<>(SB)/4, $0 // file-local 4-byte slot, initially 0; written by _rt0_386_lib, read by _rt0_386_lib_go
90 GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
91
92 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME,$0
93 // Copy arguments forward on an even stack.
94 // Users of this function jump to it, they don't call it.
95 MOVL 0(SP), AX // argc, as left by the code that jumped here
96 MOVL 4(SP), BX // argv, as left by the code that jumped here
97 SUBL $128, SP // plenty of scratch
98 ANDL $~15, SP // round SP down to a 16-byte boundary
99 MOVL AX, 120(SP) // save argc, argv away
100 MOVL BX, 124(SP)
101
102 // set default stack bounds.
103 // _cgo_init may update stackguard.
104 MOVL $runtime·g0(SB), BP // BP = &g0
105 LEAL (-64*1024+104)(SP), BX // BX = SP - 64KB + 104, used below as the provisional low bound
106 MOVL BX, g_stackguard0(BP)
107 MOVL BX, g_stackguard1(BP)
108 MOVL BX, (g_stack+stack_lo)(BP)
109 MOVL SP, (g_stack+stack_hi)(BP) // current SP becomes the high bound
110
111 // find out information about the processor we're on
112 #ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
113 JMP has_cpuid
114 #else
115 // first see if CPUID instruction is supported.
116 PUSHFL // copy of EFLAGS kept on the stack for the comparison and the final restore
117 PUSHFL // second copy, modified and loaded back into EFLAGS
118 XORL $(1<<21), 0(SP) // flip ID bit
119 POPFL // try to load EFLAGS with the ID bit flipped
120 PUSHFL
121 POPL AX // AX = EFLAGS as the processor actually holds it now
122 XORL 0(SP), AX // AX = bits that differ from the original EFLAGS
123 POPFL // restore EFLAGS
124 TESTL $(1<<21), AX // ID bit changed => CPUID is available
125 JNE has_cpuid
126 #endif
127
128 bad_proc: // show that the program requires MMX.
129 MOVL $2, 0(SP) // first argument to write: file descriptor 2
130 MOVL $bad_proc_msg<>(SB), 4(SP) // second argument: address of the message
131 MOVL $0x3d, 8(SP) // third argument: 0x3d = 61, the declared size of bad_proc_msg
132 CALL runtime·write(SB)
133 MOVL $1, 0(SP) // argument to exit: 1
134 CALL runtime·exit(SB)
135 CALL runtime·abort(SB) // reached only if exit returns
136
137 has_cpuid:
138 MOVL $0, AX // CPUID leaf 0
139 CPUID
140 MOVL AX, SI // keep the AX result of leaf 0 in SI
141 CMPL AX, $0
142 JE nocpuinfo // leaf 0 returned 0 in AX: skip the vendor and feature checks
143
144 // Figure out how to serialize RDTSC.
145 // On Intel processors LFENCE is enough. AMD requires MFENCE.
146 // Don't know about the rest, so let's do MFENCE.
147 CMPL BX, $0x756E6547 // "Genu"
148 JNE notintel
149 CMPL DX, $0x49656E69 // "ineI"
150 JNE notintel
151 CMPL CX, $0x6C65746E // "ntel"
152 JNE notintel
153 MOVB $1, runtime·isIntel(SB)
154 MOVB $1, runtime·lfenceBeforeRdtsc(SB) // read by runtime·cputicks to choose LFENCE over MFENCE
155 notintel:
156
157 // Load EAX=1 cpuid flags
158 MOVL $1, AX
159 CPUID
160 MOVL CX, DI // Move to global variable clobbers CX when generating PIC
161 MOVL AX, runtime·processorVersionInfo(SB)
162
163 // Check for MMX support
164 TESTL $(1<<23), DX // MMX
165 JZ bad_proc // no MMX: print the message and exit
166
167 nocpuinfo:
168 // if there is an _cgo_init, call it to let it
169 // initialize and to set up GS. if not,
170 // we set up GS ourselves.
171 MOVL _cgo_init(SB), AX // AX = _cgo_init hook, or 0 if none is installed
172 TESTL AX, AX
173 JZ needtls
174 #ifdef GOOS_android
175 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
176 // Compensate for tls_g (+8).
177 MOVL -8(TLS), BX
178 MOVL BX, 12(SP)
179 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
180 #else
181 MOVL $0, BX
182 MOVL BX, 12(SP) // arg 3,4: not used when using platform's TLS
183 MOVL BX, 8(SP)
184 #endif
185 MOVL $setg_gcc<>(SB), BX
186 MOVL BX, 4(SP) // arg 2: setg_gcc
187 MOVL BP, 0(SP) // arg 1: g0
188 CALL AX // call the hook loaded from _cgo_init
189
190 // update stackguard after _cgo_init
191 MOVL $runtime·g0(SB), CX
192 MOVL (g_stack+stack_lo)(CX), AX // re-read stack.lo from g0 after the _cgo_init call
193 ADDL $const__StackGuard, AX // guard = stack.lo + _StackGuard
194 MOVL AX, g_stackguard0(CX)
195 MOVL AX, g_stackguard1(CX)
196
197 #ifndef GOOS_windows
198 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
199 JMP ok
200 #endif
201 needtls:
202 #ifdef GOOS_plan9
203 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
204 JMP ok
205 #endif
206 #ifdef GOOS_darwin
207 // skip runtime·ldt0setup(SB) on Darwin
208 JMP ok
209 #endif
210
211 // set up %gs
212 CALL ldt0setup<>(SB)
213
214 // store through it, to make sure it works
215 get_tls(BX)
216 MOVL $0x123, g(BX) // write a marker through the TLS g slot
217 MOVL runtime·m0+m_tls(SB), AX // read m0.tls directly; it must now hold the marker
218 CMPL AX, $0x123
219 JEQ ok
220 MOVL AX, 0 // abort
221 ok:
222 // set up m and g "registers"
223 get_tls(BX)
224 LEAL runtime·g0(SB), DX // DX = &g0
225 MOVL DX, g(BX) // current g = g0
226 LEAL runtime·m0(SB), AX // AX = &m0
227
228 // save m->g0 = g0
229 MOVL DX, m_g0(AX)
230 // save g0->m = m0
231 MOVL AX, g_m(DX)
232
233 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
234
235 // convention is D is always cleared
236 CLD
237
238 CALL runtime·check(SB)
239
240 // saved argc, argv
241 MOVL 120(SP), AX // argc stored at the top of this function
242 MOVL AX, 0(SP)
243 MOVL 124(SP), AX // argv stored at the top of this function
244 MOVL AX, 4(SP)
245 CALL runtime·args(SB)
246 CALL runtime·osinit(SB)
247 CALL runtime·schedinit(SB)
248
249 // create a new goroutine to start program
250 PUSHL $runtime·mainPC(SB) // entry
251 PUSHL $0 // arg size
252 CALL runtime·newproc(SB)
253 POPL AX // discard the two words pushed for newproc
254 POPL AX
255
256 // start this M
257 CALL runtime·mstart(SB)
258
259 CALL runtime·abort(SB) // reached only if mstart returns
260 RET
261
262 DATA bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
263 GLOBL bad_proc_msg<>(SB), RODATA, $61 // 61 bytes, matching the 0x3d length passed to runtime·write at bad_proc in rt0_go
264
265 DATA runtime·mainPC+0(SB)/4,$runtime·main(SB) // one 4-byte word holding the address of runtime·main
266 GLOBL runtime·mainPC(SB),RODATA,$4 // read-only; its address is pushed as the entry argument to newproc in rt0_go
267
268 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 // no frame, no arguments: execute a breakpoint instruction and return
269 INT $3 // x86 breakpoint interrupt
270 RET
271
272 TEXT runtime·asminit(SB),NOSPLIT,$0-0 // no frame, no arguments: set the x87 FPU control word
273 // Linux and MinGW start the FPU in extended double precision.
274 // Other operating systems use double precision.
275 // Change to double precision to match them,
276 // and to match other hardware that only has double.
277 FLDCW runtime·controlWord64(SB) // load the control word stored in runtime·controlWord64
278 RET
279
280 /*
281 * go-routine
282 */
283
284 // void gosave(Gobuf*)
285 // save state in Gobuf; setjmp. Records the caller's SP and PC and the current g in buf, and zeroes buf.ret.
286 TEXT runtime·gosave(SB), NOSPLIT, $0-4
287 MOVL buf+0(FP), AX // gobuf
288 LEAL buf+0(FP), BX // caller's SP
289 MOVL BX, gobuf_sp(AX)
290 MOVL 0(SP), BX // caller's PC
291 MOVL BX, gobuf_pc(AX)
292 MOVL $0, gobuf_ret(AX)
293 // Assert ctxt is zero. See func save.
294 MOVL gobuf_ctxt(AX), BX
295 TESTL BX, BX
296 JZ 2(PC) // ctxt == 0: skip the badctxt call
297 CALL runtime·badctxt(SB)
298 get_tls(CX)
299 MOVL g(CX), BX // BX = current g
300 MOVL BX, gobuf_g(AX)
301 RET
302
303 // void gogo(Gobuf*)
304 // restore state from Gobuf; longjmp. Installs buf.g as the current g, loads SP, AX (ret) and DX (ctxt) from buf, clears those fields, and jumps to buf.pc.
305 TEXT runtime·gogo(SB), NOSPLIT, $8-4
306 MOVL buf+0(FP), BX // gobuf
307 MOVL gobuf_g(BX), DX // DX = buf.g
308 MOVL 0(DX), CX // make sure g != nil
309 get_tls(CX)
310 MOVL DX, g(CX) // current g = buf.g
311 MOVL gobuf_sp(BX), SP // restore SP
312 MOVL gobuf_ret(BX), AX // AX = buf.ret
313 MOVL gobuf_ctxt(BX), DX // DX = buf.ctxt
314 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
315 MOVL $0, gobuf_ret(BX)
316 MOVL $0, gobuf_ctxt(BX)
317 MOVL gobuf_pc(BX), BX // BX = buf.pc; the Gobuf pointer is no longer needed
318 JMP BX // resume at the saved PC
319
320 // func mcall(fn func(*g))
321 // Switch to m->g0's stack, call fn(g).
322 // Fn must never return. It should gogo(&g->sched)
323 // to keep running g. Jumps to badmcall if already on g0, and to badmcall2 if fn returns.
324 TEXT runtime·mcall(SB), NOSPLIT, $0-4
325 MOVL fn+0(FP), DI // DI = fn
326
327 get_tls(DX)
328 MOVL g(DX), AX // save state in g->sched
329 MOVL 0(SP), BX // caller's PC
330 MOVL BX, (g_sched+gobuf_pc)(AX)
331 LEAL fn+0(FP), BX // caller's SP
332 MOVL BX, (g_sched+gobuf_sp)(AX)
333 MOVL AX, (g_sched+gobuf_g)(AX)
334
335 // switch to m->g0 & its stack, call fn
336 MOVL g(DX), BX
337 MOVL g_m(BX), BX // BX = g.m
338 MOVL m_g0(BX), SI // SI = m.g0
339 CMPL SI, AX // if g == m->g0 call badmcall
340 JNE 3(PC) // g != g0: skip the two-instruction jump to badmcall
341 MOVL $runtime·badmcall(SB), AX
342 JMP AX
343 MOVL SI, g(DX) // g = m->g0
344 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
345 PUSHL AX // argument to fn: the g that called mcall
346 MOVL DI, DX // DX = fn (the func value itself)
347 MOVL 0(DI), DI // DI = code pointer stored in the first word of fn
348 CALL DI
349 POPL AX // reached only if fn returned: drop the argument
350 MOVL $runtime·badmcall2(SB), AX
351 JMP AX
352 RET
353
354 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
355 // of the G stack. We need to distinguish the routine that
356 // lives at the bottom of the G stack from the one that lives
357 // at the top of the system stack because the one at the top of
358 // the system stack terminates the stack walk (see topofstack()).
359 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0 // its address is stored as g->sched.pc by systemstack; the body only returns
360 RET
361
362 // func systemstack(fn func()): run fn on the g0 stack. If already on gsignal or g0, fn is tail-called in place; if g is neither of those nor m->curg, badsystemstack is called.
363 TEXT runtime·systemstack(SB), NOSPLIT, $0-4
364 MOVL fn+0(FP), DI // DI = fn
365 get_tls(CX)
366 MOVL g(CX), AX // AX = g
367 MOVL g_m(AX), BX // BX = m
368
369 CMPL AX, m_gsignal(BX) // running on the signal g: no switch needed
370 JEQ noswitch
371
372 MOVL m_g0(BX), DX // DX = g0
373 CMPL AX, DX // already on g0: no switch needed
374 JEQ noswitch
375
376 CMPL AX, m_curg(BX) // otherwise g must be m->curg
377 JNE bad
378
379 // switch stacks
380 // save our state in g->sched. Pretend to
381 // be systemstack_switch if the G stack is scanned.
382 MOVL $runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
383 MOVL SP, (g_sched+gobuf_sp)(AX)
384 MOVL AX, (g_sched+gobuf_g)(AX)
385
386 // switch to g0
387 get_tls(CX)
388 MOVL DX, g(CX) // current g = g0
389 MOVL (g_sched+gobuf_sp)(DX), BX // BX = g0->sched.sp
390 // make it look like mstart called systemstack on g0, to stop traceback
391 SUBL $4, BX // make room for one word below g0's saved SP
392 MOVL $runtime·mstart(SB), DX
393 MOVL DX, 0(BX) // that word holds the address of mstart, in the return-address position
394 MOVL BX, SP // now running on the g0 stack
395
396 // call target function
397 MOVL DI, DX // DX = fn (the func value itself)
398 MOVL 0(DI), DI // DI = code pointer stored in the first word of fn
399 CALL DI
400
401 // switch back to g
402 get_tls(CX)
403 MOVL g(CX), AX
404 MOVL g_m(AX), BX
405 MOVL m_curg(BX), AX // AX = m->curg, the goroutine to resume
406 MOVL AX, g(CX)
407 MOVL (g_sched+gobuf_sp)(AX), SP // back on the goroutine stack, at the SP saved above
408 MOVL $0, (g_sched+gobuf_sp)(AX) // clear the saved SP now that it has been consumed
409 RET
410
411 noswitch:
412 // already on system stack; tail call the function
413 // Using a tail call here cleans up tracebacks since we won't stop
414 // at an intermediate systemstack.
415 MOVL DI, DX // DX = fn (the func value itself)
416 MOVL 0(DI), DI // DI = code pointer stored in the first word of fn
417 JMP DI
418
419 bad:
420 // Bad: g is not gsignal, not g0, not curg. What is it?
421 // Hide call from linker nosplit analysis.
422 MOVL $runtime·badsystemstack(SB), AX
423 CALL AX
424 INT $3 // breakpoint trap if badsystemstack returns
425
426 /*
427 * support for morestack
428 */
429
430 // Called during function prolog when more stack is needed.
431 //
432 // The traceback routines see morestack on a g0 as being
433 // the top of a stack (for example, morestack calling newstack
434 // calling the scheduler calling newm calling gc), so we must
435 // record an argument size. For that purpose, it has no arguments. On entry DX holds the context value that is saved into g->sched.ctxt.
436 TEXT runtime·morestack(SB),NOSPLIT,$0-0
437 // Cannot grow scheduler stack (m->g0).
438 get_tls(CX)
439 MOVL g(CX), BX
440 MOVL g_m(BX), BX // BX = m
441 MOVL m_g0(BX), SI // SI = m->g0
442 CMPL g(CX), SI
443 JNE 3(PC) // g != g0: skip the two calls below
444 CALL runtime·badmorestackg0(SB)
445 CALL runtime·abort(SB)
446
447 // Cannot grow signal stack.
448 MOVL m_gsignal(BX), SI // SI = m->gsignal
449 CMPL g(CX), SI
450 JNE 3(PC) // g != gsignal: skip the two calls below
451 CALL runtime·badmorestackgsignal(SB)
452 CALL runtime·abort(SB)
453
454 // Called from f.
455 // Set m->morebuf to f's caller.
456 NOP SP // tell vet SP changed - stop checking offsets
457 MOVL 4(SP), DI // f's caller's PC
458 MOVL DI, (m_morebuf+gobuf_pc)(BX)
459 LEAL 8(SP), CX // f's caller's SP
460 MOVL CX, (m_morebuf+gobuf_sp)(BX)
461 get_tls(CX)
462 MOVL g(CX), SI // SI = current g
463 MOVL SI, (m_morebuf+gobuf_g)(BX)
464
465 // Set g->sched to context in f.
466 MOVL 0(SP), AX // f's PC
467 MOVL AX, (g_sched+gobuf_pc)(SI)
468 MOVL SI, (g_sched+gobuf_g)(SI)
469 LEAL 4(SP), AX // f's SP
470 MOVL AX, (g_sched+gobuf_sp)(SI)
471 MOVL DX, (g_sched+gobuf_ctxt)(SI) // DX as received on entry (morestack_noctxt zeroes it first)
472
473 // Call newstack on m->g0's stack.
474 MOVL m_g0(BX), BP // BP = m->g0
475 MOVL BP, g(CX) // current g = g0
476 MOVL (g_sched+gobuf_sp)(BP), AX // AX = g0->sched.sp
477 MOVL -4(AX), BX // fault if CALL would, before smashing SP
478 MOVL AX, SP // now running on the g0 stack
479 CALL runtime·newstack(SB)
480 CALL runtime·abort(SB) // crash if newstack returns
481 RET
482
483 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0 // variant of morestack for callers with no context: zero DX, then join morestack
484 MOVL $0, DX // morestack stores DX into g->sched.ctxt
485 JMP runtime·morestack(SB)
486
487 // reflectcall: call a function with the given argument list
488 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
489 // we don't have variable-sized frames, so we use a small number
490 // of constant-sized-frame functions to encode a few bits of size in the pc.
491 // Caution: ugly multiline assembly macros in your future!
492 // DISPATCH(NAME, MAXSIZE) jumps to NAME when CX (argsize) is at most MAXSIZE (unsigned compare); otherwise control falls through to the next DISPATCH.
493 #define DISPATCH(NAME,MAXSIZE) \
494 CMPL CX, $MAXSIZE; \
495 JA 3(PC); \
496 MOVL $NAME(SB), AX; \
497 JMP AX
498 // Note: can't just "JMP NAME(SB)" - bad inlining results.
499
500 TEXT ·reflectcall(SB), NOSPLIT, $0-20
501 MOVL argsize+12(FP), CX // CX = argsize, tested by each DISPATCH below in increasing size order
502 DISPATCH(runtime·call16, 16)
503 DISPATCH(runtime·call32, 32)
504 DISPATCH(runtime·call64, 64)
505 DISPATCH(runtime·call128, 128)
506 DISPATCH(runtime·call256, 256)
507 DISPATCH(runtime·call512, 512)
508 DISPATCH(runtime·call1024, 1024)
509 DISPATCH(runtime·call2048, 2048)
510 DISPATCH(runtime·call4096, 4096)
511 DISPATCH(runtime·call8192, 8192)
512 DISPATCH(runtime·call16384, 16384)
513 DISPATCH(runtime·call32768, 32768)
514 DISPATCH(runtime·call65536, 65536)
515 DISPATCH(runtime·call131072, 131072)
516 DISPATCH(runtime·call262144, 262144)
517 DISPATCH(runtime·call524288, 524288)
518 DISPATCH(runtime·call1048576, 1048576)
519 DISPATCH(runtime·call2097152, 2097152)
520 DISPATCH(runtime·call4194304, 4194304)
521 DISPATCH(runtime·call8388608, 8388608)
522 DISPATCH(runtime·call16777216, 16777216)
523 DISPATCH(runtime·call33554432, 33554432)
524 DISPATCH(runtime·call67108864, 67108864)
525 DISPATCH(runtime·call134217728, 134217728)
526 DISPATCH(runtime·call268435456, 268435456)
527 DISPATCH(runtime·call536870912, 536870912)
528 DISPATCH(runtime·call1073741824, 1073741824)
529 MOVL $runtime·badreflectcall(SB), AX // argsize exceeds the largest size class (1073741824)
530 JMP AX
531
532 #define CALLFN(NAME,MAXSIZE) \
533 TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
534 NO_LOCAL_POINTERS; \
535 /* copy argsize bytes from argptr into this frame, starting at SP */ \
536 MOVL argptr+8(FP), SI; \
537 MOVL argsize+12(FP), CX; \
538 MOVL SP, DI; \
539 REP;MOVSB; \
540 /* call function: DX = f, AX = code pointer loaded from the first word of f */ \
541 MOVL f+4(FP), DX; \
542 MOVL (DX), AX; \
543 PCDATA $PCDATA_StackMapIndex, $0; \
544 CALL AX; \
545 /* copy return values back: callRet<> gets DX = argtype, DI = argptr+retoffset, SI = SP+retoffset, CX = argsize-retoffset */ \
546 MOVL argtype+0(FP), DX; \
547 MOVL argptr+8(FP), DI; \
548 MOVL argsize+12(FP), CX; \
549 MOVL retoffset+16(FP), BX; \
550 MOVL SP, SI; \
551 ADDL BX, DI; \
552 ADDL BX, SI; \
553 SUBL BX, CX; \
554 CALL callRet<>(SB); \
555 RET
556
557 // callRet copies return values back at the end of call*. This is a
558 // separate function so it can allocate stack space for the arguments
559 // to reflectcallmove. It does not follow the Go ABI; it expects its
560 // arguments in registers: DX, DI, SI and CX, which are stored in that order as the four stack arguments of reflectcallmove.
561 TEXT callRet<>(SB), NOSPLIT, $16-0
562 MOVL DX, 0(SP) // 1st argument: DX (CALLFN loads argtype here)
563 MOVL DI, 4(SP) // 2nd argument: DI (CALLFN passes argptr+retoffset)
564 MOVL SI, 8(SP) // 3rd argument: SI (CALLFN passes its SP+retoffset)
565 MOVL CX, 12(SP) // 4th argument: CX (CALLFN passes argsize-retoffset)
566 CALL runtime·reflectcallmove(SB)
567 RET
568 // Instantiate one fixed-frame call stub per size class; the sizes match the DISPATCH list in reflectcall.
569 CALLFN(·call16, 16)
570 CALLFN(·call32, 32)
571 CALLFN(·call64, 64)
572 CALLFN(·call128, 128)
573 CALLFN(·call256, 256)
574 CALLFN(·call512, 512)
575 CALLFN(·call1024, 1024)
576 CALLFN(·call2048, 2048)
577 CALLFN(·call4096, 4096)
578 CALLFN(·call8192, 8192)
579 CALLFN(·call16384, 16384)
580 CALLFN(·call32768, 32768)
581 CALLFN(·call65536, 65536)
582 CALLFN(·call131072, 131072)
583 CALLFN(·call262144, 262144)
584 CALLFN(·call524288, 524288)
585 CALLFN(·call1048576, 1048576)
586 CALLFN(·call2097152, 2097152)
587 CALLFN(·call4194304, 4194304)
588 CALLFN(·call8388608, 8388608)
589 CALLFN(·call16777216, 16777216)
590 CALLFN(·call33554432, 33554432)
591 CALLFN(·call67108864, 67108864)
592 CALLFN(·call134217728, 134217728)
593 CALLFN(·call268435456, 268435456)
594 CALLFN(·call536870912, 536870912)
595 CALLFN(·call1073741824, 1073741824)
596
597 TEXT runtime·procyield(SB),NOSPLIT,$0-0 // execute PAUSE `cycles` times. NOTE(review): declared argument size is 0 although cycles+0(FP) is read - confirm this is intended.
598 MOVL cycles+0(FP), AX // AX = remaining iterations
599 spin:
600 PAUSE // spin-wait hint to the processor
601 SUBL $1, AX // one iteration done
602 JNZ spin // repeat until the counter reaches zero
603 RET
604
605 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0 // no frame, no arguments; the body is a bare RET
606 // Stores are already ordered on x86, so this is just a
607 // compile barrier.
608 RET
609
610 // void jmpdefer(fn, sp);
611 // called from deferreturn.
612 // 1. pop the caller
613 // 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
614 // return (when building for shared libraries, subtract 16 bytes -- 5 bytes
615 // for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
616 // LEAL to load the offset into BX, and finally 5 for the call & displacement)
617 // 3. jmp to the argument
618 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
619 MOVL fv+0(FP), DX // fn
620 MOVL argp+4(FP), BX // caller sp
621 LEAL -4(BX), SP // caller sp after CALL
622 #ifdef GOBUILDMODE_shared
623 SUBL $16, (SP) // return to CALL again
624 #else
625 SUBL $5, (SP) // return to CALL again
626 #endif
627 MOVL 0(DX), BX // BX = code pointer stored in the first word of fn
628 JMP BX // but first run the deferred function
629
630 // Save state of caller into g->sched. AX and BX are saved on entry and restored before returning.
631 TEXT gosave<>(SB),NOSPLIT,$0
632 PUSHL AX // preserve AX and BX; both are used as scratch below
633 PUSHL BX
634 get_tls(BX)
635 MOVL g(BX), BX // BX = current g
636 LEAL arg+0(FP), AX // AX = address just above the return address, i.e. the caller's SP
637 MOVL AX, (g_sched+gobuf_sp)(BX)
638 MOVL -4(AX), AX // AX = the word below that address: the return PC into the caller
639 MOVL AX, (g_sched+gobuf_pc)(BX)
640 MOVL $0, (g_sched+gobuf_ret)(BX)
641 // Assert ctxt is zero. See func save.
642 MOVL (g_sched+gobuf_ctxt)(BX), AX
643 TESTL AX, AX
644 JZ 2(PC) // ctxt == 0: skip the badctxt call
645 CALL runtime·badctxt(SB)
646 POPL BX // restore in reverse order of the pushes
647 POPL AX
648 RET
649
650 // func asmcgocall(fn, arg unsafe.Pointer) int32
651 // Call fn(arg) on the scheduler stack,
652 // aligned appropriately for the gcc ABI.
653 // See cgocall.go for more details. The value fn leaves in AX is stored as the result.
654 TEXT ·asmcgocall(SB),NOSPLIT,$0-12
655 MOVL fn+0(FP), AX // AX = fn, called below
656 MOVL arg+4(FP), BX // BX = arg, passed to fn at 0(SP)
657
658 MOVL SP, DX // DX = SP on entry, used later to restore the stack
659
660 // Figure out if we need to switch to m->g0 stack.
661 // We get called to create new OS threads too, and those
662 // come in on the m->g0 stack already.
663 get_tls(CX)
664 MOVL g(CX), BP // BP = current g (may be nil)
665 CMPL BP, $0
666 JEQ nosave // Don't even have a G yet.
667 MOVL g_m(BP), BP // BP = m
668 MOVL m_g0(BP), SI // SI = m->g0
669 MOVL g(CX), DI // DI = current g
670 CMPL SI, DI
671 JEQ noswitch // already on g0
672 CMPL DI, m_gsignal(BP)
673 JEQ noswitch // on the signal stack: do not switch either
674 CALL gosave<>(SB) // record the caller's state in g->sched
675 get_tls(CX)
676 MOVL SI, g(CX) // current g = g0
677 MOVL (g_sched+gobuf_sp)(SI), SP // switch to the g0 stack
678
679 noswitch:
680 // Now on a scheduling stack (a pthread-created stack).
681 SUBL $32, SP
682 ANDL $~15, SP // alignment, perhaps unnecessary
683 MOVL DI, 8(SP) // save g
684 MOVL (g_stack+stack_hi)(DI), DI
685 SUBL DX, DI // DI = g.stack.hi - entry SP
686 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
687 MOVL BX, 0(SP) // first argument in x86-32 ABI
688 CALL AX
689
690 // Restore registers, g, stack pointer.
691 get_tls(CX)
692 MOVL 8(SP), DI // DI = g saved before the call
693 MOVL (g_stack+stack_hi)(DI), SI
694 SUBL 4(SP), SI // SI = g.stack.hi - saved depth = SP to return to
695 MOVL DI, g(CX)
696 MOVL SI, SP
697
698 MOVL AX, ret+8(FP) // result = value fn left in AX
699 RET
700 nosave:
701 // Now on a scheduling stack (a pthread-created stack).
702 SUBL $32, SP
703 ANDL $~15, SP // alignment, perhaps unnecessary
704 MOVL DX, 4(SP) // save original stack pointer
705 MOVL BX, 0(SP) // first argument in x86-32 ABI
706 CALL AX
707
708 MOVL 4(SP), CX // restore original stack pointer
709 MOVL CX, SP
710 MOVL AX, ret+8(FP) // result = value fn left in AX
711 RET
712
713 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
714 // Turn the fn into a Go func (by taking its address) and call
715 // cgocallback_gofunc. The other three arguments are forwarded unchanged.
716 TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
717 LEAL fn+0(FP), AX // address of the fn argument slot, not its value
718 MOVL AX, 0(SP)
719 MOVL frame+4(FP), AX
720 MOVL AX, 4(SP)
721 MOVL framesize+8(FP), AX
722 MOVL AX, 8(SP)
723 MOVL ctxt+12(FP), AX
724 MOVL AX, 12(SP)
725 MOVL $runtime·cgocallback_gofunc(SB), AX // indirect call through AX rather than a direct CALL
726 CALL AX
727 RET
728
729 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
730 // See cgocall.go for more details. Borrows an m via needm when there is no g, runs runtime·cgocallbackg on m->curg's stack, then switches back to g0 and calls dropm if an m was borrowed.
731 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
732 NO_LOCAL_POINTERS
733
734 // If g is nil, Go did not create the current thread.
735 // Call needm to obtain one for temporary use.
736 // In this case, we're running on the thread stack, so there's
737 // lots of space, but the linker doesn't know. Hide the call from
738 // the linker analysis by using an indirect call through AX.
739 get_tls(CX)
740 #ifdef GOOS_windows
741 MOVL $0, BP
742 CMPL CX, $0
743 JEQ 2(PC) // TODO
744 #endif
745 MOVL g(CX), BP // BP = current g (nil if Go did not create this thread)
746 CMPL BP, $0
747 JEQ needm
748 MOVL g_m(BP), BP // BP = m
749 MOVL BP, DX // saved copy of oldm
750 JMP havem
751 needm:
752 MOVL $0, 0(SP) // zero the slot at 0(SP) before calling needm
753 MOVL $runtime·needm(SB), AX
754 CALL AX
755 MOVL 0(SP), DX // DX = contents of 0(SP) after needm; DX == 0 later selects the dropm path
756 get_tls(CX)
757 MOVL g(CX), BP
758 MOVL g_m(BP), BP // BP = m obtained by needm
759
760 // Set m->sched.sp = SP, so that if a panic happens
761 // during the function we are about to execute, it will
762 // have a valid SP to run on the g0 stack.
763 // The next few lines (after the havem label)
764 // will save this SP onto the stack and then write
765 // the same SP back to m->sched.sp. That seems redundant,
766 // but if an unrecovered panic happens, unwindm will
767 // restore the g->sched.sp from the stack location
768 // and then systemstack will try to use it. If we don't set it here,
769 // that restored SP will be uninitialized (typically 0) and
770 // will not be usable.
771 MOVL m_g0(BP), SI
772 MOVL SP, (g_sched+gobuf_sp)(SI)
773
774 havem:
775 // Now there's a valid m, and we're running on its m->g0.
776 // Save current m->g0->sched.sp on stack and then set it to SP.
777 // Save current sp in m->g0->sched.sp in preparation for
778 // switch back to m->curg stack.
779 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
780 MOVL m_g0(BP), SI // SI = m->g0
781 MOVL (g_sched+gobuf_sp)(SI), AX
782 MOVL AX, 0(SP) // old g0->sched.sp, restored near the end of this function
783 MOVL SP, (g_sched+gobuf_sp)(SI)
784
785 // Switch to m->curg stack and call runtime.cgocallbackg.
786 // Because we are taking over the execution of m->curg
787 // but *not* resuming what had been running, we need to
788 // save that information (m->curg->sched) so we can restore it.
789 // We can restore m->curg->sched.sp easily, because calling
790 // runtime.cgocallbackg leaves SP unchanged upon return.
791 // To save m->curg->sched.pc, we push it onto the stack.
792 // This has the added benefit that it looks to the traceback
793 // routine like cgocallbackg is going to return to that
794 // PC (because the frame we allocate below has the same
795 // size as cgocallback_gofunc's frame declared above)
796 // so that the traceback will seamlessly trace back into
797 // the earlier calls.
798 //
799 // In the new goroutine, 4(SP) holds the saved oldm (DX) register.
800 // 8(SP) is unused.
801 MOVL m_curg(BP), SI // SI = m->curg
802 MOVL SI, g(CX) // current g = m->curg
803 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
804 MOVL (g_sched+gobuf_pc)(SI), BP
805 MOVL BP, -4(DI) // saved curg->sched.pc sits where a return address would
806 MOVL ctxt+12(FP), CX // read ctxt before SP is changed
807 LEAL -(4+12)(DI), SP // 4 bytes for the saved PC plus a 12-byte frame
808 MOVL DX, 4(SP) // saved oldm, reloaded after the call
809 MOVL CX, 0(SP) // argument to cgocallbackg: ctxt
810 CALL runtime·cgocallbackg(SB)
811 MOVL 4(SP), DX // reload saved oldm
812
813 // Restore g->sched (== m->curg->sched) from saved values.
814 get_tls(CX)
815 MOVL g(CX), SI
816 MOVL 12(SP), BP // the PC stored at -4(DI) above
817 MOVL BP, (g_sched+gobuf_pc)(SI)
818 LEAL (12+4)(SP), DI // the original curg->sched.sp
819 MOVL DI, (g_sched+gobuf_sp)(SI)
820
821 // Switch back to m->g0's stack and restore m->g0->sched.sp.
822 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
823 // so we do not have to restore it.)
824 MOVL g(CX), BP
825 MOVL g_m(BP), BP
826 MOVL m_g0(BP), SI
827 MOVL SI, g(CX) // current g = g0
828 MOVL (g_sched+gobuf_sp)(SI), SP
829 MOVL 0(SP), AX // old g0->sched.sp saved at havem
830 MOVL AX, (g_sched+gobuf_sp)(SI)
831
832 // If the m on entry was nil, we called needm above to borrow an m
833 // for the duration of the call. Since the call is over, return it with dropm.
834 CMPL DX, $0
835 JNE 3(PC) // oldm != nil: skip the two-instruction dropm call
836 MOVL $runtime·dropm(SB), AX
837 CALL AX
838
839 // Done!
840 RET
841
842 // void setg(G*); set g. for use by needm. On Windows it also updates 0x14(FS): to 0 when gg is nil, otherwise to the address of gg.m.tls.
843 TEXT runtime·setg(SB), NOSPLIT, $0-4
844 MOVL gg+0(FP), BX // BX = new g
845 #ifdef GOOS_windows
846 CMPL BX, $0
847 JNE settls
848 MOVL $0, 0x14(FS) // gg == nil: clear the slot and return without touching the TLS g slot
849 RET
850 settls:
851 MOVL g_m(BX), AX // AX = gg.m
852 LEAL m_tls(AX), AX // AX = &gg.m.tls
853 MOVL AX, 0x14(FS)
854 #endif
855 get_tls(CX)
856 MOVL BX, g(CX) // current g = gg
857 RET
858
859 // void setg_gcc(G*); set g. for use by gcc. Its address is passed to _cgo_init as arg 2 in rt0_go.
860 TEXT setg_gcc<>(SB), NOSPLIT, $0
861 get_tls(AX)
862 MOVL gg+0(FP), DX // DX = new g, taken from the first stack argument
863 MOVL DX, g(AX) // current g = gg
864 RET
865
866 TEXT runtime·abort(SB),NOSPLIT,$0-0 // crash the program: breakpoint trap, then never return
867 INT $3 // x86 breakpoint interrupt
868 spin:
869 JMP spin // if execution continues past the trap, loop forever
870
871 // check that SP is strictly between g->stack.lo and g->stack.hi; calls runtime·abort otherwise (note: SP == g->stack.lo also aborts)
872 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
873 get_tls(CX)
874 MOVL g(CX), AX // AX = current g
875 CMPL (g_stack+stack_hi)(AX), SP
876 JHI 2(PC) // stack.hi > SP (unsigned): skip the abort
877 CALL runtime·abort(SB)
878 CMPL SP, (g_stack+stack_lo)(AX)
879 JHI 2(PC) // SP > stack.lo (unsigned): skip the abort
880 CALL runtime·abort(SB)
881 RET
882
883 // func cputicks() int64: return the 64-bit RDTSC counter, preceded by a fence when SSE2 is available.
884 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
885 CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
886 JNE read_tsc // no SSE2: read the counter without a fence
887 CMPB runtime·lfenceBeforeRdtsc(SB), $1
888 JNE fence_full // flag not set (rt0_go sets it only for Intel): use MFENCE
889 LFENCE
890 JMP read_tsc
891 fence_full:
892 MFENCE
893 read_tsc:
894 RDTSC // DX:AX = time-stamp counter
895 MOVL AX, ret_lo+0(FP) // low 32 bits of the result
896 MOVL DX, ret_hi+4(FP) // high 32 bits of the result
897 RET
898
899 TEXT ldt0setup<>(SB),NOSPLIT,$16-0 // file-local helper called from rt0_go: calls runtime·setldt(7, &m0.tls, 32)
900 // set up ldt 7 to point at m0.tls
901 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
902 // the entry number is just a hint. setldt will set up GS with what it used.
903 MOVL $7, 0(SP) // 1st argument: requested entry number
904 LEAL runtime·m0+m_tls(SB), AX
905 MOVL AX, 4(SP) // 2nd argument: address of m0.tls
906 MOVL $32, 8(SP) // sizeof(tls array)
907 CALL runtime·setldt(SB)
908 RET
909
910 TEXT runtime·emptyfunc(SB),0,$0-0 // declared with flags 0 (not NOSPLIT), unlike its neighbours; rt0_go calls it to "fault if stack check is wrong"
911 RET
912
913 // hash function using AES hardware instructions. Loads the data pointer, the size and the address of the result slot, then tail-jumps to aeshashbody<>, which reads the seed from h+4(FP).
914 TEXT runtime·aeshash(SB),NOSPLIT,$0-16
915 MOVL p+0(FP), AX // ptr to data
916 MOVL s+8(FP), BX // size
917 LEAL ret+12(FP), DX // address where aeshashbody stores the 32-bit hash
918 JMP aeshashbody<>(SB)
919
920 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12 // string variant of aeshash: p points at a string header (data pointer at offset 0, length at offset 4)
921 MOVL p+0(FP), AX // ptr to string object
922 MOVL 4(AX), BX // length of string
923 MOVL (AX), AX // string data
924 LEAL ret+8(FP), DX // address where aeshashbody stores the 32-bit hash
925 JMP aeshashbody<>(SB)
926
927 // AX: data
928 // BX: length
929 // DX: address to put return value (the low 32 bits of the final state are stored there)
930 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
931 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
932 PINSRW $4, BX, X0 // 16 bits of length
933 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
934 MOVO X0, X1 // save unscrambled seed
935 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
936 AESENC X0, X0 // scramble seed
937
938 CMPL BX, $16 // dispatch on length: <16, ==16, 17..32, 33..64, >=65 (unsigned compares)
939 JB aes0to15
940 JE aes16
941 CMPL BX, $32
942 JBE aes17to32
943 CMPL BX, $64
944 JBE aes33to64
945 JMP aes65plus
946
947 aes0to15:
948 TESTL BX, BX
949 JE aes0 // zero-length input
950
951 ADDL $16, AX
952 TESTW $0xff0, AX // bits 4..11 of data+16 all zero => data lies in the last 16 bytes of a 4096-byte-aligned region
953 JE endofpage
954
955 // 16 bytes loaded at this address won't cross
956 // a page boundary, so we can load it directly.
957 MOVOU -16(AX), X1 // load 16 bytes at the original data address
958 ADDL BX, BX // BX = 2*length, so BX*8 below is length*16
959 PAND masks<>(SB)(BX*8), X1 // keep only the low `length` bytes (16-byte table entry per length)
960
961 final1:
962 AESENC X0, X1 // scramble input, xor in seed
963 AESENC X1, X1 // scramble combo 2 times
964 AESENC X1, X1
965 MOVL X1, (DX) // store low 32 bits as the hash
966 RET
967
968 endofpage:
969 // address ends in 1111xxxx. Might be up against
970 // a page boundary, so load ending at last byte.
971 // Then shift bytes down using pshufb.
972 MOVOU -32(AX)(BX*1), X1 // AX was advanced by 16 above, so this loads the 16 bytes ending at data+length
973 ADDL BX, BX // BX = 2*length, so BX*8 below is length*16
974 PSHUFB shifts<>(SB)(BX*8), X1 // shuffle using the 16-byte shifts<> entry for this length
975 JMP final1
976
977 aes0:
978 // Return scrambled input seed
979 AESENC X0, X0
980 MOVL X0, (DX)
981 RET
982
983 aes16:
984 MOVOU (AX), X1 // exactly 16 bytes: load them whole
985 JMP final1
986
987 aes17to32:
988 // make second starting seed
989 PXOR runtime·aeskeysched+16(SB), X1
990 AESENC X1, X1
991
992 // load data to be hashed
993 MOVOU (AX), X2 // first 16 bytes
994 MOVOU -16(AX)(BX*1), X3 // last 16 bytes (overlaps the first load when length < 32)
995
996 // scramble 3 times
997 AESENC X0, X2
998 AESENC X1, X3
999 AESENC X2, X2
1000 AESENC X3, X3
1001 AESENC X2, X2
1002 AESENC X3, X3
1003
1004 // combine results
1005 PXOR X3, X2
1006 MOVL X2, (DX)
1007 RET
1008
1009 aes33to64:
1010 // make 3 more starting seeds
1011 MOVO X1, X2
1012 MOVO X1, X3
1013 PXOR runtime·aeskeysched+16(SB), X1
1014 PXOR runtime·aeskeysched+32(SB), X2
1015 PXOR runtime·aeskeysched+48(SB), X3
1016 AESENC X1, X1
1017 AESENC X2, X2
1018 AESENC X3, X3
1019
1020 MOVOU (AX), X4 // first 32 bytes ...
1021 MOVOU 16(AX), X5
1022 MOVOU -32(AX)(BX*1), X6 // ... and last 32 bytes (overlapping when length < 64)
1023 MOVOU -16(AX)(BX*1), X7
1024
1025 AESENC X0, X4 // mix each block with its own seed
1026 AESENC X1, X5
1027 AESENC X2, X6
1028 AESENC X3, X7
1029
1030 AESENC X4, X4 // two further self-scrambling rounds per lane
1031 AESENC X5, X5
1032 AESENC X6, X6
1033 AESENC X7, X7
1034
1035 AESENC X4, X4
1036 AESENC X5, X5
1037 AESENC X6, X6
1038 AESENC X7, X7
1039
1040 PXOR X6, X4 // fold the four lanes into X4
1041 PXOR X7, X5
1042 PXOR X5, X4
1043 MOVL X4, (DX)
1044 RET
1045
1046 aes65plus:
1047 // make 3 more starting seeds
1048 MOVO X1, X2
1049 MOVO X1, X3
1050 PXOR runtime·aeskeysched+16(SB), X1
1051 PXOR runtime·aeskeysched+32(SB), X2
1052 PXOR runtime·aeskeysched+48(SB), X3
1053 AESENC X1, X1
1054 AESENC X2, X2
1055 AESENC X3, X3
1056
1057 // start with last (possibly overlapping) block
1058 MOVOU -64(AX)(BX*1), X4
1059 MOVOU -48(AX)(BX*1), X5
1060 MOVOU -32(AX)(BX*1), X6
1061 MOVOU -16(AX)(BX*1), X7
1062
1063 // scramble state once
1064 AESENC X0, X4
1065 AESENC X1, X5
1066 AESENC X2, X6
1067 AESENC X3, X7
1068
1069 // compute number of remaining 64-byte blocks
1070 DECL BX
1071 SHRL $6, BX // BX = (length-1)/64, at least 1 here because length >= 65
1072
1073 aesloop:
1074 // scramble state, xor in a block
1075 MOVOU (AX), X0 // X0..X3 are reused for data from here on; the seeds are no longer needed
1076 MOVOU 16(AX), X1
1077 MOVOU 32(AX), X2
1078 MOVOU 48(AX), X3
1079 AESENC X0, X4
1080 AESENC X1, X5
1081 AESENC X2, X6
1082 AESENC X3, X7
1083
1084 // scramble state
1085 AESENC X4, X4
1086 AESENC X5, X5
1087 AESENC X6, X6
1088 AESENC X7, X7
1089
1090 ADDL $64, AX // advance to the next 64-byte block
1091 DECL BX
1092 JNE aesloop // loop while blocks remain
1093
1094 // 2 more scrambles to finish
1095 AESENC X4, X4
1096 AESENC X5, X5
1097 AESENC X6, X6
1098 AESENC X7, X7
1099
1100 AESENC X4, X4
1101 AESENC X5, X5
1102 AESENC X6, X6
1103 AESENC X7, X7
1104
1105 PXOR X6, X4 // fold the four lanes into X4
1106 PXOR X7, X5
1107 PXOR X5, X4
1108 MOVL X4, (DX)
1109 RET
1110
1111 TEXT runtime·aeshash32(SB),NOSPLIT,$0-12 // hash the 4 bytes at p with seed h; the result is the low 32 bits after three AESENC rounds
1112 MOVL p+0(FP), AX // ptr to data
1113 MOVL h+4(FP), X0 // seed
1114 PINSRD $1, (AX), X0 // data
1115 AESENC runtime·aeskeysched+0(SB), X0 // three rounds keyed by the first three 16-byte words of aeskeysched
1116 AESENC runtime·aeskeysched+16(SB), X0
1117 AESENC runtime·aeskeysched+32(SB), X0
1118 MOVL X0, ret+8(FP)
1119 RET
1120
1121 TEXT runtime·aeshash64(SB),NOSPLIT,$0-12 // hash the 8 bytes at p with seed h; the result is the low 32 bits after three AESENC rounds
1122 MOVL p+0(FP), AX // ptr to data
1123 MOVQ (AX), X0 // data
1124 PINSRD $2, h+4(FP), X0 // seed
1125 AESENC runtime·aeskeysched+0(SB), X0 // three rounds keyed by the first three 16-byte words of aeskeysched
1126 AESENC runtime·aeskeysched+16(SB), X0
1127 AESENC runtime·aeskeysched+32(SB), X0
1128 MOVL X0, ret+8(FP)
1129 RET
1130
1131 // simple mask to get rid of data in the high part of the register. Table of 16 entries of 16 bytes each; entry n has its low n bytes set to 0xff and the rest zero. aeshashbody indexes it with length*16.
1132 DATA masks<>+0x00(SB)/4, $0x00000000 // entry 0: keep no bytes
1133 DATA masks<>+0x04(SB)/4, $0x00000000
1134 DATA masks<>+0x08(SB)/4, $0x00000000
1135 DATA masks<>+0x0c(SB)/4, $0x00000000
1136
1137 DATA masks<>+0x10(SB)/4, $0x000000ff // entry 1: keep 1 byte
1138 DATA masks<>+0x14(SB)/4, $0x00000000
1139 DATA masks<>+0x18(SB)/4, $0x00000000
1140 DATA masks<>+0x1c(SB)/4, $0x00000000
1141
1142 DATA masks<>+0x20(SB)/4, $0x0000ffff // entry 2: keep 2 bytes
1143 DATA masks<>+0x24(SB)/4, $0x00000000
1144 DATA masks<>+0x28(SB)/4, $0x00000000
1145 DATA masks<>+0x2c(SB)/4, $0x00000000
1146
1147 DATA masks<>+0x30(SB)/4, $0x00ffffff // entry 3: keep 3 bytes
1148 DATA masks<>+0x34(SB)/4, $0x00000000
1149 DATA masks<>+0x38(SB)/4, $0x00000000
1150 DATA masks<>+0x3c(SB)/4, $0x00000000
1151
1152 DATA masks<>+0x40(SB)/4, $0xffffffff // entry 4: keep 4 bytes
1153 DATA masks<>+0x44(SB)/4, $0x00000000
1154 DATA masks<>+0x48(SB)/4, $0x00000000
1155 DATA masks<>+0x4c(SB)/4, $0x00000000
1156
1157 DATA masks<>+0x50(SB)/4, $0xffffffff // entry 5: keep 5 bytes
1158 DATA masks<>+0x54(SB)/4, $0x000000ff
1159 DATA masks<>+0x58(SB)/4, $0x00000000
1160 DATA masks<>+0x5c(SB)/4, $0x00000000
1161
1162 DATA masks<>+0x60(SB)/4, $0xffffffff // entry 6: keep 6 bytes
1163 DATA masks<>+0x64(SB)/4, $0x0000ffff
1164 DATA masks<>+0x68(SB)/4, $0x00000000
1165 DATA masks<>+0x6c(SB)/4, $0x00000000
1166
1167 DATA masks<>+0x70(SB)/4, $0xffffffff // entry 7: keep 7 bytes
1168 DATA masks<>+0x74(SB)/4, $0x00ffffff
1169 DATA masks<>+0x78(SB)/4, $0x00000000
1170 DATA masks<>+0x7c(SB)/4, $0x00000000
1171
1172 DATA masks<>+0x80(SB)/4, $0xffffffff // entry 8: keep 8 bytes
1173 DATA masks<>+0x84(SB)/4, $0xffffffff
1174 DATA masks<>+0x88(SB)/4, $0x00000000
1175 DATA masks<>+0x8c(SB)/4, $0x00000000
1176
1177 DATA masks<>+0x90(SB)/4, $0xffffffff // entry 9: keep 9 bytes
1178 DATA masks<>+0x94(SB)/4, $0xffffffff
1179 DATA masks<>+0x98(SB)/4, $0x000000ff
1180 DATA masks<>+0x9c(SB)/4, $0x00000000
1181
1182 DATA masks<>+0xa0(SB)/4, $0xffffffff // entry 10: keep 10 bytes
1183 DATA masks<>+0xa4(SB)/4, $0xffffffff
1184 DATA masks<>+0xa8(SB)/4, $0x0000ffff
1185 DATA masks<>+0xac(SB)/4, $0x00000000
1186
1187 DATA masks<>+0xb0(SB)/4, $0xffffffff // entry 11: keep 11 bytes
1188 DATA masks<>+0xb4(SB)/4, $0xffffffff
1189 DATA masks<>+0xb8(SB)/4, $0x00ffffff
1190 DATA masks<>+0xbc(SB)/4, $0x00000000
1191
1192 DATA masks<>+0xc0(SB)/4, $0xffffffff // entry 12: keep 12 bytes
1193 DATA masks<>+0xc4(SB)/4, $0xffffffff
1194 DATA masks<>+0xc8(SB)/4, $0xffffffff
1195 DATA masks<>+0xcc(SB)/4, $0x00000000
1196
1197 DATA masks<>+0xd0(SB)/4, $0xffffffff // entry 13: keep 13 bytes
1198 DATA masks<>+0xd4(SB)/4, $0xffffffff
1199 DATA masks<>+0xd8(SB)/4, $0xffffffff
1200 DATA masks<>+0xdc(SB)/4, $0x000000ff
1201
1202 DATA masks<>+0xe0(SB)/4, $0xffffffff // entry 14: keep 14 bytes
1203 DATA masks<>+0xe4(SB)/4, $0xffffffff
1204 DATA masks<>+0xe8(SB)/4, $0xffffffff
1205 DATA masks<>+0xec(SB)/4, $0x0000ffff
1206
1207 DATA masks<>+0xf0(SB)/4, $0xffffffff // entry 15: keep 15 bytes
1208 DATA masks<>+0xf4(SB)/4, $0xffffffff
1209 DATA masks<>+0xf8(SB)/4, $0xffffffff
1210 DATA masks<>+0xfc(SB)/4, $0x00ffffff
1211
1212 GLOBL masks<>(SB),RODATA,$256 // 16 entries x 16 bytes, read-only
1213
1214 // these are arguments to pshufb. They move data down from
1215 // the high bytes of the register to the low bytes of the register.
1216 // index is how many bytes to move.
// shifts<> is a read-only table of sixteen 16-byte entries (256 bytes).
// In the entry at byte offset 16*n (n = 1..15), control bytes 0..n-1 hold
// the source byte indexes 16-n..15 and every remaining control byte is
// 0xff; PSHUFB writes zero to a destination byte whose control byte has
// its high bit set, so the top n source bytes land in the low n bytes and
// the rest of the register is cleared. Entry 0 is all zeros.
// checkASM verifies that the table's address is 16-byte aligned.
1217 DATA shifts<>+0x00(SB)/4, $0x00000000 // n=0
1218 DATA shifts<>+0x04(SB)/4, $0x00000000
1219 DATA shifts<>+0x08(SB)/4, $0x00000000
1220 DATA shifts<>+0x0c(SB)/4, $0x00000000
1221
1222 DATA shifts<>+0x10(SB)/4, $0xffffff0f // n=1: byte 15 -> byte 0
1223 DATA shifts<>+0x14(SB)/4, $0xffffffff
1224 DATA shifts<>+0x18(SB)/4, $0xffffffff
1225 DATA shifts<>+0x1c(SB)/4, $0xffffffff
1226
1227 DATA shifts<>+0x20(SB)/4, $0xffff0f0e // n=2: bytes 14-15 -> bytes 0-1
1228 DATA shifts<>+0x24(SB)/4, $0xffffffff
1229 DATA shifts<>+0x28(SB)/4, $0xffffffff
1230 DATA shifts<>+0x2c(SB)/4, $0xffffffff
1231
1232 DATA shifts<>+0x30(SB)/4, $0xff0f0e0d // n=3
1233 DATA shifts<>+0x34(SB)/4, $0xffffffff
1234 DATA shifts<>+0x38(SB)/4, $0xffffffff
1235 DATA shifts<>+0x3c(SB)/4, $0xffffffff
1236
1237 DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c // n=4
1238 DATA shifts<>+0x44(SB)/4, $0xffffffff
1239 DATA shifts<>+0x48(SB)/4, $0xffffffff
1240 DATA shifts<>+0x4c(SB)/4, $0xffffffff
1241
1242 DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b // n=5
1243 DATA shifts<>+0x54(SB)/4, $0xffffff0f
1244 DATA shifts<>+0x58(SB)/4, $0xffffffff
1245 DATA shifts<>+0x5c(SB)/4, $0xffffffff
1246
1247 DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a // n=6
1248 DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1249 DATA shifts<>+0x68(SB)/4, $0xffffffff
1250 DATA shifts<>+0x6c(SB)/4, $0xffffffff
1251
1252 DATA shifts<>+0x70(SB)/4, $0x0c0b0a09 // n=7
1253 DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1254 DATA shifts<>+0x78(SB)/4, $0xffffffff
1255 DATA shifts<>+0x7c(SB)/4, $0xffffffff
1256
1257 DATA shifts<>+0x80(SB)/4, $0x0b0a0908 // n=8
1258 DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1259 DATA shifts<>+0x88(SB)/4, $0xffffffff
1260 DATA shifts<>+0x8c(SB)/4, $0xffffffff
1261
1262 DATA shifts<>+0x90(SB)/4, $0x0a090807 // n=9
1263 DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1264 DATA shifts<>+0x98(SB)/4, $0xffffff0f
1265 DATA shifts<>+0x9c(SB)/4, $0xffffffff
1266
1267 DATA shifts<>+0xa0(SB)/4, $0x09080706 // n=10
1268 DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1269 DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1270 DATA shifts<>+0xac(SB)/4, $0xffffffff
1271
1272 DATA shifts<>+0xb0(SB)/4, $0x08070605 // n=11
1273 DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1274 DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1275 DATA shifts<>+0xbc(SB)/4, $0xffffffff
1276
1277 DATA shifts<>+0xc0(SB)/4, $0x07060504 // n=12
1278 DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1279 DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1280 DATA shifts<>+0xcc(SB)/4, $0xffffffff
1281
1282 DATA shifts<>+0xd0(SB)/4, $0x06050403 // n=13
1283 DATA shifts<>+0xd4(SB)/4, $0x0a090807
1284 DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1285 DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1286
1287 DATA shifts<>+0xe0(SB)/4, $0x05040302 // n=14
1288 DATA shifts<>+0xe4(SB)/4, $0x09080706
1289 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1290 DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1291
1292 DATA shifts<>+0xf0(SB)/4, $0x04030201 // n=15
1293 DATA shifts<>+0xf4(SB)/4, $0x08070605
1294 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1295 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1296
1297 GLOBL shifts<>(SB),RODATA,$256
1298
// checkASM reports, as a one-byte result at ret+0(FP), whether the
// addresses of both masks<> and shifts<> are multiples of 16.
// It takes no arguments and writes AX and BX.
1299 TEXT ·checkASM(SB),NOSPLIT,$0-1
1300 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1301 MOVL $masks<>(SB), AX // AX = address of masks<>
1302 MOVL $shifts<>(SB), BX // BX = address of shifts<>
1303 ORL BX, AX // a low bit set in either address stays set in AX
1304 TESTL $15, AX // ZF = 1 iff the low four bits of both addresses are zero
1305 SETEQ ret+0(FP) // ret = 1 when aligned, 0 otherwise
1306 RET
1307
// return0 loads 0 into AX and returns. It has no frame and declares no
// arguments or results.
// NOTE(review): the zero in AX is presumably consumed by whoever ends up
// at the return address; the callers are not visible here — confirm.
1308 TEXT runtime·return0(SB), NOSPLIT, $0
1309 MOVL $0, AX // MOVL, unlike XORL, leaves FLAGS untouched
1310 RET
1311
1312 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1313 // Must obey the gcc calling convention.
// The result is left in AX. Only AX and CX are written, and no stack
// frame is used.
1314 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1315 get_tls(CX) // CX = thread-local storage base
1316 MOVL g(CX), AX // AX = g
1317 MOVL g_m(AX), AX // AX = g.m
1318 MOVL m_curg(AX), AX // AX = g.m.curg
1319 MOVL (g_stack+stack_hi)(AX), AX // AX = g.m.curg.stack.hi
1320 RET
1321
1322 // The top-most function running on a goroutine
1323 // returns to goexit+PCQuantum.
// The leading one-byte NOP means an address just past the function's
// entry still lies inside goexit and lands on the CALL.
// NOTE(review): this presumes PCQuantum is 1 on 386 — confirm.
// There is no RET: goexit1 does not return, so control never reaches the
// end of this function.
1324 TEXT runtime·goexit(SB),NOSPLIT,$0-0
1325 BYTE $0x90 // NOP
1326 CALL runtime·goexit1(SB) // does not return
1327 // traceback from goexit1 must hit code range of goexit
1328 BYTE $0x90 // NOP; keeps the CALL's return address inside goexit
1329
1330 // Add a module's moduledata to the linked list of moduledata objects. This
1331 // is called from .init_array by a function generated in the linker and so
1332 // follows the platform ABI wrt register preservation -- it only touches AX,
1333 // CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
1334 // instead the pointer to the moduledata is passed in AX.
// The new moduledata is appended after the current tail
// (runtime·lastmoduledatap) and then becomes the tail itself. The tail
// pointer is dereferenced without a nil check.
1335 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1336 MOVL runtime·lastmoduledatap(SB), DX // DX = current tail of the list
1337 MOVL AX, moduledata_next(DX) // tail.next = new moduledata
1338 MOVL AX, runtime·lastmoduledatap(SB) // new moduledata becomes the tail
1339 RET
1340
// uint32tofloat64 converts the 32-bit value a to a float64.
// Frame layout ($8-12): a at 0(FP), 8-byte result at 4(FP), plus an
// 8-byte local at 0(SP). The value is widened to 64 bits with a zero high
// word so that the x87 64-bit integer load sees it as non-negative.
// Writes AX and uses the x87 register stack.
1341 TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
1342 MOVL a+0(FP), AX
1343 MOVL AX, 0(SP) // low word of the 64-bit integer
1344 MOVL $0, 4(SP) // high word = 0
1345 FMOVV 0(SP), F0 // push the 64-bit integer onto the x87 stack
1346 FMOVDP F0, ret+4(FP) // store it as a float64 and pop
1347 RET
1348
// float64touint32 converts the float64 a to a 32-bit value.
// Frame layout ($12-12): 8-byte a at 0(FP), result at 8(FP); locals are
// the saved x87 control word at 0(SP) and a 64-bit integer at 4(SP).
// The value is converted to a 64-bit integer under the control word
// runtime·controlWord64trunc and the low 32 bits are returned; the
// caller's control word is restored before returning.
// NOTE(review): controlWord64trunc presumably selects round-toward-zero;
// its definition is not visible here — confirm.
1349 TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
1350 FMOVD a+0(FP), F0 // push a onto the x87 stack
1351 FSTCW 0(SP) // save the current control word
1352 FLDCW runtime·controlWord64trunc(SB) // switch control word for the conversion
1353 FMOVVP F0, 4(SP) // store as a 64-bit integer and pop
1354 FLDCW 0(SP) // restore the saved control word
1355 MOVL 4(SP), AX // low 32 bits of the 64-bit integer
1356 MOVL AX, ret+8(FP)
1357 RET
1358
1359 // gcWriteBarrier performs a heap pointer write and informs the GC.
1360 //
1361 // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
1362 // - DI is the destination of the write
1363 // - AX is the value being written at DI
1364 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1365 // but may clobber others (e.g., SSE registers).
//
// Use of the 28-byte frame:
//    0(SP) DI (flush path only; also argument 1 of wbBufFlush)
//    4(SP) AX (flush path only; also argument 2 of wbBufFlush)
//    8(SP) DX (flush path only)
//   12(SP) BP (flush path only)
//   16(SP) SI (flush path only)
//   20(SP) CX (always)
//   24(SP) BX (always)
//
// Each record appended to the per-P write barrier buffer is two 4-byte
// words: the value being written, then the current contents of the slot.
1366 TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
1367 // Save the registers clobbered by the fast path. This is slightly
1368 // faster than having the caller spill these.
1369 MOVL CX, 20(SP)
1370 MOVL BX, 24(SP)
1371 // TODO: Consider passing g.m.p in as an argument so they can be shared
1372 // across a sequence of write barriers.
1373 get_tls(BX)
1374 MOVL g(BX), BX
1375 MOVL g_m(BX), BX
1376 MOVL m_p(BX), BX // BX = g.m.p
1377 MOVL (p_wbBuf+wbBuf_next)(BX), CX // CX = p.wbBuf.next
1378 // Increment wbBuf.next position.
1379 LEAL 8(CX), CX // advance by one 8-byte record
1380 MOVL CX, (p_wbBuf+wbBuf_next)(BX)
1381 CMPL CX, (p_wbBuf+wbBuf_end)(BX) // FLAGS stay live until JEQ; the MOVLs between do not modify them
1382 // Record the write.
1383 MOVL AX, -8(CX) // Record value
1384 MOVL (DI), BX // TODO: This turns bad writes into bad reads.
1385 MOVL BX, -4(CX) // Record *slot
1386 // Is the buffer full? (flags set in CMPL above)
1387 JEQ flush
1388 ret:
1389 MOVL 20(SP), CX
1390 MOVL 24(SP), BX
1391 // Do the write.
1392 MOVL AX, (DI)
1393 RET
1394
1395 flush:
1396 // Save all general purpose registers since these could be
1397 // clobbered by wbBufFlush and were not saved by the caller.
1398 MOVL DI, 0(SP) // Also first argument to wbBufFlush
1399 MOVL AX, 4(SP) // Also second argument to wbBufFlush
1400 // BX already saved
1401 // CX already saved
1402 MOVL DX, 8(SP)
1403 MOVL BP, 12(SP)
1404 MOVL SI, 16(SP)
1405 // DI already saved
1406
1407 // This takes arguments DI and AX
1408 CALL runtime·wbBufFlush(SB)
1409
1410 MOVL 0(SP), DI
1411 MOVL 4(SP), AX
1412 MOVL 8(SP), DX
1413 MOVL 12(SP), BP
1414 MOVL 16(SP), SI
1415 JMP ret // rejoin the fast path: restore CX and BX, then do the write
1416
1417 // Note: these functions use a special calling convention to save generated code space.
1418 // Arguments are passed in registers, but the space for those arguments is allocated
1419 // in the caller's stack frame. These stubs write the args into that stack space and
1420 // then tail call to the corresponding runtime handler.
1421 // The tail call makes these stubs disappear in backtraces.
//
// Every stub has no frame and 8 bytes of arguments ($0-8): it stores one
// register into x+0(FP) and one into y+4(FP), then jumps to goPanic<Name>.
// Register pairs (x, y) as used below:
//   Index, IndexU, SliceB, SliceBU, Slice3C, Slice3CU:              AX, CX
//   SliceAlen, SliceAlenU, SliceAcap, SliceAcapU, Slice3B, Slice3BU: CX, DX
//   Slice3Alen, Slice3AlenU, Slice3Acap, Slice3AcapU:               DX, BX
1422 TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
1423 MOVL AX, x+0(FP)
1424 MOVL CX, y+4(FP)
1425 JMP runtime·goPanicIndex(SB)
1426 TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
1427 MOVL AX, x+0(FP)
1428 MOVL CX, y+4(FP)
1429 JMP runtime·goPanicIndexU(SB)
1430 TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
1431 MOVL CX, x+0(FP)
1432 MOVL DX, y+4(FP)
1433 JMP runtime·goPanicSliceAlen(SB)
1434 TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
1435 MOVL CX, x+0(FP)
1436 MOVL DX, y+4(FP)
1437 JMP runtime·goPanicSliceAlenU(SB)
1438 TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
1439 MOVL CX, x+0(FP)
1440 MOVL DX, y+4(FP)
1441 JMP runtime·goPanicSliceAcap(SB)
1442 TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
1443 MOVL CX, x+0(FP)
1444 MOVL DX, y+4(FP)
1445 JMP runtime·goPanicSliceAcapU(SB)
1446 TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
1447 MOVL AX, x+0(FP)
1448 MOVL CX, y+4(FP)
1449 JMP runtime·goPanicSliceB(SB)
1450 TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
1451 MOVL AX, x+0(FP)
1452 MOVL CX, y+4(FP)
1453 JMP runtime·goPanicSliceBU(SB)
1454 TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
1455 MOVL DX, x+0(FP)
1456 MOVL BX, y+4(FP)
1457 JMP runtime·goPanicSlice3Alen(SB)
1458 TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
1459 MOVL DX, x+0(FP)
1460 MOVL BX, y+4(FP)
1461 JMP runtime·goPanicSlice3AlenU(SB)
1462 TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
1463 MOVL DX, x+0(FP)
1464 MOVL BX, y+4(FP)
1465 JMP runtime·goPanicSlice3Acap(SB)
1466 TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
1467 MOVL DX, x+0(FP)
1468 MOVL BX, y+4(FP)
1469 JMP runtime·goPanicSlice3AcapU(SB)
1470 TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
1471 MOVL CX, x+0(FP)
1472 MOVL DX, y+4(FP)
1473 JMP runtime·goPanicSlice3B(SB)
1474 TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
1475 MOVL CX, x+0(FP)
1476 MOVL DX, y+4(FP)
1477 JMP runtime·goPanicSlice3BU(SB)
1478 TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
1479 MOVL AX, x+0(FP)
1480 MOVL CX, y+4(FP)
1481 JMP runtime·goPanicSlice3C(SB)
1482 TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
1483 MOVL AX, x+0(FP)
1484 MOVL CX, y+4(FP)
1485 JMP runtime·goPanicSlice3CU(SB)
1486
1487 // Extended versions for 64-bit indexes.
// Every stub has no frame and 12 bytes of arguments ($0-12): it stores SI
// into hi+0(FP), one register into lo+4(FP) and one into y+8(FP), then
// jumps to goPanicExtend<Name>. hi always comes from SI; the (lo, y)
// register pairs as used below are:
//   Index, IndexU, SliceB, SliceBU, Slice3C, Slice3CU:              AX, CX
//   SliceAlen, SliceAlenU, SliceAcap, SliceAcapU, Slice3B, Slice3BU: CX, DX
//   Slice3Alen, Slice3AlenU, Slice3Acap, Slice3AcapU:               DX, BX
1488 TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
1489 MOVL SI, hi+0(FP)
1490 MOVL AX, lo+4(FP)
1491 MOVL CX, y+8(FP)
1492 JMP runtime·goPanicExtendIndex(SB)
1493 TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
1494 MOVL SI, hi+0(FP)
1495 MOVL AX, lo+4(FP)
1496 MOVL CX, y+8(FP)
1497 JMP runtime·goPanicExtendIndexU(SB)
1498 TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
1499 MOVL SI, hi+0(FP)
1500 MOVL CX, lo+4(FP)
1501 MOVL DX, y+8(FP)
1502 JMP runtime·goPanicExtendSliceAlen(SB)
1503 TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
1504 MOVL SI, hi+0(FP)
1505 MOVL CX, lo+4(FP)
1506 MOVL DX, y+8(FP)
1507 JMP runtime·goPanicExtendSliceAlenU(SB)
1508 TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
1509 MOVL SI, hi+0(FP)
1510 MOVL CX, lo+4(FP)
1511 MOVL DX, y+8(FP)
1512 JMP runtime·goPanicExtendSliceAcap(SB)
1513 TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
1514 MOVL SI, hi+0(FP)
1515 MOVL CX, lo+4(FP)
1516 MOVL DX, y+8(FP)
1517 JMP runtime·goPanicExtendSliceAcapU(SB)
1518 TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
1519 MOVL SI, hi+0(FP)
1520 MOVL AX, lo+4(FP)
1521 MOVL CX, y+8(FP)
1522 JMP runtime·goPanicExtendSliceB(SB)
1523 TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
1524 MOVL SI, hi+0(FP)
1525 MOVL AX, lo+4(FP)
1526 MOVL CX, y+8(FP)
1527 JMP runtime·goPanicExtendSliceBU(SB)
1528 TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
1529 MOVL SI, hi+0(FP)
1530 MOVL DX, lo+4(FP)
1531 MOVL BX, y+8(FP)
1532 JMP runtime·goPanicExtendSlice3Alen(SB)
1533 TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
1534 MOVL SI, hi+0(FP)
1535 MOVL DX, lo+4(FP)
1536 MOVL BX, y+8(FP)
1537 JMP runtime·goPanicExtendSlice3AlenU(SB)
1538 TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
1539 MOVL SI, hi+0(FP)
1540 MOVL DX, lo+4(FP)
1541 MOVL BX, y+8(FP)
1542 JMP runtime·goPanicExtendSlice3Acap(SB)
1543 TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
1544 MOVL SI, hi+0(FP)
1545 MOVL DX, lo+4(FP)
1546 MOVL BX, y+8(FP)
1547 JMP runtime·goPanicExtendSlice3AcapU(SB)
1548 TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
1549 MOVL SI, hi+0(FP)
1550 MOVL CX, lo+4(FP)
1551 MOVL DX, y+8(FP)
1552 JMP runtime·goPanicExtendSlice3B(SB)
1553 TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
1554 MOVL SI, hi+0(FP)
1555 MOVL CX, lo+4(FP)
1556 MOVL DX, y+8(FP)
1557 JMP runtime·goPanicExtendSlice3BU(SB)
1558 TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
1559 MOVL SI, hi+0(FP)
1560 MOVL AX, lo+4(FP)
1561 MOVL CX, y+8(FP)
1562 JMP runtime·goPanicExtendSlice3C(SB)
1563 TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
1564 MOVL SI, hi+0(FP)
1565 MOVL AX, lo+4(FP)
1566 MOVL CX, y+8(FP)
1567 JMP runtime·goPanicExtendSlice3CU(SB)
1568
1569 #ifdef GOOS_android
1570 // Use the free TLS_SLOT_APP slot #2 on Android Q.
1571 // Earlier androids are set up in gcc_android.c.
// runtime·tls_g is a 4-byte global without pointers (NOPTR), initialized
// to 8. It is only defined here when GOOS_android is set.
// NOTE(review): 8 is presumably the byte offset of slot #2 with 4-byte
// TLS slots (2*4) — confirm against the Android TLS layout.
1572 DATA runtime·tls_g+0(SB)/4, $8
1573 GLOBL runtime·tls_g+0(SB), NOPTR, $4
1574 #endif
View as plain text