Source file src/pkg/cmd/compile/internal/ssa/regalloc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/types"
118 "cmd/internal/objabi"
119 "cmd/internal/src"
120 "cmd/internal/sys"
121 "fmt"
122 "math/bits"
123 "unsafe"
124 )
125
126 const (
127 moveSpills = iota
128 logSpills
129 regDebug
130 stackDebug
131 )
132
133
134
135 const (
136 likelyDistance = 1
137 normalDistance = 10
138 unlikelyDistance = 100
139 )
140
141
142
143 func regalloc(f *Func) {
144 var s regAllocState
145 s.init(f)
146 s.regalloc(f)
147 }
148
149 type register uint8
150
151 const noRegister register = 255
152
153
154
155 type regMask uint64
156
157 func (m regMask) String() string {
158 s := ""
159 for r := register(0); m != 0; r++ {
160 if m>>r&1 == 0 {
161 continue
162 }
163 m &^= regMask(1) << r
164 if s != "" {
165 s += " "
166 }
167 s += fmt.Sprintf("r%d", r)
168 }
169 return s
170 }
171
172 func (s *regAllocState) RegMaskString(m regMask) string {
173 str := ""
174 for r := register(0); m != 0; r++ {
175 if m>>r&1 == 0 {
176 continue
177 }
178 m &^= regMask(1) << r
179 if str != "" {
180 str += " "
181 }
182 str += s.registers[r].String()
183 }
184 return str
185 }
186
187
188 func countRegs(r regMask) int {
189 return bits.OnesCount64(uint64(r))
190 }
191
192
193 func pickReg(r regMask) register {
194 if r == 0 {
195 panic("can't pick a register from an empty set")
196 }
197
198 return register(bits.TrailingZeros64(uint64(r)))
199 }
200
201 type use struct {
202 dist int32
203 pos src.XPos
204 next *use
205 }
206
207
208 type valState struct {
209 regs regMask
210 uses *use
211 spill *Value
212 restoreMin int32
213 restoreMax int32
214 needReg bool
215 rematerializeable bool
216 }
217
218 type regState struct {
219 v *Value
220 c *Value
221
222 }
223
224 type regAllocState struct {
225 f *Func
226
227 sdom SparseTree
228 registers []Register
229 numRegs register
230 SPReg register
231 SBReg register
232 GReg register
233 allocatable regMask
234
235
236
237
238
239 primary []int32
240
241
242
243
244 live [][]liveInfo
245
246
247
248
249 desired []desiredState
250
251
252 values []valState
253
254
255 sp, sb ID
256
257
258
259 orig []*Value
260
261
262 regs []regState
263
264
265 nospill regMask
266
267
268 used regMask
269
270
271 tmpused regMask
272
273
274 curBlock *Block
275
276
277 freeUseRecords *use
278
279
280
281 endRegs [][]endReg
282
283
284
285 startRegs [][]startReg
286
287
288 spillLive [][]ID
289
290
291
292 copies map[*Value]bool
293
294 loopnest *loopnest
295
296
297 visitOrder []*Block
298 }
299
300 type endReg struct {
301 r register
302 v *Value
303 c *Value
304 }
305
306 type startReg struct {
307 r register
308 v *Value
309 c *Value
310 pos src.XPos
311 }
312
313
314 func (s *regAllocState) freeReg(r register) {
315 v := s.regs[r].v
316 if v == nil {
317 s.f.Fatalf("tried to free an already free register %d\n", r)
318 }
319
320
321 if s.f.pass.debug > regDebug {
322 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
323 }
324 s.regs[r] = regState{}
325 s.values[v.ID].regs &^= regMask(1) << r
326 s.used &^= regMask(1) << r
327 }
328
329
330 func (s *regAllocState) freeRegs(m regMask) {
331 for m&s.used != 0 {
332 s.freeReg(pickReg(m & s.used))
333 }
334 }
335
336
337
338 func (s *regAllocState) setOrig(c *Value, v *Value) {
339 for int(c.ID) >= len(s.orig) {
340 s.orig = append(s.orig, nil)
341 }
342 if s.orig[c.ID] != nil {
343 s.f.Fatalf("orig value set twice %s %s", c, v)
344 }
345 s.orig[c.ID] = s.orig[v.ID]
346 }
347
348
349
350 func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
351 if s.f.pass.debug > regDebug {
352 fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
353 }
354 if s.regs[r].v != nil {
355 s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
356 }
357
358
359 s.regs[r] = regState{v, c}
360 s.values[v.ID].regs |= regMask(1) << r
361 s.used |= regMask(1) << r
362 s.f.setHome(c, &s.registers[r])
363 }
364
365
366
367
368 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
369 if v.OnWasmStack {
370 return noRegister
371 }
372
373 mask &= s.allocatable
374 mask &^= s.nospill
375 if mask == 0 {
376 s.f.Fatalf("no register available for %s", v.LongString())
377 }
378
379
380 if mask&^s.used != 0 {
381 return pickReg(mask &^ s.used)
382 }
383
384
385
386
387
388
389
390
391
392
393
394 var r register
395 maxuse := int32(-1)
396 for t := register(0); t < s.numRegs; t++ {
397 if mask>>t&1 == 0 {
398 continue
399 }
400 v := s.regs[t].v
401 if n := s.values[v.ID].uses.dist; n > maxuse {
402
403
404 r = t
405 maxuse = n
406 }
407 }
408 if maxuse == -1 {
409 s.f.Fatalf("couldn't find register to spill")
410 }
411
412 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
413
414
415
416 s.freeReg(r)
417 return r
418 }
419
420
421
422 v2 := s.regs[r].v
423 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
424 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
425 r2 := pickReg(m)
426 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
427 s.copies[c] = false
428 if s.f.pass.debug > regDebug {
429 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
430 }
431 s.setOrig(c, v2)
432 s.assignReg(r2, v2, c)
433 }
434 s.freeReg(r)
435 return r
436 }
437
438
439
440 func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
441 vi := &s.values[v.ID]
442 if vi.spill != nil {
443
444 vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
445 vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
446 return vi.spill
447 }
448
449
450 spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
451
452
453 s.setOrig(spill, v)
454 vi.spill = spill
455 vi.restoreMin = s.sdom[b.ID].entry
456 vi.restoreMax = s.sdom[b.ID].exit
457 return spill
458 }
459
460
461
462
463
464
465
466 func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
467 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
468 c := v.copyIntoWithXPos(s.curBlock, pos)
469 c.OnWasmStack = true
470 s.setOrig(c, v)
471 return c
472 }
473 if v.OnWasmStack {
474 return v
475 }
476
477 vi := &s.values[v.ID]
478 pos = pos.WithNotStmt()
479
480 if mask&vi.regs != 0 {
481 r := pickReg(mask & vi.regs)
482 if s.regs[r].v != v || s.regs[r].c == nil {
483 panic("bad register state")
484 }
485 if nospill {
486 s.nospill |= regMask(1) << r
487 }
488 return s.regs[r].c
489 }
490
491 var r register
492
493 onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
494 if !onWasmStack {
495
496 r = s.allocReg(mask, v)
497 }
498
499
500 var c *Value
501 if vi.regs != 0 {
502
503 r2 := pickReg(vi.regs)
504 if s.regs[r2].v != v {
505 panic("bad register state")
506 }
507 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
508 } else if v.rematerializeable() {
509
510 c = v.copyIntoWithXPos(s.curBlock, pos)
511 } else {
512
513 spill := s.makeSpill(v, s.curBlock)
514 if s.f.pass.debug > logSpills {
515 s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
516 }
517 c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
518 }
519
520 s.setOrig(c, v)
521
522 if onWasmStack {
523 c.OnWasmStack = true
524 return c
525 }
526
527 s.assignReg(r, v, c)
528 if c.Op == OpLoadReg && s.isGReg(r) {
529 s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
530 }
531 if nospill {
532 s.nospill |= regMask(1) << r
533 }
534 return c
535 }
536
537
538 func isLeaf(f *Func) bool {
539 for _, b := range f.Blocks {
540 for _, v := range b.Values {
541 if opcodeTable[v.Op].call {
542 return false
543 }
544 }
545 }
546 return true
547 }
548
549 func (s *regAllocState) init(f *Func) {
550 s.f = f
551 s.f.RegAlloc = s.f.Cache.locs[:0]
552 s.registers = f.Config.registers
553 if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
554 s.f.Fatalf("bad number of registers: %d", nr)
555 } else {
556 s.numRegs = register(nr)
557 }
558
559 s.SPReg = noRegister
560 s.SBReg = noRegister
561 s.GReg = noRegister
562 for r := register(0); r < s.numRegs; r++ {
563 switch s.registers[r].String() {
564 case "SP":
565 s.SPReg = r
566 case "SB":
567 s.SBReg = r
568 case "g":
569 s.GReg = r
570 }
571 }
572
573 switch noRegister {
574 case s.SPReg:
575 s.f.Fatalf("no SP register found")
576 case s.SBReg:
577 s.f.Fatalf("no SB register found")
578 case s.GReg:
579 if f.Config.hasGReg {
580 s.f.Fatalf("no g register found")
581 }
582 }
583
584
585 s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
586 s.allocatable &^= 1 << s.SPReg
587 s.allocatable &^= 1 << s.SBReg
588 if s.f.Config.hasGReg {
589 s.allocatable &^= 1 << s.GReg
590 }
591 if s.f.Config.ctxt.Framepointer_enabled && s.f.Config.FPReg >= 0 {
592 s.allocatable &^= 1 << uint(s.f.Config.FPReg)
593 }
594 if s.f.Config.LinkReg != -1 {
595 if isLeaf(f) {
596
597 s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
598 }
599 if s.f.Config.arch == "arm" && objabi.GOARM == 5 {
600
601
602
603 s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
604 }
605 }
606 if s.f.Config.ctxt.Flag_dynlink {
607 switch s.f.Config.arch {
608 case "amd64":
609 s.allocatable &^= 1 << 15
610 case "arm":
611 s.allocatable &^= 1 << 9
612 case "ppc64le":
613
614 case "arm64":
615
616 case "386":
617
618
619
620
621
622 case "s390x":
623 s.allocatable &^= 1 << 11
624 default:
625 s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
626 }
627 }
628 if s.f.Config.nacl {
629 switch s.f.Config.arch {
630 case "arm":
631 s.allocatable &^= 1 << 9
632 case "amd64p32":
633 s.allocatable &^= 1 << 5
634 s.allocatable &^= 1 << 15
635 }
636 }
637 if s.f.Config.use387 {
638 s.allocatable &^= 1 << 15
639 }
640
641
642
643
644 s.visitOrder = layoutRegallocOrder(f)
645
646
647
648 blockOrder := make([]int32, f.NumBlocks())
649 for i, b := range s.visitOrder {
650 blockOrder[b.ID] = int32(i)
651 }
652
653 s.regs = make([]regState, s.numRegs)
654 nv := f.NumValues()
655 if cap(s.f.Cache.regallocValues) >= nv {
656 s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
657 } else {
658 s.f.Cache.regallocValues = make([]valState, nv)
659 }
660 s.values = s.f.Cache.regallocValues
661 s.orig = make([]*Value, nv)
662 s.copies = make(map[*Value]bool)
663 for _, b := range s.visitOrder {
664 for _, v := range b.Values {
665 if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
666 s.values[v.ID].needReg = true
667 s.values[v.ID].rematerializeable = v.rematerializeable()
668 s.orig[v.ID] = v
669 }
670
671
672 }
673 }
674 s.computeLive()
675
676
677 s.primary = make([]int32, f.NumBlocks())
678 for _, b := range s.visitOrder {
679 best := -1
680 for i, e := range b.Preds {
681 p := e.b
682 if blockOrder[p.ID] >= blockOrder[b.ID] {
683 continue
684 }
685 if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] {
686 best = i
687 }
688 }
689 s.primary[b.ID] = int32(best)
690 }
691
692 s.endRegs = make([][]endReg, f.NumBlocks())
693 s.startRegs = make([][]startReg, f.NumBlocks())
694 s.spillLive = make([][]ID, f.NumBlocks())
695 s.sdom = f.sdom()
696
697
698 if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
699 canLiveOnStack := f.newSparseSet(f.NumValues())
700 defer f.retSparseSet(canLiveOnStack)
701 for _, b := range f.Blocks {
702
703 canLiveOnStack.clear()
704 if b.Control != nil && b.Control.Uses == 1 && !opcodeTable[b.Control.Op].generic {
705 canLiveOnStack.add(b.Control.ID)
706 }
707
708 for i := len(b.Values) - 1; i >= 0; i-- {
709 v := b.Values[i]
710 if canLiveOnStack.contains(v.ID) {
711 v.OnWasmStack = true
712 } else {
713
714 canLiveOnStack.clear()
715 }
716 for _, arg := range v.Args {
717
718
719
720
721
722 if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
723 canLiveOnStack.add(arg.ID)
724 }
725 }
726 }
727 }
728 }
729 }
730
731
732
733 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
734 r := s.freeUseRecords
735 if r != nil {
736 s.freeUseRecords = r.next
737 } else {
738 r = &use{}
739 }
740 r.dist = dist
741 r.pos = pos
742 r.next = s.values[id].uses
743 s.values[id].uses = r
744 if r.next != nil && dist > r.next.dist {
745 s.f.Fatalf("uses added in wrong order")
746 }
747 }
748
749
750
751 func (s *regAllocState) advanceUses(v *Value) {
752 for _, a := range v.Args {
753 if !s.values[a.ID].needReg {
754 continue
755 }
756 ai := &s.values[a.ID]
757 r := ai.uses
758 ai.uses = r.next
759 if r.next == nil {
760
761 s.freeRegs(ai.regs)
762 }
763 r.next = s.freeUseRecords
764 s.freeUseRecords = r
765 }
766 }
767
768
769
770
771 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
772 u := s.values[v.ID].uses
773 d := u.dist
774 for u != nil && u.dist == d {
775 u = u.next
776 }
777 return u != nil && u.dist > d
778 }
779
780
781 func (s *regAllocState) setState(regs []endReg) {
782 s.freeRegs(s.used)
783 for _, x := range regs {
784 s.assignReg(x.r, x.v, x.c)
785 }
786 }
787
788
789 func (s *regAllocState) compatRegs(t *types.Type) regMask {
790 var m regMask
791 if t.IsTuple() || t.IsFlags() {
792 return 0
793 }
794 if t.IsFloat() || t == types.TypeInt128 {
795 m = s.f.Config.fpRegMask
796 } else {
797 m = s.f.Config.gpRegMask
798 }
799 return m & s.allocatable
800 }
801
802
803 func (s *regAllocState) regspec(op Op) regInfo {
804 if op == OpConvert {
805
806
807
808 m := s.allocatable & s.f.Config.gpRegMask
809 return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
810 }
811 return opcodeTable[op].reg
812 }
813
814 func (s *regAllocState) isGReg(r register) bool {
815 return s.f.Config.hasGReg && s.GReg == r
816 }
817
818 func (s *regAllocState) regalloc(f *Func) {
819 regValLiveSet := f.newSparseSet(f.NumValues())
820 defer f.retSparseSet(regValLiveSet)
821 var oldSched []*Value
822 var phis []*Value
823 var phiRegs []register
824 var args []*Value
825
826
827 var desired desiredState
828
829
830 type dentry struct {
831 out [4]register
832 in [3][4]register
833 }
834 var dinfo []dentry
835
836 if f.Entry != f.Blocks[0] {
837 f.Fatalf("entry block must be first")
838 }
839
840 for _, b := range s.visitOrder {
841 if s.f.pass.debug > regDebug {
842 fmt.Printf("Begin processing block %v\n", b)
843 }
844 s.curBlock = b
845
846
847
848 regValLiveSet.clear()
849 for _, e := range s.live[b.ID] {
850 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
851 regValLiveSet.add(e.ID)
852 }
853 if v := b.Control; v != nil && s.values[v.ID].needReg {
854 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
855 regValLiveSet.add(v.ID)
856 }
857 for i := len(b.Values) - 1; i >= 0; i-- {
858 v := b.Values[i]
859 regValLiveSet.remove(v.ID)
860 if v.Op == OpPhi {
861
862
863
864 continue
865 }
866 if opcodeTable[v.Op].call {
867
868 regValLiveSet.clear()
869 if s.sp != 0 && s.values[s.sp].uses != nil {
870 regValLiveSet.add(s.sp)
871 }
872 if s.sb != 0 && s.values[s.sb].uses != nil {
873 regValLiveSet.add(s.sb)
874 }
875 }
876 for _, a := range v.Args {
877 if !s.values[a.ID].needReg {
878 continue
879 }
880 s.addUse(a.ID, int32(i), v.Pos)
881 regValLiveSet.add(a.ID)
882 }
883 }
884 if s.f.pass.debug > regDebug {
885 fmt.Printf("use distances for %s\n", b)
886 for i := range s.values {
887 vi := &s.values[i]
888 u := vi.uses
889 if u == nil {
890 continue
891 }
892 fmt.Printf(" v%d:", i)
893 for u != nil {
894 fmt.Printf(" %d", u.dist)
895 u = u.next
896 }
897 fmt.Println()
898 }
899 }
900
901
902
903 nphi := 0
904 for _, v := range b.Values {
905 if v.Op != OpPhi {
906 break
907 }
908 nphi++
909 }
910 phis = append(phis[:0], b.Values[:nphi]...)
911 oldSched = append(oldSched[:0], b.Values[nphi:]...)
912 b.Values = b.Values[:0]
913
914
915 if b == f.Entry {
916
917 if nphi > 0 {
918 f.Fatalf("phis in entry block")
919 }
920 } else if len(b.Preds) == 1 {
921
922 s.setState(s.endRegs[b.Preds[0].b.ID])
923 if nphi > 0 {
924 f.Fatalf("phis in single-predecessor block")
925 }
926
927
928
929 for r := register(0); r < s.numRegs; r++ {
930 v := s.regs[r].v
931 if v != nil && !regValLiveSet.contains(v.ID) {
932 s.freeReg(r)
933 }
934 }
935 } else {
936
937
938
939
940 idx := s.primary[b.ID]
941 if idx < 0 {
942 f.Fatalf("block with no primary predecessor %s", b)
943 }
944 p := b.Preds[idx].b
945 s.setState(s.endRegs[p.ID])
946
947 if s.f.pass.debug > regDebug {
948 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
949 for _, x := range s.endRegs[p.ID] {
950 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
951 }
952 }
953
954
955
956
957
958 phiRegs = phiRegs[:0]
959 var phiUsed regMask
960
961 for _, v := range phis {
962 if !s.values[v.ID].needReg {
963 phiRegs = append(phiRegs, noRegister)
964 continue
965 }
966 a := v.Args[idx]
967
968
969 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
970 if m != 0 {
971 r := pickReg(m)
972 phiUsed |= regMask(1) << r
973 phiRegs = append(phiRegs, r)
974 } else {
975 phiRegs = append(phiRegs, noRegister)
976 }
977 }
978
979
980 for i, v := range phis {
981 if !s.values[v.ID].needReg {
982 continue
983 }
984 a := v.Args[idx]
985 if !regValLiveSet.contains(a.ID) {
986
987
988 s.freeRegs(s.values[a.ID].regs)
989 } else {
990
991
992
993
994 r := phiRegs[i]
995 if r == noRegister {
996 continue
997 }
998
999
1000
1001 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1002 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1003 r2 := pickReg(m)
1004 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1005 s.copies[c] = false
1006 if s.f.pass.debug > regDebug {
1007 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1008 }
1009 s.setOrig(c, a)
1010 s.assignReg(r2, a, c)
1011 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1012 }
1013 s.freeReg(r)
1014 }
1015 }
1016
1017
1018 b.Values = append(b.Values, phis...)
1019
1020
1021
1022 for i, v := range phis {
1023 if !s.values[v.ID].needReg {
1024 continue
1025 }
1026 if phiRegs[i] != noRegister {
1027 continue
1028 }
1029 if s.f.Config.use387 && v.Type.IsFloat() {
1030 continue
1031 }
1032 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1033 if m != 0 {
1034 r := pickReg(m)
1035 phiRegs[i] = r
1036 phiUsed |= regMask(1) << r
1037 }
1038 }
1039
1040
1041 for i, v := range phis {
1042 if !s.values[v.ID].needReg {
1043 continue
1044 }
1045 r := phiRegs[i]
1046 if r == noRegister {
1047
1048
1049 s.values[v.ID].spill = v
1050 continue
1051 }
1052
1053 s.assignReg(r, v, v)
1054 }
1055
1056
1057 for r := register(0); r < s.numRegs; r++ {
1058 if phiUsed>>r&1 != 0 {
1059 continue
1060 }
1061 v := s.regs[r].v
1062 if v != nil && !regValLiveSet.contains(v.ID) {
1063 s.freeReg(r)
1064 }
1065 }
1066
1067
1068
1069
1070
1071 regList := make([]startReg, 0, 32)
1072 for r := register(0); r < s.numRegs; r++ {
1073 v := s.regs[r].v
1074 if v == nil {
1075 continue
1076 }
1077 if phiUsed>>r&1 != 0 {
1078
1079
1080 continue
1081 }
1082 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1083 }
1084 s.startRegs[b.ID] = make([]startReg, len(regList))
1085 copy(s.startRegs[b.ID], regList)
1086
1087 if s.f.pass.debug > regDebug {
1088 fmt.Printf("after phis\n")
1089 for _, x := range s.startRegs[b.ID] {
1090 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1091 }
1092 }
1093 }
1094
1095
1096 if l := len(oldSched); cap(dinfo) < l {
1097 dinfo = make([]dentry, l)
1098 } else {
1099 dinfo = dinfo[:l]
1100 for i := range dinfo {
1101 dinfo[i] = dentry{}
1102 }
1103 }
1104
1105
1106 desired.copy(&s.desired[b.ID])
1107
1108
1109
1110
1111
1112
1113 for _, e := range b.Succs {
1114 succ := e.b
1115
1116 for _, x := range s.startRegs[succ.ID] {
1117 desired.add(x.v.ID, x.r)
1118 }
1119
1120 pidx := e.i
1121 for _, v := range succ.Values {
1122 if v.Op != OpPhi {
1123 break
1124 }
1125 if !s.values[v.ID].needReg {
1126 continue
1127 }
1128 rp, ok := s.f.getHome(v.ID).(*Register)
1129 if !ok {
1130 continue
1131 }
1132 desired.add(v.Args[pidx].ID, register(rp.num))
1133 }
1134 }
1135
1136
1137 for i := len(oldSched) - 1; i >= 0; i-- {
1138 v := oldSched[i]
1139 prefs := desired.remove(v.ID)
1140 regspec := s.regspec(v.Op)
1141 desired.clobber(regspec.clobbers)
1142 for _, j := range regspec.inputs {
1143 if countRegs(j.regs) != 1 {
1144 continue
1145 }
1146 desired.clobber(j.regs)
1147 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1148 }
1149 if opcodeTable[v.Op].resultInArg0 {
1150 if opcodeTable[v.Op].commutative {
1151 desired.addList(v.Args[1].ID, prefs)
1152 }
1153 desired.addList(v.Args[0].ID, prefs)
1154 }
1155
1156 dinfo[i].out = prefs
1157 for j, a := range v.Args {
1158 if j >= len(dinfo[i].in) {
1159 break
1160 }
1161 dinfo[i].in[j] = desired.get(a.ID)
1162 }
1163 }
1164
1165
1166 for idx, v := range oldSched {
1167 if s.f.pass.debug > regDebug {
1168 fmt.Printf(" processing %s\n", v.LongString())
1169 }
1170 regspec := s.regspec(v.Op)
1171 if v.Op == OpPhi {
1172 f.Fatalf("phi %s not at start of block", v)
1173 }
1174 if v.Op == OpSP {
1175 s.assignReg(s.SPReg, v, v)
1176 b.Values = append(b.Values, v)
1177 s.advanceUses(v)
1178 s.sp = v.ID
1179 continue
1180 }
1181 if v.Op == OpSB {
1182 s.assignReg(s.SBReg, v, v)
1183 b.Values = append(b.Values, v)
1184 s.advanceUses(v)
1185 s.sb = v.ID
1186 continue
1187 }
1188 if v.Op == OpSelect0 || v.Op == OpSelect1 {
1189 if s.values[v.ID].needReg {
1190 var i = 0
1191 if v.Op == OpSelect1 {
1192 i = 1
1193 }
1194 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1195 }
1196 b.Values = append(b.Values, v)
1197 s.advanceUses(v)
1198 goto issueSpill
1199 }
1200 if v.Op == OpGetG && s.f.Config.hasGReg {
1201
1202 if s.regs[s.GReg].v != nil {
1203 s.freeReg(s.GReg)
1204 }
1205 s.assignReg(s.GReg, v, v)
1206 b.Values = append(b.Values, v)
1207 s.advanceUses(v)
1208 goto issueSpill
1209 }
1210 if v.Op == OpArg {
1211
1212
1213
1214 s.values[v.ID].spill = v
1215 b.Values = append(b.Values, v)
1216 s.advanceUses(v)
1217 continue
1218 }
1219 if v.Op == OpKeepAlive {
1220
1221 s.advanceUses(v)
1222 a := v.Args[0]
1223 vi := &s.values[a.ID]
1224 if vi.regs == 0 && !vi.rematerializeable {
1225
1226
1227
1228 v.SetArg(0, s.makeSpill(a, b))
1229 } else if _, ok := a.Aux.(GCNode); ok && vi.rematerializeable {
1230
1231
1232
1233 v.Op = OpVarLive
1234 v.SetArgs1(v.Args[1])
1235 v.Aux = a.Aux
1236 } else {
1237
1238
1239
1240 v.Op = OpCopy
1241 v.SetArgs1(v.Args[1])
1242 }
1243 b.Values = append(b.Values, v)
1244 continue
1245 }
1246 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1247
1248 s.freeRegs(regspec.clobbers)
1249 b.Values = append(b.Values, v)
1250 s.advanceUses(v)
1251 continue
1252 }
1253
1254 if s.values[v.ID].rematerializeable {
1255
1256
1257
1258 for _, a := range v.Args {
1259 a.Uses--
1260 }
1261 s.advanceUses(v)
1262 continue
1263 }
1264
1265 if s.f.pass.debug > regDebug {
1266 fmt.Printf("value %s\n", v.LongString())
1267 fmt.Printf(" out:")
1268 for _, r := range dinfo[idx].out {
1269 if r != noRegister {
1270 fmt.Printf(" %s", &s.registers[r])
1271 }
1272 }
1273 fmt.Println()
1274 for i := 0; i < len(v.Args) && i < 3; i++ {
1275 fmt.Printf(" in%d:", i)
1276 for _, r := range dinfo[idx].in[i] {
1277 if r != noRegister {
1278 fmt.Printf(" %s", &s.registers[r])
1279 }
1280 }
1281 fmt.Println()
1282 }
1283 }
1284
1285
1286
1287 args = append(args[:0], v.Args...)
1288 for _, i := range regspec.inputs {
1289 mask := i.regs
1290 if mask&s.values[args[i.idx].ID].regs == 0 {
1291
1292 mask &= s.allocatable
1293 mask &^= s.nospill
1294
1295 if i.idx < 3 {
1296 for _, r := range dinfo[idx].in[i.idx] {
1297 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1298
1299 mask = regMask(1) << r
1300 break
1301 }
1302 }
1303 }
1304
1305 if mask&^desired.avoid != 0 {
1306 mask &^= desired.avoid
1307 }
1308 }
1309 args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos)
1310 }
1311
1312
1313
1314
1315 if opcodeTable[v.Op].resultInArg0 {
1316 var m regMask
1317 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1318
1319 goto ok
1320 }
1321 if s.values[v.Args[0].ID].rematerializeable {
1322
1323 goto ok
1324 }
1325 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1326
1327 goto ok
1328 }
1329 if opcodeTable[v.Op].commutative {
1330 if !s.liveAfterCurrentInstruction(v.Args[1]) {
1331 args[0], args[1] = args[1], args[0]
1332 goto ok
1333 }
1334 if s.values[v.Args[1].ID].rematerializeable {
1335 args[0], args[1] = args[1], args[0]
1336 goto ok
1337 }
1338 if countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1339 args[0], args[1] = args[1], args[0]
1340 goto ok
1341 }
1342 }
1343
1344
1345
1346
1347
1348 m = s.compatRegs(v.Args[0].Type) &^ s.used
1349 if m == 0 {
1350
1351
1352
1353
1354 goto ok
1355 }
1356
1357
1358 for _, r := range dinfo[idx].out {
1359 if r != noRegister && m>>r&1 != 0 {
1360 m = regMask(1) << r
1361 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1362
1363
1364 goto ok
1365 }
1366 }
1367
1368
1369 for _, r := range dinfo[idx].in[0] {
1370 if r != noRegister && m>>r&1 != 0 {
1371 m = regMask(1) << r
1372 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1373 s.copies[c] = false
1374
1375
1376 goto ok
1377 }
1378 }
1379 if opcodeTable[v.Op].commutative {
1380 for _, r := range dinfo[idx].in[1] {
1381 if r != noRegister && m>>r&1 != 0 {
1382 m = regMask(1) << r
1383 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1384 s.copies[c] = false
1385 args[0], args[1] = args[1], args[0]
1386 goto ok
1387 }
1388 }
1389 }
1390
1391 if m&^desired.avoid != 0 {
1392 m &^= desired.avoid
1393 }
1394
1395 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1396 s.copies[c] = false
1397 }
1398
1399 ok:
1400
1401
1402
1403
1404 if !opcodeTable[v.Op].resultNotInArgs {
1405 s.tmpused = s.nospill
1406 s.nospill = 0
1407 s.advanceUses(v)
1408 }
1409
1410
1411 s.freeRegs(regspec.clobbers)
1412 s.tmpused |= regspec.clobbers
1413
1414
1415 {
1416 outRegs := [2]register{noRegister, noRegister}
1417 var used regMask
1418 for _, out := range regspec.outputs {
1419 mask := out.regs & s.allocatable &^ used
1420 if mask == 0 {
1421 continue
1422 }
1423 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1424 if !opcodeTable[v.Op].commutative {
1425
1426 r := register(s.f.getHome(args[0].ID).(*Register).num)
1427 mask = regMask(1) << r
1428 } else {
1429
1430 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1431 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1432
1433 found := false
1434 for _, r := range dinfo[idx].out {
1435 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1436 mask = regMask(1) << r
1437 found = true
1438 if r == r1 {
1439 args[0], args[1] = args[1], args[0]
1440 }
1441 break
1442 }
1443 }
1444 if !found {
1445
1446 mask = regMask(1) << r0
1447 }
1448 }
1449 }
1450 for _, r := range dinfo[idx].out {
1451 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1452
1453 mask = regMask(1) << r
1454 break
1455 }
1456 }
1457
1458 if mask&^desired.avoid&^s.nospill != 0 {
1459 mask &^= desired.avoid
1460 }
1461 r := s.allocReg(mask, v)
1462 outRegs[out.idx] = r
1463 used |= regMask(1) << r
1464 s.tmpused |= regMask(1) << r
1465 }
1466
1467 if v.Type.IsTuple() {
1468 var outLocs LocPair
1469 if r := outRegs[0]; r != noRegister {
1470 outLocs[0] = &s.registers[r]
1471 }
1472 if r := outRegs[1]; r != noRegister {
1473 outLocs[1] = &s.registers[r]
1474 }
1475 s.f.setHome(v, outLocs)
1476
1477 } else {
1478 if r := outRegs[0]; r != noRegister {
1479 s.assignReg(r, v, v)
1480 }
1481 }
1482 }
1483
1484
1485 if opcodeTable[v.Op].resultNotInArgs {
1486 s.nospill = 0
1487 s.advanceUses(v)
1488 }
1489 s.tmpused = 0
1490
1491
1492 for i, a := range args {
1493 v.SetArg(i, a)
1494 }
1495 b.Values = append(b.Values, v)
1496
1497 issueSpill:
1498 }
1499
1500
1501 if v := b.Control; v != nil && s.values[v.ID].needReg {
1502 if s.f.pass.debug > regDebug {
1503 fmt.Printf(" processing control %s\n", v.LongString())
1504 }
1505
1506
1507
1508 b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos)
1509 if b.Control != v {
1510 v.Uses--
1511 b.Control.Uses++
1512 }
1513
1514 vi := &s.values[v.ID]
1515 u := vi.uses
1516 vi.uses = u.next
1517 if u.next == nil {
1518 s.freeRegs(vi.regs)
1519 }
1520 u.next = s.freeUseRecords
1521 s.freeUseRecords = u
1522 }
1523
1524
1525 if s.f.Config.use387 {
1526 s.freeRegs(s.f.Config.fpRegMask)
1527 }
1528
1529
1530
1531
1532 if len(b.Succs) == 1 {
1533 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1534 s.freeReg(s.GReg)
1535 }
1536
1537 top := b.Succs[0].b
1538 loop := s.loopnest.b2l[top.ID]
1539 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1540 goto badloop
1541 }
1542
1543
1544 for _, live := range s.live[b.ID] {
1545 if live.dist >= unlikelyDistance {
1546
1547 continue
1548 }
1549 vid := live.ID
1550 vi := &s.values[vid]
1551 if vi.regs != 0 {
1552 continue
1553 }
1554 if vi.rematerializeable {
1555 continue
1556 }
1557 v := s.orig[vid]
1558 if s.f.Config.use387 && v.Type.IsFloat() {
1559 continue
1560 }
1561 m := s.compatRegs(v.Type) &^ s.used
1562 if m&^desired.avoid != 0 {
1563 m &^= desired.avoid
1564 }
1565 if m != 0 {
1566 s.allocValToReg(v, m, false, b.Pos)
1567 }
1568 }
1569 }
1570 badloop:
1571 ;
1572
1573
1574
1575 k := 0
1576 for r := register(0); r < s.numRegs; r++ {
1577 v := s.regs[r].v
1578 if v == nil {
1579 continue
1580 }
1581 k++
1582 }
1583 regList := make([]endReg, 0, k)
1584 for r := register(0); r < s.numRegs; r++ {
1585 v := s.regs[r].v
1586 if v == nil {
1587 continue
1588 }
1589 regList = append(regList, endReg{r, v, s.regs[r].c})
1590 }
1591 s.endRegs[b.ID] = regList
1592
1593 if checkEnabled {
1594 regValLiveSet.clear()
1595 for _, x := range s.live[b.ID] {
1596 regValLiveSet.add(x.ID)
1597 }
1598 for r := register(0); r < s.numRegs; r++ {
1599 v := s.regs[r].v
1600 if v == nil {
1601 continue
1602 }
1603 if !regValLiveSet.contains(v.ID) {
1604 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
1605 }
1606 }
1607 }
1608
1609
1610
1611
1612
1613 for _, e := range s.live[b.ID] {
1614 vi := &s.values[e.ID]
1615 if vi.regs != 0 {
1616
1617 continue
1618 }
1619 if vi.rematerializeable {
1620
1621 continue
1622 }
1623
1624 spill := s.makeSpill(s.orig[e.ID], b)
1625 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
1626 }
1627
1628
1629
1630
1631 for _, e := range s.live[b.ID] {
1632 u := s.values[e.ID].uses
1633 if u == nil {
1634 f.Fatalf("live at end, no uses v%d", e.ID)
1635 }
1636 if u.next != nil {
1637 f.Fatalf("live at end, too many uses v%d", e.ID)
1638 }
1639 s.values[e.ID].uses = nil
1640 u.next = s.freeUseRecords
1641 s.freeUseRecords = u
1642 }
1643 }
1644
1645
1646 s.placeSpills()
1647
1648
1649
1650 stacklive := stackalloc(s.f, s.spillLive)
1651
1652
1653 s.shuffle(stacklive)
1654
1655
1656
1657
1658 for {
1659 progress := false
1660 for c, used := range s.copies {
1661 if !used && c.Uses == 0 {
1662 if s.f.pass.debug > regDebug {
1663 fmt.Printf("delete copied value %s\n", c.LongString())
1664 }
1665 c.RemoveArg(0)
1666 f.freeValue(c)
1667 delete(s.copies, c)
1668 progress = true
1669 }
1670 }
1671 if !progress {
1672 break
1673 }
1674 }
1675
1676 for _, b := range s.visitOrder {
1677 i := 0
1678 for _, v := range b.Values {
1679 if v.Op == OpInvalid {
1680 continue
1681 }
1682 b.Values[i] = v
1683 i++
1684 }
1685 b.Values = b.Values[:i]
1686 }
1687 }
1688
1689 func (s *regAllocState) placeSpills() {
1690 f := s.f
1691
1692
1693 phiRegs := make([]regMask, f.NumBlocks())
1694 for _, b := range s.visitOrder {
1695 var m regMask
1696 for _, v := range b.Values {
1697 if v.Op != OpPhi {
1698 break
1699 }
1700 if r, ok := f.getHome(v.ID).(*Register); ok {
1701 m |= regMask(1) << uint(r.num)
1702 }
1703 }
1704 phiRegs[b.ID] = m
1705 }
1706
1707
1708
1709 start := map[ID][]*Value{}
1710
1711
1712 after := map[ID][]*Value{}
1713
1714 for i := range s.values {
1715 vi := s.values[i]
1716 spill := vi.spill
1717 if spill == nil {
1718 continue
1719 }
1720 if spill.Block != nil {
1721
1722
1723 continue
1724 }
1725 v := s.orig[i]
1726
1727
1728
1729
1730
1731 best := v.Block
1732 bestArg := v
1733 var bestDepth int16
1734 if l := s.loopnest.b2l[best.ID]; l != nil {
1735 bestDepth = l.depth
1736 }
1737 b := best
1738 const maxSpillSearch = 100
1739 for i := 0; i < maxSpillSearch; i++ {
1740
1741
1742 p := b
1743 b = nil
1744 for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
1745 if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
1746
1747 b = c
1748 break
1749 }
1750 }
1751 if b == nil {
1752
1753 break
1754 }
1755
1756 var depth int16
1757 if l := s.loopnest.b2l[b.ID]; l != nil {
1758 depth = l.depth
1759 }
1760 if depth > bestDepth {
1761
1762 continue
1763 }
1764
1765
1766
1767 if len(b.Preds) == 1 {
1768 for _, e := range s.endRegs[b.Preds[0].b.ID] {
1769 if e.v == v {
1770
1771 best = b
1772 bestArg = e.c
1773 bestDepth = depth
1774 break
1775 }
1776 }
1777 } else {
1778 for _, e := range s.startRegs[b.ID] {
1779 if e.v == v {
1780
1781 best = b
1782 bestArg = e.c
1783 bestDepth = depth
1784 break
1785 }
1786 }
1787 }
1788 }
1789
1790
1791 spill.Block = best
1792 spill.AddArg(bestArg)
1793 if best == v.Block && v.Op != OpPhi {
1794
1795 after[v.ID] = append(after[v.ID], spill)
1796 } else {
1797
1798 start[best.ID] = append(start[best.ID], spill)
1799 }
1800 }
1801
1802
1803 var oldSched []*Value
1804 for _, b := range s.visitOrder {
1805 nphi := 0
1806 for _, v := range b.Values {
1807 if v.Op != OpPhi {
1808 break
1809 }
1810 nphi++
1811 }
1812 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1813 b.Values = b.Values[:nphi]
1814 b.Values = append(b.Values, start[b.ID]...)
1815 for _, v := range oldSched {
1816 b.Values = append(b.Values, v)
1817 b.Values = append(b.Values, after[v.ID]...)
1818 }
1819 }
1820 }
1821
1822
1823 func (s *regAllocState) shuffle(stacklive [][]ID) {
1824 var e edgeState
1825 e.s = s
1826 e.cache = map[ID][]*Value{}
1827 e.contents = map[Location]contentRecord{}
1828 if s.f.pass.debug > regDebug {
1829 fmt.Printf("shuffle %s\n", s.f.Name)
1830 fmt.Println(s.f.String())
1831 }
1832
1833 for _, b := range s.visitOrder {
1834 if len(b.Preds) <= 1 {
1835 continue
1836 }
1837 e.b = b
1838 for i, edge := range b.Preds {
1839 p := edge.b
1840 e.p = p
1841 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
1842 e.process()
1843 }
1844 }
1845 }
1846
1847 type edgeState struct {
1848 s *regAllocState
1849 p, b *Block
1850
1851
1852 cache map[ID][]*Value
1853 cachedVals []ID
1854
1855
1856 contents map[Location]contentRecord
1857
1858
1859 destinations []dstRecord
1860 extra []dstRecord
1861
1862 usedRegs regMask
1863 uniqueRegs regMask
1864 finalRegs regMask
1865 rematerializeableRegs regMask
1866 }
1867
1868 type contentRecord struct {
1869 vid ID
1870 c *Value
1871 final bool
1872 pos src.XPos
1873 }
1874
1875 type dstRecord struct {
1876 loc Location
1877 vid ID
1878 splice **Value
1879 pos src.XPos
1880 }
1881
1882
1883 func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
1884 if e.s.f.pass.debug > regDebug {
1885 fmt.Printf("edge %s->%s\n", e.p, e.b)
1886 }
1887
1888
1889 for _, vid := range e.cachedVals {
1890 delete(e.cache, vid)
1891 }
1892 e.cachedVals = e.cachedVals[:0]
1893 for k := range e.contents {
1894 delete(e.contents, k)
1895 }
1896 e.usedRegs = 0
1897 e.uniqueRegs = 0
1898 e.finalRegs = 0
1899 e.rematerializeableRegs = 0
1900
1901
1902 for _, x := range srcReg {
1903 e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
1904 }
1905
1906 for _, spillID := range stacklive {
1907 v := e.s.orig[spillID]
1908 spill := e.s.values[v.ID].spill
1909 if !e.s.sdom.isAncestorEq(spill.Block, e.p) {
1910
1911
1912
1913
1914
1915
1916
1917
1918 continue
1919 }
1920 e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
1921 }
1922
1923
1924 dsts := e.destinations[:0]
1925 for _, x := range dstReg {
1926 dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
1927 }
1928
1929 for _, v := range e.b.Values {
1930 if v.Op != OpPhi {
1931 break
1932 }
1933 loc := e.s.f.getHome(v.ID)
1934 if loc == nil {
1935 continue
1936 }
1937 dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
1938 }
1939 e.destinations = dsts
1940
1941 if e.s.f.pass.debug > regDebug {
1942 for _, vid := range e.cachedVals {
1943 a := e.cache[vid]
1944 for _, c := range a {
1945 fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
1946 }
1947 }
1948 for _, d := range e.destinations {
1949 fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
1950 }
1951 }
1952 }
1953
1954
1955 func (e *edgeState) process() {
1956 dsts := e.destinations
1957
1958
1959 for len(dsts) > 0 {
1960 i := 0
1961 for _, d := range dsts {
1962 if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
1963
1964 dsts[i] = d
1965 i++
1966 }
1967 }
1968 if i < len(dsts) {
1969
1970 dsts = dsts[:i]
1971
1972
1973 dsts = append(dsts, e.extra...)
1974 e.extra = e.extra[:0]
1975 continue
1976 }
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000 d := dsts[0]
2001 loc := d.loc
2002 vid := e.contents[loc].vid
2003 c := e.contents[loc].c
2004 r := e.findRegFor(c.Type)
2005 if e.s.f.pass.debug > regDebug {
2006 fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
2007 }
2008 e.erase(r)
2009 pos := d.pos.WithNotStmt()
2010 if _, isReg := loc.(*Register); isReg {
2011 c = e.p.NewValue1(pos, OpCopy, c.Type, c)
2012 } else {
2013 c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2014 }
2015 e.set(r, vid, c, false, pos)
2016 if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
2017 e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
2018 }
2019 }
2020 }
2021
2022
2023
2024 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2025 pos = pos.WithNotStmt()
2026 occupant := e.contents[loc]
2027 if occupant.vid == vid {
2028
2029 e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2030 if splice != nil {
2031 (*splice).Uses--
2032 *splice = occupant.c
2033 occupant.c.Uses++
2034 }
2035
2036
2037
2038 if _, ok := e.s.copies[occupant.c]; ok {
2039
2040 e.s.copies[occupant.c] = true
2041 }
2042 return true
2043 }
2044
2045
2046 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
2047
2048
2049 return false
2050 }
2051
2052
2053 v := e.s.orig[vid]
2054 var c *Value
2055 var src Location
2056 if e.s.f.pass.debug > regDebug {
2057 fmt.Printf("moving v%d to %s\n", vid, loc)
2058 fmt.Printf("sources of v%d:", vid)
2059 }
2060 for _, w := range e.cache[vid] {
2061 h := e.s.f.getHome(w.ID)
2062 if e.s.f.pass.debug > regDebug {
2063 fmt.Printf(" %s:%s", h, w)
2064 }
2065 _, isreg := h.(*Register)
2066 if src == nil || isreg {
2067 c = w
2068 src = h
2069 }
2070 }
2071 if e.s.f.pass.debug > regDebug {
2072 if src != nil {
2073 fmt.Printf(" [use %s]\n", src)
2074 } else {
2075 fmt.Printf(" [no source]\n")
2076 }
2077 }
2078 _, dstReg := loc.(*Register)
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090 e.erase(loc)
2091 var x *Value
2092 if c == nil || e.s.values[vid].rematerializeable {
2093 if !e.s.values[vid].rematerializeable {
2094 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2095 }
2096 if dstReg {
2097 x = v.copyInto(e.p)
2098 } else {
2099
2100
2101 r := e.findRegFor(v.Type)
2102 e.erase(r)
2103 x = v.copyIntoWithXPos(e.p, pos)
2104 e.set(r, vid, x, false, pos)
2105
2106
2107
2108 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2109 }
2110 } else {
2111
2112 _, srcReg := src.(*Register)
2113 if srcReg {
2114 if dstReg {
2115 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2116 } else {
2117 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2118 }
2119 } else {
2120 if dstReg {
2121 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2122 } else {
2123
2124 r := e.findRegFor(c.Type)
2125 e.erase(r)
2126 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2127 e.set(r, vid, t, false, pos)
2128 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2129 }
2130 }
2131 }
2132 e.set(loc, vid, x, true, pos)
2133 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2134 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2135 }
2136 if splice != nil {
2137 (*splice).Uses--
2138 *splice = x
2139 x.Uses++
2140 }
2141 return true
2142 }
2143
2144
2145 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
2146 e.s.f.setHome(c, loc)
2147 e.contents[loc] = contentRecord{vid, c, final, pos}
2148 a := e.cache[vid]
2149 if len(a) == 0 {
2150 e.cachedVals = append(e.cachedVals, vid)
2151 }
2152 a = append(a, c)
2153 e.cache[vid] = a
2154 if r, ok := loc.(*Register); ok {
2155 e.usedRegs |= regMask(1) << uint(r.num)
2156 if final {
2157 e.finalRegs |= regMask(1) << uint(r.num)
2158 }
2159 if len(a) == 1 {
2160 e.uniqueRegs |= regMask(1) << uint(r.num)
2161 }
2162 if len(a) == 2 {
2163 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
2164 e.uniqueRegs &^= regMask(1) << uint(t.num)
2165 }
2166 }
2167 if e.s.values[vid].rematerializeable {
2168 e.rematerializeableRegs |= regMask(1) << uint(r.num)
2169 }
2170 }
2171 if e.s.f.pass.debug > regDebug {
2172 fmt.Printf("%s\n", c.LongString())
2173 fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
2174 }
2175 }
2176
2177
2178 func (e *edgeState) erase(loc Location) {
2179 cr := e.contents[loc]
2180 if cr.c == nil {
2181 return
2182 }
2183 vid := cr.vid
2184
2185 if cr.final {
2186
2187
2188
2189 e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
2190 }
2191
2192
2193 a := e.cache[vid]
2194 for i, c := range a {
2195 if e.s.f.getHome(c.ID) == loc {
2196 if e.s.f.pass.debug > regDebug {
2197 fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
2198 }
2199 a[i], a = a[len(a)-1], a[:len(a)-1]
2200 break
2201 }
2202 }
2203 e.cache[vid] = a
2204
2205
2206 if r, ok := loc.(*Register); ok {
2207 e.usedRegs &^= regMask(1) << uint(r.num)
2208 if cr.final {
2209 e.finalRegs &^= regMask(1) << uint(r.num)
2210 }
2211 e.rematerializeableRegs &^= regMask(1) << uint(r.num)
2212 }
2213 if len(a) == 1 {
2214 if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
2215 e.uniqueRegs |= regMask(1) << uint(r.num)
2216 }
2217 }
2218 }
2219
2220
2221 func (e *edgeState) findRegFor(typ *types.Type) Location {
2222
2223 var m regMask
2224 types := &e.s.f.Config.Types
2225 if typ.IsFloat() {
2226 m = e.s.compatRegs(types.Float64)
2227 } else {
2228 m = e.s.compatRegs(types.Int64)
2229 }
2230
2231
2232
2233
2234
2235
2236 x := m &^ e.usedRegs
2237 if x != 0 {
2238 return &e.s.registers[pickReg(x)]
2239 }
2240 x = m &^ e.uniqueRegs &^ e.finalRegs
2241 if x != 0 {
2242 return &e.s.registers[pickReg(x)]
2243 }
2244 x = m &^ e.uniqueRegs
2245 if x != 0 {
2246 return &e.s.registers[pickReg(x)]
2247 }
2248 x = m & e.rematerializeableRegs
2249 if x != 0 {
2250 return &e.s.registers[pickReg(x)]
2251 }
2252
2253
2254
2255 for _, vid := range e.cachedVals {
2256 a := e.cache[vid]
2257 for _, c := range a {
2258 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
2259 if !c.rematerializeable() {
2260 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
2261
2262
2263
2264 t := LocalSlot{N: e.s.f.fe.Auto(c.Pos, types.Int64), Type: types.Int64}
2265
2266 e.set(t, vid, x, false, c.Pos)
2267 if e.s.f.pass.debug > regDebug {
2268 fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
2269 }
2270 }
2271
2272
2273
2274 return r
2275 }
2276 }
2277 }
2278
2279 fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
2280 for _, vid := range e.cachedVals {
2281 a := e.cache[vid]
2282 for _, c := range a {
2283 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
2284 }
2285 }
2286 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
2287 return nil
2288 }
2289
2290
2291
2292 func (v *Value) rematerializeable() bool {
2293 if !opcodeTable[v.Op].rematerializeable {
2294 return false
2295 }
2296 for _, a := range v.Args {
2297
2298 if a.Op != OpSP && a.Op != OpSB {
2299 return false
2300 }
2301 }
2302 return true
2303 }
2304
2305 type liveInfo struct {
2306 ID ID
2307 dist int32
2308 pos src.XPos
2309 }
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319 func (s *regAllocState) computeLive() {
2320 f := s.f
2321 s.live = make([][]liveInfo, f.NumBlocks())
2322 s.desired = make([]desiredState, f.NumBlocks())
2323 var phis []*Value
2324
2325 live := f.newSparseMap(f.NumValues())
2326 defer f.retSparseMap(live)
2327 t := f.newSparseMap(f.NumValues())
2328 defer f.retSparseMap(t)
2329
2330
2331 var desired desiredState
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342 po := f.postorder()
2343 s.loopnest = f.loopnest()
2344 s.loopnest.calculateDepths()
2345 for {
2346 changed := false
2347
2348 for _, b := range po {
2349
2350
2351
2352 live.clear()
2353 for _, e := range s.live[b.ID] {
2354 live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
2355 }
2356
2357
2358 if b.Control != nil && s.values[b.Control.ID].needReg {
2359 live.set(b.Control.ID, int32(len(b.Values)), b.Pos)
2360 }
2361
2362
2363
2364 phis = phis[:0]
2365 for i := len(b.Values) - 1; i >= 0; i-- {
2366 v := b.Values[i]
2367 live.remove(v.ID)
2368 if v.Op == OpPhi {
2369
2370 phis = append(phis, v)
2371 continue
2372 }
2373 if opcodeTable[v.Op].call {
2374 c := live.contents()
2375 for i := range c {
2376 c[i].val += unlikelyDistance
2377 }
2378 }
2379 for _, a := range v.Args {
2380 if s.values[a.ID].needReg {
2381 live.set(a.ID, int32(i), v.Pos)
2382 }
2383 }
2384 }
2385
2386 desired.copy(&s.desired[b.ID])
2387 for i := len(b.Values) - 1; i >= 0; i-- {
2388 v := b.Values[i]
2389 prefs := desired.remove(v.ID)
2390 if v.Op == OpPhi {
2391
2392
2393
2394 continue
2395 }
2396 regspec := s.regspec(v.Op)
2397
2398 desired.clobber(regspec.clobbers)
2399
2400 for _, j := range regspec.inputs {
2401 if countRegs(j.regs) != 1 {
2402 continue
2403 }
2404 desired.clobber(j.regs)
2405 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
2406 }
2407
2408 if opcodeTable[v.Op].resultInArg0 {
2409 if opcodeTable[v.Op].commutative {
2410 desired.addList(v.Args[1].ID, prefs)
2411 }
2412 desired.addList(v.Args[0].ID, prefs)
2413 }
2414 }
2415
2416
2417
2418 for i, e := range b.Preds {
2419 p := e.b
2420
2421
2422
2423 delta := int32(normalDistance)
2424 if len(p.Succs) == 2 {
2425 if p.Succs[0].b == b && p.Likely == BranchLikely ||
2426 p.Succs[1].b == b && p.Likely == BranchUnlikely {
2427 delta = likelyDistance
2428 }
2429 if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
2430 p.Succs[1].b == b && p.Likely == BranchLikely {
2431 delta = unlikelyDistance
2432 }
2433 }
2434
2435
2436 s.desired[p.ID].merge(&desired)
2437
2438
2439 t.clear()
2440 for _, e := range s.live[p.ID] {
2441 t.set(e.ID, e.dist, e.pos)
2442 }
2443 update := false
2444
2445
2446 for _, e := range live.contents() {
2447 d := e.val + delta
2448 if !t.contains(e.key) || d < t.get(e.key) {
2449 update = true
2450 t.set(e.key, d, e.aux)
2451 }
2452 }
2453
2454
2455
2456 for _, v := range phis {
2457 id := v.Args[i].ID
2458 if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
2459 update = true
2460 t.set(id, delta, v.Pos)
2461 }
2462 }
2463
2464 if !update {
2465 continue
2466 }
2467
2468 l := s.live[p.ID][:0]
2469 if cap(l) < t.size() {
2470 l = make([]liveInfo, 0, t.size())
2471 }
2472 for _, e := range t.contents() {
2473 l = append(l, liveInfo{e.key, e.val, e.aux})
2474 }
2475 s.live[p.ID] = l
2476 changed = true
2477 }
2478 }
2479
2480 if !changed {
2481 break
2482 }
2483 }
2484 if f.pass.debug > regDebug {
2485 fmt.Println("live values at end of each block")
2486 for _, b := range f.Blocks {
2487 fmt.Printf(" %s:", b)
2488 for _, x := range s.live[b.ID] {
2489 fmt.Printf(" v%d", x.ID)
2490 for _, e := range s.desired[b.ID].entries {
2491 if e.ID != x.ID {
2492 continue
2493 }
2494 fmt.Printf("[")
2495 first := true
2496 for _, r := range e.regs {
2497 if r == noRegister {
2498 continue
2499 }
2500 if !first {
2501 fmt.Printf(",")
2502 }
2503 fmt.Print(&s.registers[r])
2504 first = false
2505 }
2506 fmt.Printf("]")
2507 }
2508 }
2509 if avoid := s.desired[b.ID].avoid; avoid != 0 {
2510 fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
2511 }
2512 fmt.Println()
2513 }
2514 }
2515 }
2516
2517
2518 type desiredState struct {
2519
2520
2521 entries []desiredStateEntry
2522
2523
2524
2525
2526 avoid regMask
2527 }
2528 type desiredStateEntry struct {
2529
2530 ID ID
2531
2532
2533 regs [4]register
2534 }
2535
2536 func (d *desiredState) clear() {
2537 d.entries = d.entries[:0]
2538 d.avoid = 0
2539 }
2540
2541
2542 func (d *desiredState) get(vid ID) [4]register {
2543 for _, e := range d.entries {
2544 if e.ID == vid {
2545 return e.regs
2546 }
2547 }
2548 return [4]register{noRegister, noRegister, noRegister, noRegister}
2549 }
2550
2551
2552 func (d *desiredState) add(vid ID, r register) {
2553 d.avoid |= regMask(1) << r
2554 for i := range d.entries {
2555 e := &d.entries[i]
2556 if e.ID != vid {
2557 continue
2558 }
2559 if e.regs[0] == r {
2560
2561 return
2562 }
2563 for j := 1; j < len(e.regs); j++ {
2564 if e.regs[j] == r {
2565
2566 copy(e.regs[1:], e.regs[:j])
2567 e.regs[0] = r
2568 return
2569 }
2570 }
2571 copy(e.regs[1:], e.regs[:])
2572 e.regs[0] = r
2573 return
2574 }
2575 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
2576 }
2577
2578 func (d *desiredState) addList(vid ID, regs [4]register) {
2579
2580 for i := len(regs) - 1; i >= 0; i-- {
2581 r := regs[i]
2582 if r != noRegister {
2583 d.add(vid, r)
2584 }
2585 }
2586 }
2587
2588
2589 func (d *desiredState) clobber(m regMask) {
2590 for i := 0; i < len(d.entries); {
2591 e := &d.entries[i]
2592 j := 0
2593 for _, r := range e.regs {
2594 if r != noRegister && m>>r&1 == 0 {
2595 e.regs[j] = r
2596 j++
2597 }
2598 }
2599 if j == 0 {
2600
2601 d.entries[i] = d.entries[len(d.entries)-1]
2602 d.entries = d.entries[:len(d.entries)-1]
2603 continue
2604 }
2605 for ; j < len(e.regs); j++ {
2606 e.regs[j] = noRegister
2607 }
2608 i++
2609 }
2610 d.avoid &^= m
2611 }
2612
2613
2614 func (d *desiredState) copy(x *desiredState) {
2615 d.entries = append(d.entries[:0], x.entries...)
2616 d.avoid = x.avoid
2617 }
2618
2619
2620 func (d *desiredState) remove(vid ID) [4]register {
2621 for i := range d.entries {
2622 if d.entries[i].ID == vid {
2623 regs := d.entries[i].regs
2624 d.entries[i] = d.entries[len(d.entries)-1]
2625 d.entries = d.entries[:len(d.entries)-1]
2626 return regs
2627 }
2628 }
2629 return [4]register{noRegister, noRegister, noRegister, noRegister}
2630 }
2631
2632
2633 func (d *desiredState) merge(x *desiredState) {
2634 d.avoid |= x.avoid
2635
2636
2637 for _, e := range x.entries {
2638 d.addList(e.ID, e.regs)
2639 }
2640 }
2641
2642 func min32(x, y int32) int32 {
2643 if x < y {
2644 return x
2645 }
2646 return y
2647 }
2648 func max32(x, y int32) int32 {
2649 if x > y {
2650 return x
2651 }
2652 return y
2653 }
2654
View as plain text