1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package ssa
6 7 // This file defines the lifting pass which tries to "lift" Alloc
8 // cells (new/local variables) into SSA registers, replacing loads
9 // with the dominating stored value, eliminating loads and stores, and
10 // inserting φ-nodes as needed.
11 12 // Cited papers and resources:
13 //
14 // Ron Cytron et al. 1991. Efficiently computing SSA form...
15 // http://doi.acm.org/10.1145/115372.115320
16 //
17 // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
18 // Software Practice and Experience 2001, 4:1-10.
19 // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
20 //
21 // Daniel Berlin, llvmdev mailing list, 2012.
22 // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
23 // (Be sure to expand the whole thread.)
24 25 // TODO(adonovan): opt: there are many optimizations worth evaluating, and
26 // the conventional wisdom for SSA construction is that a simple
27 // algorithm well engineered often beats those of better asymptotic
28 // complexity on all but the most egregious inputs.
29 //
30 // Danny Berlin suggests that the Cooper et al. algorithm for
31 // computing the dominance frontier is superior to Cytron et al.
32 // Furthermore he recommends that rather than computing the DF for the
33 // whole function then renaming all alloc cells, it may be cheaper to
34 // compute the DF for each alloc cell separately and throw it away.
35 //
36 // Consider exploiting liveness information to avoid creating dead
37 // φ-nodes which we then immediately remove.
38 //
39 // Also see many other "TODO: opt" suggestions in the code.
40 41 import (
42 "fmt"
43 "go/token"
44 "math/big"
45 "os"
46 "slices"
47 48 "golang.org/x/tools/internal/typeparams"
49 )
// debugLifting, if true, causes diagnostic information to be printed
// to stderr at each step of lifting. Very verbose; for development only.
const debugLifting = false
// domFrontier maps each block to the set of blocks in its dominance
// frontier.  The outer slice is conceptually a map keyed by
// Block.Index.  The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
type domFrontier [][]*BasicBlock
67 68 func (df domFrontier) add(u, v *BasicBlock) {
69 p := &df[u.Index]
70 *p = append(*p, v)
71 }
72 73 // build builds the dominance frontier df for the dominator (sub)tree
74 // rooted at u, using the Cytron et al. algorithm.
75 //
76 // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
77 // by pruning the entire IDF computation, rather than merely pruning
78 // the DF -> IDF step.
79 func (df domFrontier) build(u *BasicBlock) {
80 // Encounter each node u in postorder of dom tree.
81 for _, child := range u.dom.children {
82 df.build(child)
83 }
84 for _, vb := range u.Succs {
85 if v := vb.dom; v.idom != u {
86 df.add(u, vb)
87 }
88 }
89 for _, w := range u.dom.children {
90 for _, vb := range df[w.Index] {
91 // TODO(adonovan): opt: use word-parallel bitwise union.
92 if v := vb.dom; v.idom != u {
93 df.add(u, vb)
94 }
95 }
96 }
97 }
98 99 func buildDomFrontier(fn *Function) domFrontier {
100 df := make(domFrontier, len(fn.Blocks))
101 df.build(fn.Blocks[0])
102 if fn.Recover != nil {
103 df.build(fn.Recover)
104 }
105 return df
106 }
107 108 func removeInstr(refs []Instruction, instr Instruction) []Instruction {
109 return slices.DeleteFunc(refs, func(i Instruction) bool { return i == instr })
110 }
// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier.  For example:
	//
	// - Alloc never loaded?  Eliminate.
	// - Alloc never stored?  Replace all loads with a zero constant.
	// - Alloc stored once?  Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps.  At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers'
	// instructions and ssa:deferstack() in functions that contain no
	// 'defer' instructions. For now, we also eliminate
	// 's = ssa:deferstack()' calls if s doesn't escape, replacing s
	// with nil in Defer{DeferStack: s}. This has the same meaning,
	// but allows eliminating the intrinsic function `ssa:deferstack()`
	// (unless it is needed due to range-over-func instances). This gives
	// ssa users more time to support range-over-func.
	usesDefer := false
	deferstackAlloc, deferstackCall := deferstackPreamble(fn)
	// Only safe to drop the deferstack when its Alloc does not escape.
	eliminateDeferStack := deferstackAlloc != nil && !deferstackAlloc.Heap

	// A counter used to generate ~unique ids for Phi nodes, as an
	// aid to debugging.  We use large numbers to make them highly
	// visible.  All nodes are renumbered later.
	fresh := 1000

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
	// An Alloc that cannot be lifted keeps index == -1.
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis, &fresh) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
				if eliminateDeferStack {
					// Clear the Defer's DeferStack operand, first
					// unregistering this Defer from its referrer list.
					if instr.DeferStack != nil {
						if refs := instr.DeferStack.Referrers(); refs != nil {
							*refs = removeInstr(*refs, instr)
						}
						instr.DeferStack = nil
					}
				}
			case *RunDefers:
				b.rundefers++
			}
		}
	}

	// renaming maps an alloc (keyed by index) to its replacement
	// value.  Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead φ-nodes.
	removeDeadPhis(fn.Blocks, newPhis)

	// Eliminate the now-unreferenced ssa:deferstack() call, if any.
	if eliminateDeferStack {
		b := deferstackCall.block
		for i, instr := range b.Instrs {
			if instr == deferstackCall {
				b.Instrs[i] = nil
				b.gaps++
				break
			}
		}
	}

	// Prepend remaining live φ-nodes to each block.
	for _, b := range fn.Blocks {
		nps := newPhis[b]
		j := len(nps)

		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ (rightwards)
		// if Instrs has sufficient space or slack.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue // gap left by a deleted Alloc/Store/Load/DebugRef
			}
			if !usesDefer {
				// rundefers is a no-op in a defer-free function; drop it.
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted (index >= 0),
	// compacting the survivors to the front.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}
// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
	// First pass: find the set of "live" φ-nodes: those reachable
	// from some non-Phi instruction.
	//
	// We compute reachability in reverse, starting from each φ,
	// rather than forwards, starting from each live non-Phi
	// instruction, because this way visits much less of the
	// Value graph.
	livePhis := make(map[*Phi]bool)
	for _, npList := range newPhis {
		for _, np := range npList {
			phi := np.phi
			if !livePhis[phi] && phiHasDirectReferrer(phi) {
				markLivePhi(livePhis, phi)
			}
		}
	}

	// Existing φ-nodes due to && and || operators
	// are all considered live (see Go issue 19622).
	for _, b := range blocks {
		for _, phi := range b.phis() {
			markLivePhi(livePhis, phi.(*Phi))
		}
	}

	// Second pass: eliminate unused phis from newPhis,
	// compacting each block's list in place.
	for block, npList := range newPhis {
		j := 0
		for _, np := range npList {
			if livePhis[np.phi] {
				npList[j] = np
				j++
			} else {
				// discard it, first removing it from referrers
				for _, val := range np.phi.Edges {
					if refs := val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				// Detach the dead phi from its block.
				np.phi.block = nil
			}
		}
		newPhis[block] = npList[:j]
	}
}
335 336 // markLivePhi marks phi, and all φ-nodes transitively reachable via
337 // its Operands, live.
338 func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
339 livePhis[phi] = true
340 for _, rand := range phi.Operands(nil) {
341 if q, ok := (*rand).(*Phi); ok {
342 if !livePhis[q] {
343 markLivePhi(livePhis, q)
344 }
345 }
346 }
347 }
348 349 // phiHasDirectReferrer reports whether phi is directly referred to by
350 // a non-Phi instruction. Such instructions are the
351 // roots of the liveness traversal.
352 func phiHasDirectReferrer(phi *Phi) bool {
353 for _, instr := range *phi.Referrers() {
354 if _, ok := instr.(*Phi); !ok {
355 return true
356 }
357 }
358 return false
359 }
360 361 type blockSet struct{ big.Int } // (inherit methods from Int)
362 363 // add adds b to the set and returns true if the set changed.
364 func (s *blockSet) add(b *BasicBlock) bool {
365 i := b.Index
366 if s.Bit(i) != 0 {
367 return false
368 }
369 s.SetBit(&s.Int, i, 1)
370 return true
371 }
372 373 // take removes an arbitrary element from a set s and
374 // returns its index, or returns -1 if empty.
375 func (s *blockSet) take() int {
376 l := s.BitLen()
377 for i := range l {
378 if s.Bit(i) == 1 {
379 s.SetBit(&s.Int, i, 0)
380 return i
381 }
382 }
383 return -1
384 }
// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	phi   *Phi
	alloc *Alloc
}

// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi
// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
	// Don't lift result values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		if slices.Contains(fn.results, alloc) {
			return false
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks blockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
	defblocks.add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration.  These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
	var hasAlready blockSet

	// Initialize W and work to defblocks.
	var work blockSet = defblocks // blocks seen
	var W blockSet                // blocks to do
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.take(); i != -1; i = W.take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(typeparams.MustDeref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				// A new φ is itself a definition; extend the
				// iterated dominance frontier if unseen.
				if work.add(v) {
					W.add(v)
				}
			}
		}
	}

	return true
}
497 498 // replaceAll replaces all intraprocedural uses of x with y,
499 // updating x.Referrers and y.Referrers.
500 // Precondition: x.Referrers() != nil, i.e. x must be local to some function.
501 func replaceAll(x, y Value) {
502 var rands []*Value
503 pxrefs := x.Referrers()
504 pyrefs := y.Referrers()
505 for _, instr := range *pxrefs {
506 rands = instr.Operands(rands[:0]) // recycle storage
507 for _, rand := range rands {
508 if *rand != nil {
509 if *rand == x {
510 *rand = y
511 }
512 }
513 }
514 if pyrefs != nil {
515 *pyrefs = append(*pyrefs, instr) // dups ok
516 }
517 }
518 *pxrefs = nil // x is now unreferenced
519 }
520 521 // renamed returns the value to which alloc is being renamed,
522 // constructing it lazily if it's the implicit zero initialization.
523 func renamed(renaming []Value, alloc *Alloc) Value {
524 v := renaming[alloc.index]
525 if v == nil {
526 v = zeroConst(typeparams.MustDeref(alloc.Type()))
527 renaming[alloc.index] = v
528 }
529 return v
530 }
// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction.  For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// Each φ-node becomes the new name for its associated Alloc.
	for _, np := range newPhis[u] {
		phi := np.phi
		alloc := np.alloc
		renaming[alloc.index] = phi
	}

	// Rename loads and stores of allocs.
	// Deleted instructions are replaced by nil in place, counted
	// in u.gaps; lift's final pass compacts the slice.
	for i, instr := range u.Instrs {
		switch instr := instr.(type) {
		case *Alloc:
			if instr.index >= 0 { // store of zero to Alloc cell
				// Replace dominated loads by the zero value.
				renaming[instr.index] = nil
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
				}
				// Delete the Alloc.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *Store:
			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
				// Replace dominated loads by the stored value.
				renaming[alloc.index] = instr.Val
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
						instr, instr.Val.Name())
				}
				// Remove the store from the referrer list of the stored value.
				if refs := instr.Val.Referrers(); refs != nil {
					*refs = removeInstr(*refs, instr)
				}
				// Delete the Store.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *UnOp:
			if instr.Op == token.MUL {
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
					newval := renamed(renaming, alloc)
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
							instr.Name(), instr, newval.Name())
					}
					// Replace all references to
					// the loaded value by the
					// dominating stored value.
					replaceAll(instr, newval)
					// Delete the Load.
					u.Instrs[i] = nil
					u.gaps++
				}
			}

		case *DebugRef:
			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
				if instr.IsAddr {
					// Rebind the DebugRef from the cell's address
					// to its current value.
					instr.X = renamed(renaming, alloc)
					instr.IsAddr = false

					// Add DebugRef to instr.X's referrers.
					if refs := instr.X.Referrers(); refs != nil {
						*refs = append(*refs, instr)
					}
				} else {
					// A source expression denotes the address
					// of an Alloc that was optimized away.
					instr.X = nil

					// Delete the DebugRef.
					u.Instrs[i] = nil
					u.gaps++
				}
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge
	// corresponding to the u -> v CFG edge.
	for _, v := range u.Succs {
		phis := newPhis[v]
		if len(phis) == 0 {
			continue
		}
		i := v.predIndex(u)
		for _, np := range phis {
			phi := np.phi
			alloc := np.alloc
			newval := renamed(renaming, alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					phi.Name(), u, v, i, alloc.Name(), newval.Name())
			}
			phi.Edges[i] = newval
			if prefs := newval.Referrers(); prefs != nil {
				*prefs = append(*prefs, phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	// The final child may destructively reuse our copy.
	for i, v := range u.dom.children {
		r := renaming
		if i < len(u.dom.children)-1 {
			// On all but the final iteration, we must make
			// a copy to avoid destructive update.
			r = make([]Value, len(renaming))
			copy(r, renaming)
		}
		rename(v, r, newPhis)
	}

}
658 659 // deferstackPreamble returns the *Alloc and ssa:deferstack() call for fn.deferstack.
660 func deferstackPreamble(fn *Function) (*Alloc, *Call) {
661 if alloc, _ := fn.vars[fn.deferstack].(*Alloc); alloc != nil {
662 for _, ref := range *alloc.Referrers() {
663 if ref, _ := ref.(*Store); ref != nil && ref.Addr == alloc {
664 if call, _ := ref.Val.(*Call); call != nil {
665 return alloc, call
666 }
667 }
668 }
669 }
670 return nil, nil
671 }
672