inspector.go raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package inspector provides helper functions for traversal over the
   6  // syntax trees of a package, including node filtering by type, and
   7  // materialization of the traversal stack.
   8  //
   9  // During construction, the inspector does a complete traversal and
  10  // builds a list of push/pop events and their node type. Subsequent
  11  // method calls that request a traversal scan this list, rather than walk
  12  // the AST, and perform type filtering using efficient bit sets.
  13  // This representation is sometimes called a "balanced parenthesis tree."
  14  //
  15  // Experiments suggest the inspector's traversals are about 2.5x faster
  16  // than [ast.Inspect], but it may take around 5 traversals for this
  17  // benefit to amortize the inspector's construction cost.
  18  // If efficiency is the primary concern, do not use Inspector for
  19  // one-off traversals.
  20  //
  21  // The [Cursor] type provides a more flexible API for efficient
  22  // navigation of syntax trees in all four "cardinal directions". For
  23  // example, traversals may be nested, so you can find each node of
  24  // type A and then search within it for nodes of type B. Or you can
  25  // traverse from a node to its immediate neighbors: its parent, its
  26  // previous and next sibling, or its first and last child. We
  27  // recommend using methods of Cursor in preference to Inspector where
  28  // possible.
  29  package inspector
  30  
  31  // There are four orthogonal features in a traversal:
  32  //  1 type filtering
  33  //  2 pruning
  34  //  3 postorder calls to f
  35  //  4 stack
  36  // Rather than offer all of them in the API,
  37  // only a few combinations are exposed:
  38  // - Preorder is the fastest and has fewest features,
  39  //   but is the most commonly needed traversal.
  40  // - Nodes and WithStack both provide pruning and postorder calls,
  41  //   even though few clients need it, because supporting two versions
  42  //   is not justified.
  43  // More combinations could be supported by expressing them as
  44  // wrappers around a more generic traversal, but this was measured
  45  // and found to degrade performance significantly (30%).
  46  
  47  import (
  48  	"go/ast"
  49  
  50  	"golang.org/x/tools/go/ast/edge"
  51  )
  52  
  53  // An Inspector provides methods for inspecting
  54  // (traversing) the syntax trees of a package.
  55  type Inspector struct {
  56  	events []event
  57  }
  58  
  59  func packEdgeKindAndIndex(ek edge.Kind, index int) int32 {
  60  	return int32(uint32(index+1)<<7 | uint32(ek))
  61  }
  62  
  63  // unpackEdgeKindAndIndex unpacks the edge kind and edge index (within
  64  // an []ast.Node slice) from the parent field of a pop event.
  65  func unpackEdgeKindAndIndex(x int32) (edge.Kind, int) {
  66  	// The "parent" field of a pop node holds the
  67  	// edge Kind in the lower 7 bits and the index+1
  68  	// in the upper 25.
  69  	return edge.Kind(x & 0x7f), int(x>>7) - 1
  70  }
  71  
  72  // New returns an Inspector for the specified syntax trees.
  73  func New(files []*ast.File) *Inspector {
  74  	return &Inspector{traverse(files)}
  75  }
  76  
  77  // An event represents a push or a pop
  78  // of an ast.Node during a traversal.
  79  type event struct {
  80  	node   ast.Node
  81  	typ    uint64 // typeOf(node) on push event, or union of typ strictly between push and pop events on pop events
  82  	index  int32  // index of corresponding push or pop event
  83  	parent int32  // index of parent's push node (push nodes only), or packed edge kind/index (pop nodes only)
  84  }
  85  
  86  // TODO: Experiment with storing only the second word of event.node (unsafe.Pointer).
  87  // Type can be recovered from the sole bit in typ.
  88  // [Tried this, wasn't faster. --adonovan]
  89  
  90  // Preorder visits all the nodes of the files supplied to [New] in
  91  // depth-first order. It calls f(n) for each node n before it visits
  92  // n's children.
  93  //
  94  // The complete traversal sequence is determined by [ast.Inspect].
  95  // The types argument, if non-empty, enables type-based filtering of
  96  // events. The function f is called only for nodes whose type
  97  // matches an element of the types slice.
  98  //
  99  // The [Cursor.Preorder] method provides a richer alternative interface.
 100  // Example:
 101  //
 102  //	for c := range in.Root().Preorder(types) { ... }
 103  func (in *Inspector) Preorder(types []ast.Node, f func(ast.Node)) {
 104  	// Because it avoids postorder calls to f, and the pruning
 105  	// check, Preorder is almost twice as fast as Nodes. The two
 106  	// features seem to contribute similar slowdowns (~1.4x each).
 107  
 108  	// This function is equivalent to the PreorderSeq call below,
 109  	// but to avoid the additional dynamic call (which adds 13-35%
 110  	// to the benchmarks), we expand it out.
 111  	//
 112  	// in.PreorderSeq(types...)(func(n ast.Node) bool {
 113  	// 	f(n)
 114  	// 	return true
 115  	// })
 116  
 117  	mask := maskOf(types)
 118  	for i := int32(0); i < int32(len(in.events)); {
 119  		ev := in.events[i]
 120  		if ev.index > i {
 121  			// push
 122  			if ev.typ&mask != 0 {
 123  				f(ev.node)
 124  			}
 125  			pop := ev.index
 126  			if in.events[pop].typ&mask == 0 {
 127  				// Subtrees do not contain types: skip them and pop.
 128  				i = pop + 1
 129  				continue
 130  			}
 131  		}
 132  		i++
 133  	}
 134  }
 135  
 136  // Nodes visits the nodes of the files supplied to [New] in depth-first
 137  // order. It calls f(n, true) for each node n before it visits n's
 138  // children. If f returns true, Nodes invokes f recursively for each
 139  // of the non-nil children of the node, followed by a call of
 140  // f(n, false).
 141  //
 142  // The complete traversal sequence is determined by [ast.Inspect].
 143  // The types argument, if non-empty, enables type-based filtering of
 144  // events. The function f if is called only for nodes whose type
 145  // matches an element of the types slice.
 146  //
 147  // The [Cursor.Inspect] method provides a richer alternative interface.
 148  // Example:
 149  //
 150  //	in.Root().Inspect(types, func(c Cursor) bool {
 151  //		...
 152  //		return true
 153  //	}
 154  func (in *Inspector) Nodes(types []ast.Node, f func(n ast.Node, push bool) (proceed bool)) {
 155  	mask := maskOf(types)
 156  	for i := int32(0); i < int32(len(in.events)); {
 157  		ev := in.events[i]
 158  		if ev.index > i {
 159  			// push
 160  			pop := ev.index
 161  			if ev.typ&mask != 0 {
 162  				if !f(ev.node, true) {
 163  					i = pop + 1 // jump to corresponding pop + 1
 164  					continue
 165  				}
 166  			}
 167  			if in.events[pop].typ&mask == 0 {
 168  				// Subtrees do not contain types: skip them.
 169  				i = pop
 170  				continue
 171  			}
 172  		} else {
 173  			// pop
 174  			push := ev.index
 175  			if in.events[push].typ&mask != 0 {
 176  				f(ev.node, false)
 177  			}
 178  		}
 179  		i++
 180  	}
 181  }
 182  
 183  // WithStack visits nodes in a similar manner to Nodes, but it
 184  // supplies each call to f an additional argument, the current
 185  // traversal stack. The stack's first element is the outermost node,
 186  // an *ast.File; its last is the innermost, n.
 187  //
 188  // The [Cursor.Inspect] method provides a richer alternative interface.
 189  // Example:
 190  //
 191  //	in.Root().Inspect(types, func(c Cursor) bool {
 192  //		stack := slices.Collect(c.Enclosing())
 193  //		...
 194  //		return true
 195  //	})
 196  func (in *Inspector) WithStack(types []ast.Node, f func(n ast.Node, push bool, stack []ast.Node) (proceed bool)) {
 197  	mask := maskOf(types)
 198  	var stack []ast.Node
 199  	for i := int32(0); i < int32(len(in.events)); {
 200  		ev := in.events[i]
 201  		if ev.index > i {
 202  			// push
 203  			pop := ev.index
 204  			stack = append(stack, ev.node)
 205  			if ev.typ&mask != 0 {
 206  				if !f(ev.node, true, stack) {
 207  					i = pop + 1
 208  					stack = stack[:len(stack)-1]
 209  					continue
 210  				}
 211  			}
 212  			if in.events[pop].typ&mask == 0 {
 213  				// Subtrees does not contain types: skip them.
 214  				i = pop
 215  				continue
 216  			}
 217  		} else {
 218  			// pop
 219  			push := ev.index
 220  			if in.events[push].typ&mask != 0 {
 221  				f(ev.node, false, stack)
 222  			}
 223  			stack = stack[:len(stack)-1]
 224  		}
 225  		i++
 226  	}
 227  }
 228  
 229  // traverse builds the table of events representing a traversal.
 230  func traverse(files []*ast.File) []event {
 231  	// Preallocate approximate number of events
 232  	// based on source file extent of the declarations.
 233  	// (We use End-Pos not FileStart-FileEnd to neglect
 234  	// the effect of long doc comments.)
 235  	// This makes traverse faster by 4x (!).
 236  	var extent int
 237  	for _, f := range files {
 238  		extent += int(f.End() - f.Pos())
 239  	}
 240  	// This estimate is based on the net/http package.
 241  	capacity := min(extent*33/100, 1e6) // impose some reasonable maximum (1M)
 242  
 243  	v := &visitor{
 244  		events: make([]event, 0, capacity),
 245  		stack:  []item{{index: -1}}, // include an extra event so file nodes have a parent
 246  	}
 247  	for _, file := range files {
 248  		walk(v, edge.Invalid, -1, file)
 249  	}
 250  	return v.events
 251  }
 252  
 253  type visitor struct {
 254  	events []event
 255  	stack  []item
 256  }
 257  
 258  type item struct {
 259  	index            int32  // index of current node's push event
 260  	parentIndex      int32  // index of parent node's push event
 261  	typAccum         uint64 // accumulated type bits of current node's descendants
 262  	edgeKindAndIndex int32  // edge.Kind and index, bit packed
 263  }
 264  
 265  func (v *visitor) push(ek edge.Kind, eindex int, node ast.Node) {
 266  	var (
 267  		index       = int32(len(v.events))
 268  		parentIndex = v.stack[len(v.stack)-1].index
 269  	)
 270  	v.events = append(v.events, event{
 271  		node:   node,
 272  		parent: parentIndex,
 273  		typ:    typeOf(node),
 274  		index:  0, // (pop index is set later by visitor.pop)
 275  	})
 276  	v.stack = append(v.stack, item{
 277  		index:            index,
 278  		parentIndex:      parentIndex,
 279  		edgeKindAndIndex: packEdgeKindAndIndex(ek, eindex),
 280  	})
 281  
 282  	// 2B nodes ought to be enough for anyone!
 283  	if int32(len(v.events)) < 0 {
 284  		panic("event index exceeded int32")
 285  	}
 286  
 287  	// 32M elements in an []ast.Node ought to be enough for anyone!
 288  	if ek2, eindex2 := unpackEdgeKindAndIndex(packEdgeKindAndIndex(ek, eindex)); ek2 != ek || eindex2 != eindex {
 289  		panic("Node slice index exceeded uint25")
 290  	}
 291  }
 292  
 293  func (v *visitor) pop(node ast.Node) {
 294  	top := len(v.stack) - 1
 295  	current := v.stack[top]
 296  
 297  	push := &v.events[current.index]
 298  	parent := &v.stack[top-1]
 299  
 300  	push.index = int32(len(v.events))              // make push event refer to pop
 301  	parent.typAccum |= current.typAccum | push.typ // accumulate type bits into parent
 302  
 303  	v.stack = v.stack[:top]
 304  
 305  	v.events = append(v.events, event{
 306  		node:   node,
 307  		typ:    current.typAccum,
 308  		index:  current.index,
 309  		parent: current.edgeKindAndIndex, // see [unpackEdgeKindAndIndex]
 310  	})
 311  }
 312