routing_tree.mx raw

   1  // Copyright 2023 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // This file implements a decision tree for fast matching of requests to
   6  // patterns.
   7  //
   8  // The root of the tree branches on the host of the request.
   9  // The next level branches on the method.
  10  // The remaining levels branch on consecutive segments of the path.
  11  //
  12  // The "more specific wins" precedence rule can result in backtracking.
  13  // For example, given the patterns
  14  //     /a/b/z
  15  //     /a/{x}/c
  16  // we will first try to match the path "/a/b/c" with /a/b/z, and
  17  // when that fails we will try against /a/{x}/c.
  18  
  19  package http
  20  
  21  import (
  22  	"bytes"
  23  )
  24  
  25  // A routingNode is a node in the decision tree.
  26  // The same struct is used for leaf and interior nodes.
  27  type routingNode struct {
  28  	// A leaf node holds a single pattern and the Handler it was registered
  29  	// with.
  30  	pattern *pattern
  31  	handler Handler
  32  
  33  	// An interior node maps parts of the incoming request to child nodes.
  34  	// special children keys:
  35  	//     "/"	trailing slash (resulting from {$})
  36  	//	   ""   single wildcard
  37  	children   mapping[string, *routingNode]
  38  	multiChild *routingNode // child with multi wildcard
  39  	emptyChild *routingNode // optimization: child with key ""
  40  }
  41  
  42  // addPattern adds a pattern and its associated Handler to the tree
  43  // at root.
  44  func (root *routingNode) addPattern(p *pattern, h Handler) {
  45  	// First level of tree is host.
  46  	n := root.addChild(p.host)
  47  	// Second level of tree is method.
  48  	n = n.addChild(p.method)
  49  	// Remaining levels are path.
  50  	n.addSegments(p.segments, p, h)
  51  }
  52  
  53  // addSegments adds the given segments to the tree rooted at n.
  54  // If there are no segments, then n is a leaf node that holds
  55  // the given pattern and handler.
  56  func (n *routingNode) addSegments(segs []segment, p *pattern, h Handler) {
  57  	if len(segs) == 0 {
  58  		n.set(p, h)
  59  		return
  60  	}
  61  	seg := segs[0]
  62  	if seg.multi {
  63  		if len(segs) != 1 {
  64  			panic("multi wildcard not last")
  65  		}
  66  		c := &routingNode{}
  67  		n.multiChild = c
  68  		c.set(p, h)
  69  	} else if seg.wild {
  70  		n.addChild("").addSegments(segs[1:], p, h)
  71  	} else {
  72  		n.addChild(seg.s).addSegments(segs[1:], p, h)
  73  	}
  74  }
  75  
  76  // set sets the pattern and handler for n, which
  77  // must be a leaf node.
  78  func (n *routingNode) set(p *pattern, h Handler) {
  79  	if n.pattern != nil || n.handler != nil {
  80  		panic("non-nil leaf fields")
  81  	}
  82  	n.pattern = p
  83  	n.handler = h
  84  }
  85  
  86  // addChild adds a child node with the given key to n
  87  // if one does not exist, and returns the child.
  88  func (n *routingNode) addChild(key []byte) *routingNode {
  89  	if key == "" {
  90  		if n.emptyChild == nil {
  91  			n.emptyChild = &routingNode{}
  92  		}
  93  		return n.emptyChild
  94  	}
  95  	if c := n.findChild(key); c != nil {
  96  		return c
  97  	}
  98  	c := &routingNode{}
  99  	n.children.add(key, c)
 100  	return c
 101  }
 102  
 103  // findChild returns the child of n with the given key, or nil
 104  // if there is no child with that key.
 105  func (n *routingNode) findChild(key []byte) *routingNode {
 106  	if key == "" {
 107  		return n.emptyChild
 108  	}
 109  	r, _ := n.children.find(key)
 110  	return r
 111  }
 112  
 113  // match returns the leaf node under root that matches the arguments, and a list
 114  // of values for pattern wildcards in the order that the wildcards appear.
 115  // For example, if the request path is "/a/b/c" and the pattern is "/{x}/b/{y}",
 116  // then the second return value will be [][]byte{"a", "c"}.
 117  func (root *routingNode) match(host, method, path []byte) (*routingNode, [][]byte) {
 118  	if host != "" {
 119  		// There is a host. If there is a pattern that specifies that host and it
 120  		// matches, we are done. If the pattern doesn't match, fall through to
 121  		// try patterns with no host.
 122  		if l, m := root.findChild(host).matchMethodAndPath(method, path); l != nil {
 123  			return l, m
 124  		}
 125  	}
 126  	return root.emptyChild.matchMethodAndPath(method, path)
 127  }
 128  
 129  // matchMethodAndPath matches the method and path.
 130  // Its return values are the same as [routingNode.match].
 131  // The receiver should be a child of the root.
 132  func (n *routingNode) matchMethodAndPath(method, path []byte) (*routingNode, [][]byte) {
 133  	if n == nil {
 134  		return nil, nil
 135  	}
 136  	if l, m := n.findChild(method).matchPath(path, nil); l != nil {
 137  		// Exact match of method name.
 138  		return l, m
 139  	}
 140  	if method == "HEAD" {
 141  		// GET matches HEAD too.
 142  		if l, m := n.findChild("GET").matchPath(path, nil); l != nil {
 143  			return l, m
 144  		}
 145  	}
 146  	// No exact match; try patterns with no method.
 147  	return n.emptyChild.matchPath(path, nil)
 148  }
 149  
 150  // matchPath matches a path.
 151  // Its return values are the same as [routingNode.match].
 152  // matchPath calls itself recursively. The matches argument holds the wildcard matches
 153  // found so far.
 154  func (n *routingNode) matchPath(path []byte, matches [][]byte) (*routingNode, [][]byte) {
 155  	if n == nil {
 156  		return nil, nil
 157  	}
 158  	// If path is empty, then we are done.
 159  	// If n is a leaf node, we found a match; return it.
 160  	// If n is an interior node (which means it has a nil pattern),
 161  	// then we failed to match.
 162  	if path == "" {
 163  		if n.pattern == nil {
 164  			return nil, nil
 165  		}
 166  		return n, matches
 167  	}
 168  	// Get the first segment of path.
 169  	seg, rest := firstSegment(path)
 170  	// First try matching against patterns that have a literal for this position.
 171  	// We know by construction that such patterns are more specific than those
 172  	// with a wildcard at this position (they are either more specific, equivalent,
 173  	// or overlap, and we ruled out the first two when the patterns were registered).
 174  	if n, m := n.findChild(seg).matchPath(rest, matches); n != nil {
 175  		return n, m
 176  	}
 177  	// If matching a literal fails, try again with patterns that have a single
 178  	// wildcard (represented by an empty string in the child mapping).
 179  	// Again, by construction, patterns with a single wildcard must be more specific than
 180  	// those with a multi wildcard.
 181  	// We skip this step if the segment is a trailing slash, because single wildcards
 182  	// don't match trailing slashes.
 183  	if seg != "/" {
 184  		if n, m := n.emptyChild.matchPath(rest, append(matches, seg)); n != nil {
 185  			return n, m
 186  		}
 187  	}
 188  	// Lastly, match the pattern (there can be at most one) that has a multi
 189  	// wildcard in this position to the rest of the path.
 190  	if c := n.multiChild; c != nil {
 191  		// Don't record a match for a nameless wildcard (which arises from a
 192  		// trailing slash in the pattern).
 193  		if c.pattern.lastSegment().s != "" {
 194  			matches = append(matches, pathUnescape(path[1:])) // remove initial slash
 195  		}
 196  		return c, matches
 197  	}
 198  	return nil, nil
 199  }
 200  
 201  // firstSegment splits path into its first segment, and the rest.
 202  // The path must begin with "/".
 203  // If path consists of only a slash, firstSegment returns ("/", "").
 204  // The segment is returned unescaped, if possible.
 205  func firstSegment(path []byte) (seg, rest []byte) {
 206  	if path == "/" {
 207  		return "/", ""
 208  	}
 209  	path = path[1:] // drop initial slash
 210  	i := bytes.IndexByte(path, '/')
 211  	if i < 0 {
 212  		i = len(path)
 213  	}
 214  	return pathUnescape(path[:i]), path[i:]
 215  }
 216  
 217  // matchingMethods adds to methodSet all the methods that would result in a
 218  // match if passed to routingNode.match with the given host and path.
 219  func (root *routingNode) matchingMethods(host, path []byte, methodSet map[string]bool) {
 220  	if host != "" {
 221  		root.findChild(host).matchingMethodsPath(path, methodSet)
 222  	}
 223  	root.emptyChild.matchingMethodsPath(path, methodSet)
 224  	if methodSet["GET"] {
 225  		methodSet["HEAD"] = true
 226  	}
 227  }
 228  
 229  func (n *routingNode) matchingMethodsPath(path []byte, set map[string]bool) {
 230  	if n == nil {
 231  		return
 232  	}
 233  	n.children.eachPair(func(method string, c *routingNode) bool {
 234  		if p, _ := c.matchPath(path, nil); p != nil {
 235  			set[method] = true
 236  		}
 237  		return true
 238  	})
 239  	// Don't look at the empty child. If there were an empty
 240  	// child, it would match on any method, but we only
 241  	// call this when we fail to match on a method.
 242  }
 243