pattern.mx raw

   1  // Copyright 2023 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Patterns for ServeMux routing.
   6  
   7  package http
   8  
   9  import (
  10  	"errors"
  11  	"fmt"
  12  	"net/url"
  13  	"bytes"
  14  	"unicode"
  15  )
  16  
  17  // A pattern is something that can be matched against an HTTP request.
  18  // It has an optional method, an optional host, and a path.
  19  type pattern struct {
  20  	str    []byte // original string
  21  	method []byte
  22  	host   []byte
  23  	// The representation of a path differs from the surface syntax, which
  24  	// simplifies most algorithms.
  25  	//
  26  	// Paths ending in '/' are represented with an anonymous "..." wildcard.
  27  	// For example, the path "a/" is represented as a literal segment "a" followed
  28  	// by a segment with multi==true.
  29  	//
  30  	// Paths ending in "{$}" are represented with the literal segment "/".
  31  	// For example, the path "a/{$}" is represented as a literal segment "a" followed
  32  	// by a literal segment "/".
  33  	segments []segment
  34  	loc      []byte // source location of registering call, for helpful messages
  35  }
  36  
  37  func (p *pattern) String() string { return p.str }
  38  
  39  func (p *pattern) lastSegment() segment {
  40  	return p.segments[len(p.segments)-1]
  41  }
  42  
  43  // A segment is a pattern piece that matches one or more path segments, or
  44  // a trailing slash.
  45  //
  46  // If wild is false, it matches a literal segment, or, if s == "/", a trailing slash.
  47  // Examples:
  48  //
  49  //	"a" => segment{s: "a"}
  50  //	"/{$}" => segment{s: "/"}
  51  //
  52  // If wild is true and multi is false, it matches a single path segment.
  53  // Example:
  54  //
  55  //	"{x}" => segment{s: "x", wild: true}
  56  //
  57  // If both wild and multi are true, it matches all remaining path segments.
  58  // Example:
  59  //
  60  //	"{rest...}" => segment{s: "rest", wild: true, multi: true}
  61  type segment struct {
  62  	s     []byte // literal or wildcard name or "/" for "/{$}".
  63  	wild  bool
  64  	multi bool // "..." wildcard
  65  }
  66  
  67  // parsePattern parses a string into a Pattern.
  68  // The string's syntax is
  69  //
  70  //	[METHOD] [HOST]/[PATH]
  71  //
  72  // where:
  73  //   - METHOD is an HTTP method
  74  //   - HOST is a hostname
  75  //   - PATH consists of slash-separated segments, where each segment is either
  76  //     a literal or a wildcard of the form "{name}", "{name...}", or "{$}".
  77  //
  78  // METHOD, HOST and PATH are all optional; that is, the string can be "/".
  79  // If METHOD is present, it must be followed by at least one space or tab.
  80  // Wildcard names must be valid Go identifiers.
  81  // The "{$}" and "{name...}" wildcard must occur at the end of PATH.
  82  // PATH may end with a '/'.
  83  // Wildcard names in a path must be distinct.
  84  func parsePattern(s []byte) (_ *pattern, err error) {
  85  	if len(s) == 0 {
  86  		return nil, errors.New("empty pattern")
  87  	}
  88  	off := 0 // offset into string
  89  	defer func() {
  90  		if err != nil {
  91  			err = fmt.Errorf("at offset %d: %w", off, err)
  92  		}
  93  	}()
  94  
  95  	method, rest, found := s, "", false
  96  	if i := bytes.IndexAny(s, " \t"); i >= 0 {
  97  		method, rest, found = s[:i], bytes.TrimLeft(s[i+1:], " \t"), true
  98  	}
  99  	if !found {
 100  		rest = method
 101  		method = ""
 102  	}
 103  	if method != "" && !validMethod(method) {
 104  		return nil, fmt.Errorf("invalid method %q", method)
 105  	}
 106  	p := &pattern{str: s, method: method}
 107  
 108  	if found {
 109  		off = len(method) + 1
 110  	}
 111  	i := bytes.IndexByte(rest, '/')
 112  	if i < 0 {
 113  		return nil, errors.New("host/path missing /")
 114  	}
 115  	p.host = rest[:i]
 116  	rest = rest[i:]
 117  	if j := bytes.IndexByte(p.host, '{'); j >= 0 {
 118  		off += j
 119  		return nil, errors.New("host contains '{' (missing initial '/'?)")
 120  	}
 121  	// At this point, rest is the path.
 122  	off += i
 123  
 124  	// An unclean path with a method that is not CONNECT can never match,
 125  	// because paths are cleaned before matching.
 126  	if method != "" && method != "CONNECT" && rest != cleanPath(rest) {
 127  		return nil, errors.New("non-CONNECT pattern with unclean path can never match")
 128  	}
 129  
 130  	seenNames := map[string]bool{} // remember wildcard names to catch dups
 131  	for len(rest) > 0 {
 132  		// Invariant: rest[0] == '/'.
 133  		rest = rest[1:]
 134  		off = len(s) - len(rest)
 135  		if len(rest) == 0 {
 136  			// Trailing slash.
 137  			p.segments = append(p.segments, segment{wild: true, multi: true})
 138  			break
 139  		}
 140  		i := bytes.IndexByte(rest, '/')
 141  		if i < 0 {
 142  			i = len(rest)
 143  		}
 144  		var seg []byte
 145  		seg, rest = rest[:i], rest[i:]
 146  		if i := bytes.IndexByte(seg, '{'); i < 0 {
 147  			// Literal.
 148  			seg = pathUnescape(seg)
 149  			p.segments = append(p.segments, segment{s: seg})
 150  		} else {
 151  			// Wildcard.
 152  			if i != 0 {
 153  				return nil, errors.New("bad wildcard segment (must start with '{')")
 154  			}
 155  			if seg[len(seg)-1] != '}' {
 156  				return nil, errors.New("bad wildcard segment (must end with '}')")
 157  			}
 158  			name := seg[1 : len(seg)-1]
 159  			if name == "$" {
 160  				if len(rest) != 0 {
 161  					return nil, errors.New("{$} not at end")
 162  				}
 163  				p.segments = append(p.segments, segment{s: "/"})
 164  				break
 165  			}
 166  			name, multi := bytes.CutSuffix(name, "...")
 167  			if multi && len(rest) != 0 {
 168  				return nil, errors.New("{...} wildcard not at end")
 169  			}
 170  			if name == "" {
 171  				return nil, errors.New("empty wildcard")
 172  			}
 173  			if !isValidWildcardName(name) {
 174  				return nil, fmt.Errorf("bad wildcard name %q", name)
 175  			}
 176  			if seenNames[name] {
 177  				return nil, fmt.Errorf("duplicate wildcard name %q", name)
 178  			}
 179  			seenNames[name] = true
 180  			p.segments = append(p.segments, segment{s: name, wild: true, multi: multi})
 181  		}
 182  	}
 183  	return p, nil
 184  }
 185  
 186  func isValidWildcardName(s []byte) bool {
 187  	if s == "" {
 188  		return false
 189  	}
 190  	// Valid Go identifier.
 191  	for i, c := range s {
 192  		if !unicode.IsLetter(rune(c)) && c != '_' && (i == 0 || !unicode.IsDigit(rune(c))) {
 193  			return false
 194  		}
 195  	}
 196  	return true
 197  }
 198  
 199  func pathUnescape(path []byte) []byte {
 200  	u, err := url.PathUnescape(path)
 201  	if err != nil {
 202  		// Invalidly escaped path; use the original
 203  		return path
 204  	}
 205  	return u
 206  }
 207  
 208  // relationship is a relationship between two patterns, p1 and p2.
 209  type relationship string
 210  
 211  const (
 212  	equivalent   relationship = "equivalent"   // both match the same requests
 213  	moreGeneral  relationship = "moreGeneral"  // p1 matches everything p2 does & more
 214  	moreSpecific relationship = "moreSpecific" // p2 matches everything p1 does & more
 215  	disjoint     relationship = "disjoint"     // there is no request that both match
 216  	overlaps     relationship = "overlaps"     // there is a request that both match, but neither is more specific
 217  )
 218  
 219  // conflictsWith reports whether p1 conflicts with p2, that is, whether
 220  // there is a request that both match but where neither is higher precedence
 221  // than the other.
 222  //
 223  //	Precedence is defined by two rules:
 224  //	1. Patterns with a host win over patterns without a host.
 225  //	2. Patterns whose method and path is more specific win. One pattern is more
 226  //	   specific than another if the second matches all the (method, path) pairs
 227  //	   of the first and more.
 228  //
 229  // If rule 1 doesn't apply, then two patterns conflict if their relationship
 230  // is either equivalence (they match the same set of requests) or overlap
 231  // (they both match some requests, but neither is more specific than the other).
 232  func (p1 *pattern) conflictsWith(p2 *pattern) bool {
 233  	if p1.host != p2.host {
 234  		// Either one host is empty and the other isn't, in which case the
 235  		// one with the host wins by rule 1, or neither host is empty
 236  		// and they differ, so they won't match the same paths.
 237  		return false
 238  	}
 239  	rel := p1.comparePathsAndMethods(p2)
 240  	return rel == equivalent || rel == overlaps
 241  }
 242  
 243  func (p1 *pattern) comparePathsAndMethods(p2 *pattern) relationship {
 244  	mrel := p1.compareMethods(p2)
 245  	// Optimization: avoid a call to comparePaths.
 246  	if mrel == disjoint {
 247  		return disjoint
 248  	}
 249  	prel := p1.comparePaths(p2)
 250  	return combineRelationships(mrel, prel)
 251  }
 252  
 253  // compareMethods determines the relationship between the method
 254  // part of patterns p1 and p2.
 255  //
 256  // A method can either be empty, "GET", or something else.
 257  // The empty string matches any method, so it is the most general.
 258  // "GET" matches both GET and HEAD.
 259  // Anything else matches only itself.
 260  func (p1 *pattern) compareMethods(p2 *pattern) relationship {
 261  	if p1.method == p2.method {
 262  		return equivalent
 263  	}
 264  	if p1.method == "" {
 265  		// p1 matches any method, but p2 does not, so p1 is more general.
 266  		return moreGeneral
 267  	}
 268  	if p2.method == "" {
 269  		return moreSpecific
 270  	}
 271  	if p1.method == "GET" && p2.method == "HEAD" {
 272  		// p1 matches GET and HEAD; p2 matches only HEAD.
 273  		return moreGeneral
 274  	}
 275  	if p2.method == "GET" && p1.method == "HEAD" {
 276  		return moreSpecific
 277  	}
 278  	return disjoint
 279  }
 280  
 281  // comparePaths determines the relationship between the path
 282  // part of two patterns.
 283  func (p1 *pattern) comparePaths(p2 *pattern) relationship {
 284  	// Optimization: if a path pattern doesn't end in a multi ("...") wildcard, then it
 285  	// can only match paths with the same number of segments.
 286  	if len(p1.segments) != len(p2.segments) && !p1.lastSegment().multi && !p2.lastSegment().multi {
 287  		return disjoint
 288  	}
 289  
 290  	// Consider corresponding segments in the two path patterns.
 291  	var segs1, segs2 []segment
 292  	rel := equivalent
 293  	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
 294  		rel = combineRelationships(rel, compareSegments(segs1[0], segs2[0]))
 295  		if rel == disjoint {
 296  			return rel
 297  		}
 298  	}
 299  	// We've reached the end of the corresponding segments of the patterns.
 300  	// If they have the same number of segments, then we've already determined
 301  	// their relationship.
 302  	if len(segs1) == 0 && len(segs2) == 0 {
 303  		return rel
 304  	}
 305  	// Otherwise, the only way they could fail to be disjoint is if the shorter
 306  	// pattern ends in a multi. In that case, that multi is more general
 307  	// than the remainder of the longer pattern, so combine those two relationships.
 308  	if len(segs1) < len(segs2) && p1.lastSegment().multi {
 309  		return combineRelationships(rel, moreGeneral)
 310  	}
 311  	if len(segs2) < len(segs1) && p2.lastSegment().multi {
 312  		return combineRelationships(rel, moreSpecific)
 313  	}
 314  	return disjoint
 315  }
 316  
 317  // compareSegments determines the relationship between two segments.
 318  func compareSegments(s1, s2 segment) relationship {
 319  	if s1.multi && s2.multi {
 320  		return equivalent
 321  	}
 322  	if s1.multi {
 323  		return moreGeneral
 324  	}
 325  	if s2.multi {
 326  		return moreSpecific
 327  	}
 328  	if s1.wild && s2.wild {
 329  		return equivalent
 330  	}
 331  	if s1.wild {
 332  		if s2.s == "/" {
 333  			// A single wildcard doesn't match a trailing slash.
 334  			return disjoint
 335  		}
 336  		return moreGeneral
 337  	}
 338  	if s2.wild {
 339  		if s1.s == "/" {
 340  			return disjoint
 341  		}
 342  		return moreSpecific
 343  	}
 344  	// Both literals.
 345  	if s1.s == s2.s {
 346  		return equivalent
 347  	}
 348  	return disjoint
 349  }
 350  
 351  // combineRelationships determines the overall relationship of two patterns
 352  // given the relationships of a partition of the patterns into two parts.
 353  //
 354  // For example, if p1 is more general than p2 in one way but equivalent
 355  // in the other, then it is more general overall.
 356  //
 357  // Or if p1 is more general in one way and more specific in the other, then
 358  // they overlap.
 359  func combineRelationships(r1, r2 relationship) relationship {
 360  	switch r1 {
 361  	case equivalent:
 362  		return r2
 363  	case disjoint:
 364  		return disjoint
 365  	case overlaps:
 366  		if r2 == disjoint {
 367  			return disjoint
 368  		}
 369  		return overlaps
 370  	case moreGeneral, moreSpecific:
 371  		switch r2 {
 372  		case equivalent:
 373  			return r1
 374  		case inverseRelationship(r1):
 375  			return overlaps
 376  		default:
 377  			return r2
 378  		}
 379  	default:
 380  		panic(fmt.Sprintf("unknown relationship %q", r1))
 381  	}
 382  }
 383  
 384  // If p1 has relationship `r` to p2, then
 385  // p2 has inverseRelationship(r) to p1.
 386  func inverseRelationship(r relationship) relationship {
 387  	switch r {
 388  	case moreSpecific:
 389  		return moreGeneral
 390  	case moreGeneral:
 391  		return moreSpecific
 392  	default:
 393  		return r
 394  	}
 395  }
 396  
 397  // isLitOrSingle reports whether the segment is a non-dollar literal or a single wildcard.
 398  func isLitOrSingle(seg segment) bool {
 399  	if seg.wild {
 400  		return !seg.multi
 401  	}
 402  	return seg.s != "/"
 403  }
 404  
 405  // describeConflict returns an explanation of why two patterns conflict.
 406  func describeConflict(p1, p2 *pattern) []byte {
 407  	mrel := p1.compareMethods(p2)
 408  	prel := p1.comparePaths(p2)
 409  	rel := combineRelationships(mrel, prel)
 410  	if rel == equivalent {
 411  		return fmt.Sprintf("%s matches the same requests as %s", p1, p2)
 412  	}
 413  	if rel != overlaps {
 414  		panic("describeConflict called with non-conflicting patterns")
 415  	}
 416  	if prel == overlaps {
 417  		return fmt.Sprintf(`%[1]s and %[2]s both match some paths, like %[3]q.
 418  But neither is more specific than the other.
 419  %[1]s matches %[4]q, but %[2]s doesn't.
 420  %[2]s matches %[5]q, but %[1]s doesn't.`,
 421  			p1, p2, commonPath(p1, p2), differencePath(p1, p2), differencePath(p2, p1))
 422  	}
 423  	if mrel == moreGeneral && prel == moreSpecific {
 424  		return fmt.Sprintf("%s matches more methods than %s, but has a more specific path pattern", p1, p2)
 425  	}
 426  	if mrel == moreSpecific && prel == moreGeneral {
 427  		return fmt.Sprintf("%s matches fewer methods than %s, but has a more general path pattern", p1, p2)
 428  	}
 429  	return fmt.Sprintf("bug: unexpected way for two patterns %s and %s to conflict: methods %s, paths %s", p1, p2, mrel, prel)
 430  }
 431  
 432  // writeMatchingPath writes to b a path that matches the segments.
 433  func writeMatchingPath(b *bytes.Buffer, segs []segment) {
 434  	for _, s := range segs {
 435  		writeSegment(b, s)
 436  	}
 437  }
 438  
 439  func writeSegment(b *bytes.Buffer, s segment) {
 440  	b.WriteByte('/')
 441  	if !s.multi && s.s != "/" {
 442  		b.WriteString(s.s)
 443  	}
 444  }
 445  
 446  // commonPath returns a path that both p1 and p2 match.
 447  // It assumes there is such a path.
 448  func commonPath(p1, p2 *pattern) []byte {
 449  	var b bytes.Buffer
 450  	var segs1, segs2 []segment
 451  	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
 452  		if s1 := segs1[0]; s1.wild {
 453  			writeSegment(&b, segs2[0])
 454  		} else {
 455  			writeSegment(&b, s1)
 456  		}
 457  	}
 458  	if len(segs1) > 0 {
 459  		writeMatchingPath(&b, segs1)
 460  	} else if len(segs2) > 0 {
 461  		writeMatchingPath(&b, segs2)
 462  	}
 463  	return b.String()
 464  }
 465  
 466  // differencePath returns a path that p1 matches and p2 doesn't.
 467  // It assumes there is such a path.
 468  func differencePath(p1, p2 *pattern) []byte {
 469  	var b bytes.Buffer
 470  
 471  	var segs1, segs2 []segment
 472  	for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
 473  		s1 := segs1[0]
 474  		s2 := segs2[0]
 475  		if s1.multi && s2.multi {
 476  			// From here the patterns match the same paths, so we must have found a difference earlier.
 477  			b.WriteByte('/')
 478  			return b.String()
 479  
 480  		}
 481  		if s1.multi && !s2.multi {
 482  			// s1 ends in a "..." wildcard but s2 does not.
 483  			// A trailing slash will distinguish them, unless s2 ends in "{$}",
 484  			// in which case any segment will do; prefer the wildcard name if
 485  			// it has one.
 486  			b.WriteByte('/')
 487  			if s2.s == "/" {
 488  				if s1.s != "" {
 489  					b.WriteString(s1.s)
 490  				} else {
 491  					b.WriteString("x")
 492  				}
 493  			}
 494  			return b.String()
 495  		}
 496  		if !s1.multi && s2.multi {
 497  			writeSegment(&b, s1)
 498  		} else if s1.wild && s2.wild {
 499  			// Both patterns will match whatever we put here; use
 500  			// the first wildcard name.
 501  			writeSegment(&b, s1)
 502  		} else if s1.wild && !s2.wild {
 503  			// s1 is a wildcard, s2 is a literal.
 504  			// Any segment other than s2.s will work.
 505  			// Prefer the wildcard name, but if it's the same as the literal,
 506  			// tweak the literal.
 507  			if s1.s != s2.s {
 508  				writeSegment(&b, s1)
 509  			} else {
 510  				b.WriteByte('/')
 511  				b.WriteString(s2.s + "x")
 512  			}
 513  		} else if !s1.wild && s2.wild {
 514  			writeSegment(&b, s1)
 515  		} else {
 516  			// Both are literals. A precondition of this function is that the
 517  			// patterns overlap, so they must be the same literal. Use it.
 518  			if s1.s != s2.s {
 519  				panic(fmt.Sprintf("literals differ: %q and %q", s1.s, s2.s))
 520  			}
 521  			writeSegment(&b, s1)
 522  		}
 523  	}
 524  	if len(segs1) > 0 {
 525  		// p1 is longer than p2, and p2 does not end in a multi.
 526  		// Anything that matches the rest of p1 will do.
 527  		writeMatchingPath(&b, segs1)
 528  	} else if len(segs2) > 0 {
 529  		writeMatchingPath(&b, segs2)
 530  	}
 531  	return b.String()
 532  }
 533