1 // Copyright 2023 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // Patterns for ServeMux routing.
6 7 package http
8 9 import (
10 "errors"
11 "fmt"
12 "net/url"
13 "bytes"
14 "unicode"
15 )
16 17 // A pattern is something that can be matched against an HTTP request.
18 // It has an optional method, an optional host, and a path.
19 type pattern struct {
20 str []byte // original string
21 method []byte
22 host []byte
23 // The representation of a path differs from the surface syntax, which
24 // simplifies most algorithms.
25 //
26 // Paths ending in '/' are represented with an anonymous "..." wildcard.
27 // For example, the path "a/" is represented as a literal segment "a" followed
28 // by a segment with multi==true.
29 //
30 // Paths ending in "{$}" are represented with the literal segment "/".
31 // For example, the path "a/{$}" is represented as a literal segment "a" followed
32 // by a literal segment "/".
33 segments []segment
34 loc []byte // source location of registering call, for helpful messages
35 }
36 37 func (p *pattern) String() string { return p.str }
38 39 func (p *pattern) lastSegment() segment {
40 return p.segments[len(p.segments)-1]
41 }
42 43 // A segment is a pattern piece that matches one or more path segments, or
44 // a trailing slash.
45 //
46 // If wild is false, it matches a literal segment, or, if s == "/", a trailing slash.
47 // Examples:
48 //
49 // "a" => segment{s: "a"}
50 // "/{$}" => segment{s: "/"}
51 //
52 // If wild is true and multi is false, it matches a single path segment.
53 // Example:
54 //
55 // "{x}" => segment{s: "x", wild: true}
56 //
57 // If both wild and multi are true, it matches all remaining path segments.
58 // Example:
59 //
60 // "{rest...}" => segment{s: "rest", wild: true, multi: true}
61 type segment struct {
62 s []byte // literal or wildcard name or "/" for "/{$}".
63 wild bool
64 multi bool // "..." wildcard
65 }
66 67 // parsePattern parses a string into a Pattern.
68 // The string's syntax is
69 //
70 // [METHOD] [HOST]/[PATH]
71 //
72 // where:
73 // - METHOD is an HTTP method
74 // - HOST is a hostname
75 // - PATH consists of slash-separated segments, where each segment is either
76 // a literal or a wildcard of the form "{name}", "{name...}", or "{$}".
77 //
78 // METHOD, HOST and PATH are all optional; that is, the string can be "/".
79 // If METHOD is present, it must be followed by at least one space or tab.
80 // Wildcard names must be valid Go identifiers.
81 // The "{$}" and "{name...}" wildcard must occur at the end of PATH.
82 // PATH may end with a '/'.
83 // Wildcard names in a path must be distinct.
84 func parsePattern(s []byte) (_ *pattern, err error) {
85 if len(s) == 0 {
86 return nil, errors.New("empty pattern")
87 }
88 off := 0 // offset into string
89 defer func() {
90 if err != nil {
91 err = fmt.Errorf("at offset %d: %w", off, err)
92 }
93 }()
94 95 method, rest, found := s, "", false
96 if i := bytes.IndexAny(s, " \t"); i >= 0 {
97 method, rest, found = s[:i], bytes.TrimLeft(s[i+1:], " \t"), true
98 }
99 if !found {
100 rest = method
101 method = ""
102 }
103 if method != "" && !validMethod(method) {
104 return nil, fmt.Errorf("invalid method %q", method)
105 }
106 p := &pattern{str: s, method: method}
107 108 if found {
109 off = len(method) + 1
110 }
111 i := bytes.IndexByte(rest, '/')
112 if i < 0 {
113 return nil, errors.New("host/path missing /")
114 }
115 p.host = rest[:i]
116 rest = rest[i:]
117 if j := bytes.IndexByte(p.host, '{'); j >= 0 {
118 off += j
119 return nil, errors.New("host contains '{' (missing initial '/'?)")
120 }
121 // At this point, rest is the path.
122 off += i
123 124 // An unclean path with a method that is not CONNECT can never match,
125 // because paths are cleaned before matching.
126 if method != "" && method != "CONNECT" && rest != cleanPath(rest) {
127 return nil, errors.New("non-CONNECT pattern with unclean path can never match")
128 }
129 130 seenNames := map[string]bool{} // remember wildcard names to catch dups
131 for len(rest) > 0 {
132 // Invariant: rest[0] == '/'.
133 rest = rest[1:]
134 off = len(s) - len(rest)
135 if len(rest) == 0 {
136 // Trailing slash.
137 p.segments = append(p.segments, segment{wild: true, multi: true})
138 break
139 }
140 i := bytes.IndexByte(rest, '/')
141 if i < 0 {
142 i = len(rest)
143 }
144 var seg []byte
145 seg, rest = rest[:i], rest[i:]
146 if i := bytes.IndexByte(seg, '{'); i < 0 {
147 // Literal.
148 seg = pathUnescape(seg)
149 p.segments = append(p.segments, segment{s: seg})
150 } else {
151 // Wildcard.
152 if i != 0 {
153 return nil, errors.New("bad wildcard segment (must start with '{')")
154 }
155 if seg[len(seg)-1] != '}' {
156 return nil, errors.New("bad wildcard segment (must end with '}')")
157 }
158 name := seg[1 : len(seg)-1]
159 if name == "$" {
160 if len(rest) != 0 {
161 return nil, errors.New("{$} not at end")
162 }
163 p.segments = append(p.segments, segment{s: "/"})
164 break
165 }
166 name, multi := bytes.CutSuffix(name, "...")
167 if multi && len(rest) != 0 {
168 return nil, errors.New("{...} wildcard not at end")
169 }
170 if name == "" {
171 return nil, errors.New("empty wildcard")
172 }
173 if !isValidWildcardName(name) {
174 return nil, fmt.Errorf("bad wildcard name %q", name)
175 }
176 if seenNames[name] {
177 return nil, fmt.Errorf("duplicate wildcard name %q", name)
178 }
179 seenNames[name] = true
180 p.segments = append(p.segments, segment{s: name, wild: true, multi: multi})
181 }
182 }
183 return p, nil
184 }
185 186 func isValidWildcardName(s []byte) bool {
187 if s == "" {
188 return false
189 }
190 // Valid Go identifier.
191 for i, c := range s {
192 if !unicode.IsLetter(rune(c)) && c != '_' && (i == 0 || !unicode.IsDigit(rune(c))) {
193 return false
194 }
195 }
196 return true
197 }
198 199 func pathUnescape(path []byte) []byte {
200 u, err := url.PathUnescape(path)
201 if err != nil {
202 // Invalidly escaped path; use the original
203 return path
204 }
205 return u
206 }
207 208 // relationship is a relationship between two patterns, p1 and p2.
209 type relationship string
210 211 const (
212 equivalent relationship = "equivalent" // both match the same requests
213 moreGeneral relationship = "moreGeneral" // p1 matches everything p2 does & more
214 moreSpecific relationship = "moreSpecific" // p2 matches everything p1 does & more
215 disjoint relationship = "disjoint" // there is no request that both match
216 overlaps relationship = "overlaps" // there is a request that both match, but neither is more specific
217 )
218 219 // conflictsWith reports whether p1 conflicts with p2, that is, whether
220 // there is a request that both match but where neither is higher precedence
221 // than the other.
222 //
223 // Precedence is defined by two rules:
224 // 1. Patterns with a host win over patterns without a host.
225 // 2. Patterns whose method and path is more specific win. One pattern is more
226 // specific than another if the second matches all the (method, path) pairs
227 // of the first and more.
228 //
229 // If rule 1 doesn't apply, then two patterns conflict if their relationship
230 // is either equivalence (they match the same set of requests) or overlap
231 // (they both match some requests, but neither is more specific than the other).
232 func (p1 *pattern) conflictsWith(p2 *pattern) bool {
233 if p1.host != p2.host {
234 // Either one host is empty and the other isn't, in which case the
235 // one with the host wins by rule 1, or neither host is empty
236 // and they differ, so they won't match the same paths.
237 return false
238 }
239 rel := p1.comparePathsAndMethods(p2)
240 return rel == equivalent || rel == overlaps
241 }
242 243 func (p1 *pattern) comparePathsAndMethods(p2 *pattern) relationship {
244 mrel := p1.compareMethods(p2)
245 // Optimization: avoid a call to comparePaths.
246 if mrel == disjoint {
247 return disjoint
248 }
249 prel := p1.comparePaths(p2)
250 return combineRelationships(mrel, prel)
251 }
252 253 // compareMethods determines the relationship between the method
254 // part of patterns p1 and p2.
255 //
256 // A method can either be empty, "GET", or something else.
257 // The empty string matches any method, so it is the most general.
258 // "GET" matches both GET and HEAD.
259 // Anything else matches only itself.
260 func (p1 *pattern) compareMethods(p2 *pattern) relationship {
261 if p1.method == p2.method {
262 return equivalent
263 }
264 if p1.method == "" {
265 // p1 matches any method, but p2 does not, so p1 is more general.
266 return moreGeneral
267 }
268 if p2.method == "" {
269 return moreSpecific
270 }
271 if p1.method == "GET" && p2.method == "HEAD" {
272 // p1 matches GET and HEAD; p2 matches only HEAD.
273 return moreGeneral
274 }
275 if p2.method == "GET" && p1.method == "HEAD" {
276 return moreSpecific
277 }
278 return disjoint
279 }
280 281 // comparePaths determines the relationship between the path
282 // part of two patterns.
283 func (p1 *pattern) comparePaths(p2 *pattern) relationship {
284 // Optimization: if a path pattern doesn't end in a multi ("...") wildcard, then it
285 // can only match paths with the same number of segments.
286 if len(p1.segments) != len(p2.segments) && !p1.lastSegment().multi && !p2.lastSegment().multi {
287 return disjoint
288 }
289 290 // Consider corresponding segments in the two path patterns.
291 var segs1, segs2 []segment
292 rel := equivalent
293 for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
294 rel = combineRelationships(rel, compareSegments(segs1[0], segs2[0]))
295 if rel == disjoint {
296 return rel
297 }
298 }
299 // We've reached the end of the corresponding segments of the patterns.
300 // If they have the same number of segments, then we've already determined
301 // their relationship.
302 if len(segs1) == 0 && len(segs2) == 0 {
303 return rel
304 }
305 // Otherwise, the only way they could fail to be disjoint is if the shorter
306 // pattern ends in a multi. In that case, that multi is more general
307 // than the remainder of the longer pattern, so combine those two relationships.
308 if len(segs1) < len(segs2) && p1.lastSegment().multi {
309 return combineRelationships(rel, moreGeneral)
310 }
311 if len(segs2) < len(segs1) && p2.lastSegment().multi {
312 return combineRelationships(rel, moreSpecific)
313 }
314 return disjoint
315 }
316 317 // compareSegments determines the relationship between two segments.
318 func compareSegments(s1, s2 segment) relationship {
319 if s1.multi && s2.multi {
320 return equivalent
321 }
322 if s1.multi {
323 return moreGeneral
324 }
325 if s2.multi {
326 return moreSpecific
327 }
328 if s1.wild && s2.wild {
329 return equivalent
330 }
331 if s1.wild {
332 if s2.s == "/" {
333 // A single wildcard doesn't match a trailing slash.
334 return disjoint
335 }
336 return moreGeneral
337 }
338 if s2.wild {
339 if s1.s == "/" {
340 return disjoint
341 }
342 return moreSpecific
343 }
344 // Both literals.
345 if s1.s == s2.s {
346 return equivalent
347 }
348 return disjoint
349 }
350 351 // combineRelationships determines the overall relationship of two patterns
352 // given the relationships of a partition of the patterns into two parts.
353 //
354 // For example, if p1 is more general than p2 in one way but equivalent
355 // in the other, then it is more general overall.
356 //
357 // Or if p1 is more general in one way and more specific in the other, then
358 // they overlap.
359 func combineRelationships(r1, r2 relationship) relationship {
360 switch r1 {
361 case equivalent:
362 return r2
363 case disjoint:
364 return disjoint
365 case overlaps:
366 if r2 == disjoint {
367 return disjoint
368 }
369 return overlaps
370 case moreGeneral, moreSpecific:
371 switch r2 {
372 case equivalent:
373 return r1
374 case inverseRelationship(r1):
375 return overlaps
376 default:
377 return r2
378 }
379 default:
380 panic(fmt.Sprintf("unknown relationship %q", r1))
381 }
382 }
383 384 // If p1 has relationship `r` to p2, then
385 // p2 has inverseRelationship(r) to p1.
386 func inverseRelationship(r relationship) relationship {
387 switch r {
388 case moreSpecific:
389 return moreGeneral
390 case moreGeneral:
391 return moreSpecific
392 default:
393 return r
394 }
395 }
396 397 // isLitOrSingle reports whether the segment is a non-dollar literal or a single wildcard.
398 func isLitOrSingle(seg segment) bool {
399 if seg.wild {
400 return !seg.multi
401 }
402 return seg.s != "/"
403 }
404 405 // describeConflict returns an explanation of why two patterns conflict.
406 func describeConflict(p1, p2 *pattern) []byte {
407 mrel := p1.compareMethods(p2)
408 prel := p1.comparePaths(p2)
409 rel := combineRelationships(mrel, prel)
410 if rel == equivalent {
411 return fmt.Sprintf("%s matches the same requests as %s", p1, p2)
412 }
413 if rel != overlaps {
414 panic("describeConflict called with non-conflicting patterns")
415 }
416 if prel == overlaps {
417 return fmt.Sprintf(`%[1]s and %[2]s both match some paths, like %[3]q.
418 But neither is more specific than the other.
419 %[1]s matches %[4]q, but %[2]s doesn't.
420 %[2]s matches %[5]q, but %[1]s doesn't.`,
421 p1, p2, commonPath(p1, p2), differencePath(p1, p2), differencePath(p2, p1))
422 }
423 if mrel == moreGeneral && prel == moreSpecific {
424 return fmt.Sprintf("%s matches more methods than %s, but has a more specific path pattern", p1, p2)
425 }
426 if mrel == moreSpecific && prel == moreGeneral {
427 return fmt.Sprintf("%s matches fewer methods than %s, but has a more general path pattern", p1, p2)
428 }
429 return fmt.Sprintf("bug: unexpected way for two patterns %s and %s to conflict: methods %s, paths %s", p1, p2, mrel, prel)
430 }
431 432 // writeMatchingPath writes to b a path that matches the segments.
433 func writeMatchingPath(b *bytes.Buffer, segs []segment) {
434 for _, s := range segs {
435 writeSegment(b, s)
436 }
437 }
438 439 func writeSegment(b *bytes.Buffer, s segment) {
440 b.WriteByte('/')
441 if !s.multi && s.s != "/" {
442 b.WriteString(s.s)
443 }
444 }
445 446 // commonPath returns a path that both p1 and p2 match.
447 // It assumes there is such a path.
448 func commonPath(p1, p2 *pattern) []byte {
449 var b bytes.Buffer
450 var segs1, segs2 []segment
451 for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
452 if s1 := segs1[0]; s1.wild {
453 writeSegment(&b, segs2[0])
454 } else {
455 writeSegment(&b, s1)
456 }
457 }
458 if len(segs1) > 0 {
459 writeMatchingPath(&b, segs1)
460 } else if len(segs2) > 0 {
461 writeMatchingPath(&b, segs2)
462 }
463 return b.String()
464 }
465 466 // differencePath returns a path that p1 matches and p2 doesn't.
467 // It assumes there is such a path.
468 func differencePath(p1, p2 *pattern) []byte {
469 var b bytes.Buffer
470 471 var segs1, segs2 []segment
472 for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
473 s1 := segs1[0]
474 s2 := segs2[0]
475 if s1.multi && s2.multi {
476 // From here the patterns match the same paths, so we must have found a difference earlier.
477 b.WriteByte('/')
478 return b.String()
479 480 }
481 if s1.multi && !s2.multi {
482 // s1 ends in a "..." wildcard but s2 does not.
483 // A trailing slash will distinguish them, unless s2 ends in "{$}",
484 // in which case any segment will do; prefer the wildcard name if
485 // it has one.
486 b.WriteByte('/')
487 if s2.s == "/" {
488 if s1.s != "" {
489 b.WriteString(s1.s)
490 } else {
491 b.WriteString("x")
492 }
493 }
494 return b.String()
495 }
496 if !s1.multi && s2.multi {
497 writeSegment(&b, s1)
498 } else if s1.wild && s2.wild {
499 // Both patterns will match whatever we put here; use
500 // the first wildcard name.
501 writeSegment(&b, s1)
502 } else if s1.wild && !s2.wild {
503 // s1 is a wildcard, s2 is a literal.
504 // Any segment other than s2.s will work.
505 // Prefer the wildcard name, but if it's the same as the literal,
506 // tweak the literal.
507 if s1.s != s2.s {
508 writeSegment(&b, s1)
509 } else {
510 b.WriteByte('/')
511 b.WriteString(s2.s + "x")
512 }
513 } else if !s1.wild && s2.wild {
514 writeSegment(&b, s1)
515 } else {
516 // Both are literals. A precondition of this function is that the
517 // patterns overlap, so they must be the same literal. Use it.
518 if s1.s != s2.s {
519 panic(fmt.Sprintf("literals differ: %q and %q", s1.s, s2.s))
520 }
521 writeSegment(&b, s1)
522 }
523 }
524 if len(segs1) > 0 {
525 // p1 is longer than p2, and p2 does not end in a multi.
526 // Anything that matches the rest of p1 will do.
527 writeMatchingPath(&b, segs1)
528 } else if len(segs2) > 0 {
529 writeMatchingPath(&b, segs2)
530 }
531 return b.String()
532 }
533