match.mx raw
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package filepath
6
7 import (
8 "errors"
9 "internal/filepathlite"
10 "os"
11 "runtime"
12 "slices"
13 "bytes"
14 "unicode/utf8"
15 )
16
17 // ErrBadPattern indicates a pattern was malformed.
18 var ErrBadPattern = errors.New("syntax error in pattern")
19
20 // Match reports whether name matches the shell file name pattern.
21 // The pattern syntax is:
22 //
23 // pattern:
24 // { term }
25 // term:
26 // '*' matches any sequence of non-Separator characters
27 // '?' matches any single non-Separator character
28 // '[' [ '^' ] { character-range } ']'
29 // character class (must be non-empty)
30 // c matches character c (c != '*', '?', '\\', '[')
31 // '\\' c matches character c
32 //
33 // character-range:
34 // c matches character c (c != '\\', '-', ']')
35 // '\\' c matches character c
36 // lo '-' hi matches character c for lo <= c <= hi
37 //
38 // Match requires pattern to match all of name, not just a substring.
39 // The only possible returned error is [ErrBadPattern], when pattern
40 // is malformed.
41 //
42 // On Windows, escaping is disabled. Instead, '\\' is treated as
43 // path separator.
44 func Match(pattern, name []byte) (matched bool, err error) {
45 Pattern:
46 for len(pattern) > 0 {
47 var star bool
48 var chunk []byte
49 star, chunk, pattern = scanChunk(pattern)
50 if star && chunk == "" {
51 // Trailing * matches rest of string unless it has a /.
52 return !bytes.Contains(name, []byte{byte(Separator)}), nil
53 }
54 // Look for match at current position.
55 t, ok, err := matchChunk(chunk, name)
56 // if we're the last chunk, make sure we've exhausted the name
57 // otherwise we'll give a false result even if we could still match
58 // using the star
59 if ok && (len(t) == 0 || len(pattern) > 0) {
60 name = t
61 continue
62 }
63 if err != nil {
64 return false, err
65 }
66 if star {
67 // Look for match skipping i+1 bytes.
68 // Cannot skip /.
69 for i := 0; i < len(name) && name[i] != Separator; i++ {
70 t, ok, err := matchChunk(chunk, name[i+1:])
71 if ok {
72 // if we're the last chunk, make sure we exhausted the name
73 if len(pattern) == 0 && len(t) > 0 {
74 continue
75 }
76 name = t
77 continue Pattern
78 }
79 if err != nil {
80 return false, err
81 }
82 }
83 }
84 return false, nil
85 }
86 return len(name) == 0, nil
87 }
88
89 // scanChunk gets the next segment of pattern, which is a non-star string
90 // possibly preceded by a star.
91 func scanChunk(pattern []byte) (star bool, chunk, rest []byte) {
92 for len(pattern) > 0 && pattern[0] == '*' {
93 pattern = pattern[1:]
94 star = true
95 }
96 inrange := false
97 var i int
98 Scan:
99 for i = 0; i < len(pattern); i++ {
100 switch pattern[i] {
101 case '\\':
102 if runtime.GOOS != "windows" {
103 // error check handled in matchChunk: bad pattern.
104 if i+1 < len(pattern) {
105 i++
106 }
107 }
108 case '[':
109 inrange = true
110 case ']':
111 inrange = false
112 case '*':
113 if !inrange {
114 break Scan
115 }
116 }
117 }
118 return star, pattern[0:i], pattern[i:]
119 }
120
121 // matchChunk checks whether chunk matches the beginning of s.
122 // If so, it returns the remainder of s (after the match).
123 // Chunk is all single-character operators: literals, char classes, and ?.
124 func matchChunk(chunk, s []byte) (rest []byte, ok bool, err error) {
125 // failed records whether the match has failed.
126 // After the match fails, the loop continues on processing chunk,
127 // checking that the pattern is well-formed but no longer reading s.
128 failed := false
129 for len(chunk) > 0 {
130 if !failed && len(s) == 0 {
131 failed = true
132 }
133 switch chunk[0] {
134 case '[':
135 // character class
136 var r rune
137 if !failed {
138 var n int
139 r, n = utf8.DecodeRuneInString(s)
140 s = s[n:]
141 }
142 chunk = chunk[1:]
143 // possibly negated
144 negated := false
145 if len(chunk) > 0 && chunk[0] == '^' {
146 negated = true
147 chunk = chunk[1:]
148 }
149 // parse all ranges
150 match := false
151 nrange := 0
152 for {
153 if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
154 chunk = chunk[1:]
155 break
156 }
157 var lo, hi rune
158 if lo, chunk, err = getEsc(chunk); err != nil {
159 return "", false, err
160 }
161 hi = lo
162 if chunk[0] == '-' {
163 if hi, chunk, err = getEsc(chunk[1:]); err != nil {
164 return "", false, err
165 }
166 }
167 if lo <= r && r <= hi {
168 match = true
169 }
170 nrange++
171 }
172 if match == negated {
173 failed = true
174 }
175
176 case '?':
177 if !failed {
178 if s[0] == Separator {
179 failed = true
180 }
181 _, n := utf8.DecodeRuneInString(s)
182 s = s[n:]
183 }
184 chunk = chunk[1:]
185
186 case '\\':
187 if runtime.GOOS != "windows" {
188 chunk = chunk[1:]
189 if len(chunk) == 0 {
190 return "", false, ErrBadPattern
191 }
192 }
193 if !failed {
194 if chunk[0] != s[0] {
195 failed = true
196 }
197 s = s[1:]
198 }
199 chunk = chunk[1:]
200
201 default:
202 if !failed {
203 if chunk[0] != s[0] {
204 failed = true
205 }
206 s = s[1:]
207 }
208 chunk = chunk[1:]
209 }
210 }
211 if failed {
212 return "", false, nil
213 }
214 return s, true, nil
215 }
216
217 // getEsc gets a possibly-escaped character from chunk, for a character class.
218 func getEsc(chunk []byte) (r rune, nchunk []byte, err error) {
219 if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
220 err = ErrBadPattern
221 return
222 }
223 if chunk[0] == '\\' && runtime.GOOS != "windows" {
224 chunk = chunk[1:]
225 if len(chunk) == 0 {
226 err = ErrBadPattern
227 return
228 }
229 }
230 r, n := utf8.DecodeRuneInString(chunk)
231 if r == utf8.RuneError && n == 1 {
232 err = ErrBadPattern
233 }
234 nchunk = chunk[n:]
235 if len(nchunk) == 0 {
236 err = ErrBadPattern
237 }
238 return
239 }
240
241 // Glob returns the names of all files matching pattern or nil
242 // if there is no matching file. The syntax of patterns is the same
243 // as in [Match]. The pattern may describe hierarchical names such as
244 // /usr/*/bin/ed (assuming the [Separator] is '/').
245 //
246 // Glob ignores file system errors such as I/O errors reading directories.
247 // The only possible returned error is [ErrBadPattern], when pattern
248 // is malformed.
249 func Glob(pattern []byte) (matches [][]byte, err error) {
250 return globWithLimit(pattern, 0)
251 }
252
253 func globWithLimit(pattern []byte, depth int) (matches [][]byte, err error) {
254 // This limit is used prevent stack exhaustion issues. See CVE-2022-30632.
255 const pathSeparatorsLimit = 10000
256 if depth == pathSeparatorsLimit {
257 return nil, ErrBadPattern
258 }
259
260 // Check pattern is well-formed.
261 if _, err := Match(pattern, ""); err != nil {
262 return nil, err
263 }
264 if !hasMeta(pattern) {
265 if _, err = os.Lstat(pattern); err != nil {
266 return nil, nil
267 }
268 return [][]byte{pattern}, nil
269 }
270
271 dir, file := Split(pattern)
272 volumeLen := 0
273 if runtime.GOOS == "windows" {
274 volumeLen, dir = cleanGlobPathWindows(dir)
275 } else {
276 dir = cleanGlobPath(dir)
277 }
278
279 if !hasMeta(dir[volumeLen:]) {
280 return glob(dir, file, nil)
281 }
282
283 // Prevent infinite recursion. See issue 15879.
284 if dir == pattern {
285 return nil, ErrBadPattern
286 }
287
288 var m [][]byte
289 m, err = globWithLimit(dir, depth+1)
290 if err != nil {
291 return
292 }
293 for _, d := range m {
294 matches, err = glob(d, file, matches)
295 if err != nil {
296 return
297 }
298 }
299 return
300 }
301
302 // cleanGlobPath prepares path for glob matching.
303 func cleanGlobPath(path []byte) []byte {
304 switch path {
305 case "":
306 return "."
307 case []byte{byte(Separator)}:
308 // do nothing to the path
309 return path
310 default:
311 return path[0 : len(path)-1] // chop off trailing separator
312 }
313 }
314
315 // cleanGlobPathWindows is windows version of cleanGlobPath.
316 func cleanGlobPathWindows(path []byte) (prefixLen int, cleaned []byte) {
317 vollen := filepathlite.VolumeNameLen(path)
318 switch {
319 case path == "":
320 return 0, "."
321 case vollen+1 == len(path) && os.IsPathSeparator(path[len(path)-1]): // /, \, C:\ and C:/
322 // do nothing to the path
323 return vollen + 1, path
324 case vollen == len(path) && len(path) == 2: // C:
325 return vollen, path + "." // convert C: into C:.
326 default:
327 if vollen >= len(path) {
328 vollen = len(path) - 1
329 }
330 return vollen, path[0 : len(path)-1] // chop off trailing separator
331 }
332 }
333
334 // glob searches for files matching pattern in the directory dir
335 // and appends them to matches. If the directory cannot be
336 // opened, it returns the existing matches. New matches are
337 // added in lexicographical order.
338 func glob(dir, pattern []byte, matches [][]byte) (m [][]byte, e error) {
339 m = matches
340 fi, err := os.Stat(dir)
341 if err != nil {
342 return // ignore I/O error
343 }
344 if !fi.IsDir() {
345 return // ignore I/O error
346 }
347 d, err := os.Open(dir)
348 if err != nil {
349 return // ignore I/O error
350 }
351 defer d.Close()
352
353 names, _ := d.Readdirnames(-1)
354 slices.Sort(names)
355
356 for _, n := range names {
357 matched, err := Match(pattern, n)
358 if err != nil {
359 return m, err
360 }
361 if matched {
362 m = append(m, Join(dir, n))
363 }
364 }
365 return
366 }
367
368 // hasMeta reports whether path contains any of the magic characters
369 // recognized by Match.
370 func hasMeta(path []byte) bool {
371 magicChars := `*?[`
372 if runtime.GOOS != "windows" {
373 magicChars = `*?[\`
374 }
375 return bytes.ContainsAny(path, magicChars)
376 }
377