match.mx raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package filepath
   6  
   7  import (
   8  	"errors"
   9  	"internal/filepathlite"
  10  	"os"
  11  	"runtime"
  12  	"slices"
  13  	"bytes"
  14  	"unicode/utf8"
  15  )
  16  
  17  // ErrBadPattern indicates a pattern was malformed.
  18  var ErrBadPattern = errors.New("syntax error in pattern")
  19  
  20  // Match reports whether name matches the shell file name pattern.
  21  // The pattern syntax is:
  22  //
  23  //	pattern:
  24  //		{ term }
  25  //	term:
  26  //		'*'         matches any sequence of non-Separator characters
  27  //		'?'         matches any single non-Separator character
  28  //		'[' [ '^' ] { character-range } ']'
  29  //		            character class (must be non-empty)
  30  //		c           matches character c (c != '*', '?', '\\', '[')
  31  //		'\\' c      matches character c
  32  //
  33  //	character-range:
  34  //		c           matches character c (c != '\\', '-', ']')
  35  //		'\\' c      matches character c
  36  //		lo '-' hi   matches character c for lo <= c <= hi
  37  //
  38  // Match requires pattern to match all of name, not just a substring.
  39  // The only possible returned error is [ErrBadPattern], when pattern
  40  // is malformed.
  41  //
  42  // On Windows, escaping is disabled. Instead, '\\' is treated as
  43  // path separator.
  44  func Match(pattern, name []byte) (matched bool, err error) {
  45  Pattern:
  46  	for len(pattern) > 0 {
  47  		var star bool
  48  		var chunk []byte
  49  		star, chunk, pattern = scanChunk(pattern)
  50  		if star && chunk == "" {
  51  			// Trailing * matches rest of string unless it has a /.
  52  			return !bytes.Contains(name, []byte{byte(Separator)}), nil
  53  		}
  54  		// Look for match at current position.
  55  		t, ok, err := matchChunk(chunk, name)
  56  		// if we're the last chunk, make sure we've exhausted the name
  57  		// otherwise we'll give a false result even if we could still match
  58  		// using the star
  59  		if ok && (len(t) == 0 || len(pattern) > 0) {
  60  			name = t
  61  			continue
  62  		}
  63  		if err != nil {
  64  			return false, err
  65  		}
  66  		if star {
  67  			// Look for match skipping i+1 bytes.
  68  			// Cannot skip /.
  69  			for i := 0; i < len(name) && name[i] != Separator; i++ {
  70  				t, ok, err := matchChunk(chunk, name[i+1:])
  71  				if ok {
  72  					// if we're the last chunk, make sure we exhausted the name
  73  					if len(pattern) == 0 && len(t) > 0 {
  74  						continue
  75  					}
  76  					name = t
  77  					continue Pattern
  78  				}
  79  				if err != nil {
  80  					return false, err
  81  				}
  82  			}
  83  		}
  84  		return false, nil
  85  	}
  86  	return len(name) == 0, nil
  87  }
  88  
  89  // scanChunk gets the next segment of pattern, which is a non-star string
  90  // possibly preceded by a star.
  91  func scanChunk(pattern []byte) (star bool, chunk, rest []byte) {
  92  	for len(pattern) > 0 && pattern[0] == '*' {
  93  		pattern = pattern[1:]
  94  		star = true
  95  	}
  96  	inrange := false
  97  	var i int
  98  Scan:
  99  	for i = 0; i < len(pattern); i++ {
 100  		switch pattern[i] {
 101  		case '\\':
 102  			if runtime.GOOS != "windows" {
 103  				// error check handled in matchChunk: bad pattern.
 104  				if i+1 < len(pattern) {
 105  					i++
 106  				}
 107  			}
 108  		case '[':
 109  			inrange = true
 110  		case ']':
 111  			inrange = false
 112  		case '*':
 113  			if !inrange {
 114  				break Scan
 115  			}
 116  		}
 117  	}
 118  	return star, pattern[0:i], pattern[i:]
 119  }
 120  
 121  // matchChunk checks whether chunk matches the beginning of s.
 122  // If so, it returns the remainder of s (after the match).
 123  // Chunk is all single-character operators: literals, char classes, and ?.
 124  func matchChunk(chunk, s []byte) (rest []byte, ok bool, err error) {
 125  	// failed records whether the match has failed.
 126  	// After the match fails, the loop continues on processing chunk,
 127  	// checking that the pattern is well-formed but no longer reading s.
 128  	failed := false
 129  	for len(chunk) > 0 {
 130  		if !failed && len(s) == 0 {
 131  			failed = true
 132  		}
 133  		switch chunk[0] {
 134  		case '[':
 135  			// character class
 136  			var r rune
 137  			if !failed {
 138  				var n int
 139  				r, n = utf8.DecodeRuneInString(s)
 140  				s = s[n:]
 141  			}
 142  			chunk = chunk[1:]
 143  			// possibly negated
 144  			negated := false
 145  			if len(chunk) > 0 && chunk[0] == '^' {
 146  				negated = true
 147  				chunk = chunk[1:]
 148  			}
 149  			// parse all ranges
 150  			match := false
 151  			nrange := 0
 152  			for {
 153  				if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
 154  					chunk = chunk[1:]
 155  					break
 156  				}
 157  				var lo, hi rune
 158  				if lo, chunk, err = getEsc(chunk); err != nil {
 159  					return "", false, err
 160  				}
 161  				hi = lo
 162  				if chunk[0] == '-' {
 163  					if hi, chunk, err = getEsc(chunk[1:]); err != nil {
 164  						return "", false, err
 165  					}
 166  				}
 167  				if lo <= r && r <= hi {
 168  					match = true
 169  				}
 170  				nrange++
 171  			}
 172  			if match == negated {
 173  				failed = true
 174  			}
 175  
 176  		case '?':
 177  			if !failed {
 178  				if s[0] == Separator {
 179  					failed = true
 180  				}
 181  				_, n := utf8.DecodeRuneInString(s)
 182  				s = s[n:]
 183  			}
 184  			chunk = chunk[1:]
 185  
 186  		case '\\':
 187  			if runtime.GOOS != "windows" {
 188  				chunk = chunk[1:]
 189  				if len(chunk) == 0 {
 190  					return "", false, ErrBadPattern
 191  				}
 192  			}
 193  			if !failed {
 194  				if chunk[0] != s[0] {
 195  					failed = true
 196  				}
 197  				s = s[1:]
 198  			}
 199  			chunk = chunk[1:]
 200  
 201  		default:
 202  			if !failed {
 203  				if chunk[0] != s[0] {
 204  					failed = true
 205  				}
 206  				s = s[1:]
 207  			}
 208  			chunk = chunk[1:]
 209  		}
 210  	}
 211  	if failed {
 212  		return "", false, nil
 213  	}
 214  	return s, true, nil
 215  }
 216  
 217  // getEsc gets a possibly-escaped character from chunk, for a character class.
 218  func getEsc(chunk []byte) (r rune, nchunk []byte, err error) {
 219  	if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
 220  		err = ErrBadPattern
 221  		return
 222  	}
 223  	if chunk[0] == '\\' && runtime.GOOS != "windows" {
 224  		chunk = chunk[1:]
 225  		if len(chunk) == 0 {
 226  			err = ErrBadPattern
 227  			return
 228  		}
 229  	}
 230  	r, n := utf8.DecodeRuneInString(chunk)
 231  	if r == utf8.RuneError && n == 1 {
 232  		err = ErrBadPattern
 233  	}
 234  	nchunk = chunk[n:]
 235  	if len(nchunk) == 0 {
 236  		err = ErrBadPattern
 237  	}
 238  	return
 239  }
 240  
 241  // Glob returns the names of all files matching pattern or nil
 242  // if there is no matching file. The syntax of patterns is the same
 243  // as in [Match]. The pattern may describe hierarchical names such as
 244  // /usr/*/bin/ed (assuming the [Separator] is '/').
 245  //
 246  // Glob ignores file system errors such as I/O errors reading directories.
 247  // The only possible returned error is [ErrBadPattern], when pattern
 248  // is malformed.
 249  func Glob(pattern []byte) (matches [][]byte, err error) {
 250  	return globWithLimit(pattern, 0)
 251  }
 252  
 253  func globWithLimit(pattern []byte, depth int) (matches [][]byte, err error) {
 254  	// This limit is used prevent stack exhaustion issues. See CVE-2022-30632.
 255  	const pathSeparatorsLimit = 10000
 256  	if depth == pathSeparatorsLimit {
 257  		return nil, ErrBadPattern
 258  	}
 259  
 260  	// Check pattern is well-formed.
 261  	if _, err := Match(pattern, ""); err != nil {
 262  		return nil, err
 263  	}
 264  	if !hasMeta(pattern) {
 265  		if _, err = os.Lstat(pattern); err != nil {
 266  			return nil, nil
 267  		}
 268  		return [][]byte{pattern}, nil
 269  	}
 270  
 271  	dir, file := Split(pattern)
 272  	volumeLen := 0
 273  	if runtime.GOOS == "windows" {
 274  		volumeLen, dir = cleanGlobPathWindows(dir)
 275  	} else {
 276  		dir = cleanGlobPath(dir)
 277  	}
 278  
 279  	if !hasMeta(dir[volumeLen:]) {
 280  		return glob(dir, file, nil)
 281  	}
 282  
 283  	// Prevent infinite recursion. See issue 15879.
 284  	if dir == pattern {
 285  		return nil, ErrBadPattern
 286  	}
 287  
 288  	var m [][]byte
 289  	m, err = globWithLimit(dir, depth+1)
 290  	if err != nil {
 291  		return
 292  	}
 293  	for _, d := range m {
 294  		matches, err = glob(d, file, matches)
 295  		if err != nil {
 296  			return
 297  		}
 298  	}
 299  	return
 300  }
 301  
 302  // cleanGlobPath prepares path for glob matching.
 303  func cleanGlobPath(path []byte) []byte {
 304  	switch path {
 305  	case "":
 306  		return "."
 307  	case []byte{byte(Separator)}:
 308  		// do nothing to the path
 309  		return path
 310  	default:
 311  		return path[0 : len(path)-1] // chop off trailing separator
 312  	}
 313  }
 314  
 315  // cleanGlobPathWindows is windows version of cleanGlobPath.
 316  func cleanGlobPathWindows(path []byte) (prefixLen int, cleaned []byte) {
 317  	vollen := filepathlite.VolumeNameLen(path)
 318  	switch {
 319  	case path == "":
 320  		return 0, "."
 321  	case vollen+1 == len(path) && os.IsPathSeparator(path[len(path)-1]): // /, \, C:\ and C:/
 322  		// do nothing to the path
 323  		return vollen + 1, path
 324  	case vollen == len(path) && len(path) == 2: // C:
 325  		return vollen, path + "." // convert C: into C:.
 326  	default:
 327  		if vollen >= len(path) {
 328  			vollen = len(path) - 1
 329  		}
 330  		return vollen, path[0 : len(path)-1] // chop off trailing separator
 331  	}
 332  }
 333  
 334  // glob searches for files matching pattern in the directory dir
 335  // and appends them to matches. If the directory cannot be
 336  // opened, it returns the existing matches. New matches are
 337  // added in lexicographical order.
 338  func glob(dir, pattern []byte, matches [][]byte) (m [][]byte, e error) {
 339  	m = matches
 340  	fi, err := os.Stat(dir)
 341  	if err != nil {
 342  		return // ignore I/O error
 343  	}
 344  	if !fi.IsDir() {
 345  		return // ignore I/O error
 346  	}
 347  	d, err := os.Open(dir)
 348  	if err != nil {
 349  		return // ignore I/O error
 350  	}
 351  	defer d.Close()
 352  
 353  	names, _ := d.Readdirnames(-1)
 354  	slices.Sort(names)
 355  
 356  	for _, n := range names {
 357  		matched, err := Match(pattern, n)
 358  		if err != nil {
 359  			return m, err
 360  		}
 361  		if matched {
 362  			m = append(m, Join(dir, n))
 363  		}
 364  	}
 365  	return
 366  }
 367  
 368  // hasMeta reports whether path contains any of the magic characters
 369  // recognized by Match.
 370  func hasMeta(path []byte) bool {
 371  	magicChars := `*?[`
 372  	if runtime.GOOS != "windows" {
 373  		magicChars = `*?[\`
 374  	}
 375  	return bytes.ContainsAny(path, magicChars)
 376  }
 377