match.go raw

   1  // Package match provides a simple pattern matcher with unicode support.
   2  package match
   3  
   4  import (
   5  	"unicode/utf8"
   6  )
   7  
   8  // Match returns true if str matches pattern. This is a very
   9  // simple wildcard match where '*' matches on any number characters
  10  // and '?' matches on any one character.
  11  //
  12  // pattern:
  13  // 	{ term }
  14  // term:
  15  // 	'*'         matches any sequence of non-Separator characters
  16  // 	'?'         matches any single non-Separator character
  17  // 	c           matches character c (c != '*', '?', '\\')
  18  // 	'\\' c      matches character c
  19  //
  20  func Match(str, pattern string) bool {
  21  	if pattern == "*" {
  22  		return true
  23  	}
  24  	return match(str, pattern, 0, nil, -1) == rMatch
  25  }
  26  
  27  // MatchLimit is the same as Match but will limit the complexity of the match
  28  // operation. This is to avoid long running matches, specifically to avoid ReDos
  29  // attacks from arbritary inputs.
  30  //
  31  // How it works:
  32  // The underlying match routine is recursive and may call itself when it
  33  // encounters a sandwiched wildcard pattern, such as: `user:*:name`.
  34  // Everytime it calls itself a counter is incremented.
  35  // The operation is stopped when counter > maxcomp*len(str).
  36  func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
  37  	if pattern == "*" {
  38  		return true, false
  39  	}
  40  	counter := 0
  41  	r := match(str, pattern, len(str), &counter, maxcomp)
  42  	if r == rStop {
  43  		return false, true
  44  	}
  45  	return r == rMatch, false
  46  }
  47  
  48  type result int
  49  
  50  const (
  51  	rNoMatch result = iota
  52  	rMatch
  53  	rStop
  54  )
  55  
  56  func match(str, pat string, slen int, counter *int, maxcomp int) result {
  57  	// check complexity limit
  58  	if maxcomp > -1 {
  59  		if *counter > slen*maxcomp {
  60  			return rStop
  61  		}
  62  		*counter++
  63  	}
  64  
  65  	for len(pat) > 0 {
  66  		var wild bool
  67  		pc, ps := rune(pat[0]), 1
  68  		if pc > 0x7f {
  69  			pc, ps = utf8.DecodeRuneInString(pat)
  70  		}
  71  		var sc rune
  72  		var ss int
  73  		if len(str) > 0 {
  74  			sc, ss = rune(str[0]), 1
  75  			if sc > 0x7f {
  76  				sc, ss = utf8.DecodeRuneInString(str)
  77  			}
  78  		}
  79  		switch pc {
  80  		case '?':
  81  			if ss == 0 {
  82  				return rNoMatch
  83  			}
  84  		case '*':
  85  			// Ignore repeating stars.
  86  			for len(pat) > 1 && pat[1] == '*' {
  87  				pat = pat[1:]
  88  			}
  89  
  90  			// If this star is the last character then it must be a match.
  91  			if len(pat) == 1 {
  92  				return rMatch
  93  			}
  94  
  95  			// Match and trim any non-wildcard suffix characters.
  96  			var ok bool
  97  			str, pat, ok = matchTrimSuffix(str, pat)
  98  			if !ok {
  99  				return rNoMatch
 100  			}
 101  
 102  			// Check for single star again.
 103  			if len(pat) == 1 {
 104  				return rMatch
 105  			}
 106  
 107  			// Perform recursive wildcard search.
 108  			r := match(str, pat[1:], slen, counter, maxcomp)
 109  			if r != rNoMatch {
 110  				return r
 111  			}
 112  			if len(str) == 0 {
 113  				return rNoMatch
 114  			}
 115  			wild = true
 116  		default:
 117  			if ss == 0 {
 118  				return rNoMatch
 119  			}
 120  			if pc == '\\' {
 121  				pat = pat[ps:]
 122  				pc, ps = utf8.DecodeRuneInString(pat)
 123  				if ps == 0 {
 124  					return rNoMatch
 125  				}
 126  			}
 127  			if sc != pc {
 128  				return rNoMatch
 129  			}
 130  		}
 131  		str = str[ss:]
 132  		if !wild {
 133  			pat = pat[ps:]
 134  		}
 135  	}
 136  	if len(str) == 0 {
 137  		return rMatch
 138  	}
 139  	return rNoMatch
 140  }
 141  
 142  // matchTrimSuffix matches and trims any non-wildcard suffix characters.
 143  // Returns the trimed string and pattern.
 144  //
 145  // This is called because the pattern contains extra data after the wildcard
 146  // star. Here we compare any suffix characters in the pattern to the suffix of
 147  // the target string. Basically a reverse match that stops when a wildcard
 148  // character is reached. This is a little trickier than a forward match because
 149  // we need to evaluate an escaped character in reverse.
 150  //
 151  // Any matched characters will be trimmed from both the target
 152  // string and the pattern.
 153  func matchTrimSuffix(str, pat string) (string, string, bool) {
 154  	// It's expected that the pattern has at least two bytes and the first byte
 155  	// is a wildcard star '*'
 156  	match := true
 157  	for len(str) > 0 && len(pat) > 1 {
 158  		pc, ps := utf8.DecodeLastRuneInString(pat)
 159  		var esc bool
 160  		for i := 0; ; i++ {
 161  			if pat[len(pat)-ps-i-1] != '\\' {
 162  				if i&1 == 1 {
 163  					esc = true
 164  					ps++
 165  				}
 166  				break
 167  			}
 168  		}
 169  		if pc == '*' && !esc {
 170  			match = true
 171  			break
 172  		}
 173  		sc, ss := utf8.DecodeLastRuneInString(str)
 174  		if !((pc == '?' && !esc) || pc == sc) {
 175  			match = false
 176  			break
 177  		}
 178  		str = str[:len(str)-ss]
 179  		pat = pat[:len(pat)-ps]
 180  	}
 181  	return str, pat, match
 182  }
 183  
 184  var maxRuneBytes = [...]byte{244, 143, 191, 191}
 185  
 186  // Allowable parses the pattern and determines the minimum and maximum allowable
 187  // values that the pattern can represent.
 188  // When the max cannot be determined, 'true' will be returned
 189  // for infinite.
 190  func Allowable(pattern string) (min, max string) {
 191  	if pattern == "" || pattern[0] == '*' {
 192  		return "", ""
 193  	}
 194  
 195  	minb := make([]byte, 0, len(pattern))
 196  	maxb := make([]byte, 0, len(pattern))
 197  	var wild bool
 198  	for i := 0; i < len(pattern); i++ {
 199  		if pattern[i] == '*' {
 200  			wild = true
 201  			break
 202  		}
 203  		if pattern[i] == '?' {
 204  			minb = append(minb, 0)
 205  			maxb = append(maxb, maxRuneBytes[:]...)
 206  		} else {
 207  			minb = append(minb, pattern[i])
 208  			maxb = append(maxb, pattern[i])
 209  		}
 210  	}
 211  	if wild {
 212  		r, n := utf8.DecodeLastRune(maxb)
 213  		if r != utf8.RuneError {
 214  			if r < utf8.MaxRune {
 215  				r++
 216  				if r > 0x7f {
 217  					b := make([]byte, 4)
 218  					nn := utf8.EncodeRune(b, r)
 219  					maxb = append(maxb[:len(maxb)-n], b[:nn]...)
 220  				} else {
 221  					maxb = append(maxb[:len(maxb)-n], byte(r))
 222  				}
 223  			}
 224  		}
 225  	}
 226  	return string(minb), string(maxb)
 227  }
 228  
 229  // IsPattern returns true if the string is a pattern.
 230  func IsPattern(str string) bool {
 231  	for i := 0; i < len(str); i++ {
 232  		if str[i] == '*' || str[i] == '?' {
 233  			return true
 234  		}
 235  	}
 236  	return false
 237  }
 238