1 // Package match provides a simple pattern matcher with unicode support.
2 package match
3 4 import (
5 "unicode/utf8"
6 )
7 8 // Match returns true if str matches pattern. This is a very
9 // simple wildcard match where '*' matches on any number characters
10 // and '?' matches on any one character.
11 //
12 // pattern:
13 // { term }
14 // term:
15 // '*' matches any sequence of non-Separator characters
16 // '?' matches any single non-Separator character
17 // c matches character c (c != '*', '?', '\\')
18 // '\\' c matches character c
19 //
20 func Match(str, pattern string) bool {
21 if pattern == "*" {
22 return true
23 }
24 return match(str, pattern, 0, nil, -1) == rMatch
25 }
26 27 // MatchLimit is the same as Match but will limit the complexity of the match
28 // operation. This is to avoid long running matches, specifically to avoid ReDos
29 // attacks from arbritary inputs.
30 //
31 // How it works:
32 // The underlying match routine is recursive and may call itself when it
33 // encounters a sandwiched wildcard pattern, such as: `user:*:name`.
34 // Everytime it calls itself a counter is incremented.
35 // The operation is stopped when counter > maxcomp*len(str).
36 func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
37 if pattern == "*" {
38 return true, false
39 }
40 counter := 0
41 r := match(str, pattern, len(str), &counter, maxcomp)
42 if r == rStop {
43 return false, true
44 }
45 return r == rMatch, false
46 }
47 48 type result int
49 50 const (
51 rNoMatch result = iota
52 rMatch
53 rStop
54 )
55 56 func match(str, pat string, slen int, counter *int, maxcomp int) result {
57 // check complexity limit
58 if maxcomp > -1 {
59 if *counter > slen*maxcomp {
60 return rStop
61 }
62 *counter++
63 }
64 65 for len(pat) > 0 {
66 var wild bool
67 pc, ps := rune(pat[0]), 1
68 if pc > 0x7f {
69 pc, ps = utf8.DecodeRuneInString(pat)
70 }
71 var sc rune
72 var ss int
73 if len(str) > 0 {
74 sc, ss = rune(str[0]), 1
75 if sc > 0x7f {
76 sc, ss = utf8.DecodeRuneInString(str)
77 }
78 }
79 switch pc {
80 case '?':
81 if ss == 0 {
82 return rNoMatch
83 }
84 case '*':
85 // Ignore repeating stars.
86 for len(pat) > 1 && pat[1] == '*' {
87 pat = pat[1:]
88 }
89 90 // If this star is the last character then it must be a match.
91 if len(pat) == 1 {
92 return rMatch
93 }
94 95 // Match and trim any non-wildcard suffix characters.
96 var ok bool
97 str, pat, ok = matchTrimSuffix(str, pat)
98 if !ok {
99 return rNoMatch
100 }
101 102 // Check for single star again.
103 if len(pat) == 1 {
104 return rMatch
105 }
106 107 // Perform recursive wildcard search.
108 r := match(str, pat[1:], slen, counter, maxcomp)
109 if r != rNoMatch {
110 return r
111 }
112 if len(str) == 0 {
113 return rNoMatch
114 }
115 wild = true
116 default:
117 if ss == 0 {
118 return rNoMatch
119 }
120 if pc == '\\' {
121 pat = pat[ps:]
122 pc, ps = utf8.DecodeRuneInString(pat)
123 if ps == 0 {
124 return rNoMatch
125 }
126 }
127 if sc != pc {
128 return rNoMatch
129 }
130 }
131 str = str[ss:]
132 if !wild {
133 pat = pat[ps:]
134 }
135 }
136 if len(str) == 0 {
137 return rMatch
138 }
139 return rNoMatch
140 }
141 142 // matchTrimSuffix matches and trims any non-wildcard suffix characters.
143 // Returns the trimed string and pattern.
144 //
145 // This is called because the pattern contains extra data after the wildcard
146 // star. Here we compare any suffix characters in the pattern to the suffix of
147 // the target string. Basically a reverse match that stops when a wildcard
148 // character is reached. This is a little trickier than a forward match because
149 // we need to evaluate an escaped character in reverse.
150 //
151 // Any matched characters will be trimmed from both the target
152 // string and the pattern.
153 func matchTrimSuffix(str, pat string) (string, string, bool) {
154 // It's expected that the pattern has at least two bytes and the first byte
155 // is a wildcard star '*'
156 match := true
157 for len(str) > 0 && len(pat) > 1 {
158 pc, ps := utf8.DecodeLastRuneInString(pat)
159 var esc bool
160 for i := 0; ; i++ {
161 if pat[len(pat)-ps-i-1] != '\\' {
162 if i&1 == 1 {
163 esc = true
164 ps++
165 }
166 break
167 }
168 }
169 if pc == '*' && !esc {
170 match = true
171 break
172 }
173 sc, ss := utf8.DecodeLastRuneInString(str)
174 if !((pc == '?' && !esc) || pc == sc) {
175 match = false
176 break
177 }
178 str = str[:len(str)-ss]
179 pat = pat[:len(pat)-ps]
180 }
181 return str, pat, match
182 }
183 184 var maxRuneBytes = [...]byte{244, 143, 191, 191}
185 186 // Allowable parses the pattern and determines the minimum and maximum allowable
187 // values that the pattern can represent.
188 // When the max cannot be determined, 'true' will be returned
189 // for infinite.
190 func Allowable(pattern string) (min, max string) {
191 if pattern == "" || pattern[0] == '*' {
192 return "", ""
193 }
194 195 minb := make([]byte, 0, len(pattern))
196 maxb := make([]byte, 0, len(pattern))
197 var wild bool
198 for i := 0; i < len(pattern); i++ {
199 if pattern[i] == '*' {
200 wild = true
201 break
202 }
203 if pattern[i] == '?' {
204 minb = append(minb, 0)
205 maxb = append(maxb, maxRuneBytes[:]...)
206 } else {
207 minb = append(minb, pattern[i])
208 maxb = append(maxb, pattern[i])
209 }
210 }
211 if wild {
212 r, n := utf8.DecodeLastRune(maxb)
213 if r != utf8.RuneError {
214 if r < utf8.MaxRune {
215 r++
216 if r > 0x7f {
217 b := make([]byte, 4)
218 nn := utf8.EncodeRune(b, r)
219 maxb = append(maxb[:len(maxb)-n], b[:nn]...)
220 } else {
221 maxb = append(maxb[:len(maxb)-n], byte(r))
222 }
223 }
224 }
225 }
226 return string(minb), string(maxb)
227 }
228 229 // IsPattern returns true if the string is a pattern.
230 func IsPattern(str string) bool {
231 for i := 0; i < len(str); i++ {
232 if str[i] == '*' || str[i] == '?' {
233 return true
234 }
235 }
236 return false
237 }
238