parser.go raw
1 //
2 // Copyright 2024 CloudWeGo Authors
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 package expr
18
19 import (
20 "strconv"
21 "unicode"
22 "unsafe"
23 )
24
25 type _TokenKind uint8
26
27 const (
28 _T_end _TokenKind = iota + 1
29 _T_int
30 _T_punc
31 _T_name
32 )
33
34 const (
35 _OP2 = 0x80
36 _POW = _OP2 | '*'
37 _SHL = _OP2 | '<'
38 _SHR = _OP2 | '>'
39 )
40
41 type _Slice struct {
42 p unsafe.Pointer
43 n int
44 c int
45 }
46
47 type _Token struct {
48 pos int
49 ptr *rune
50 u64 uint64
51 tag _TokenKind
52 }
53
54 func (self _Token) str() (v string) {
55 return string(self.rbuf())
56 }
57
58 func (self _Token) rbuf() (v []rune) {
59 (*_Slice)(unsafe.Pointer(&v)).c = int(self.u64)
60 (*_Slice)(unsafe.Pointer(&v)).n = int(self.u64)
61 (*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr)
62 return
63 }
64
65 func tokenEnd(p int) _Token {
66 return _Token{
67 pos: p,
68 tag: _T_end,
69 }
70 }
71
72 func tokenInt(p int, v uint64) _Token {
73 return _Token{
74 pos: p,
75 u64: v,
76 tag: _T_int,
77 }
78 }
79
80 func tokenPunc(p int, v rune) _Token {
81 return _Token{
82 pos: p,
83 tag: _T_punc,
84 u64: uint64(v),
85 }
86 }
87
88 func tokenName(p int, v []rune) _Token {
89 return _Token{
90 pos: p,
91 ptr: &v[0],
92 tag: _T_name,
93 u64: uint64(len(v)),
94 }
95 }
96
97 // Repository represents a repository of Term's.
98 type Repository interface {
99 Get(name string) (Term, error)
100 }
101
102 // Parser parses an expression string to it's AST representation.
103 type Parser struct {
104 pos int
105 src []rune
106 }
107
108 var binaryOps = [...]func(*Expr, *Expr) *Expr{
109 '+': (*Expr).Add,
110 '-': (*Expr).Sub,
111 '*': (*Expr).Mul,
112 '/': (*Expr).Div,
113 '%': (*Expr).Mod,
114 '&': (*Expr).And,
115 '^': (*Expr).Xor,
116 '|': (*Expr).Or,
117 _SHL: (*Expr).Shl,
118 _SHR: (*Expr).Shr,
119 _POW: (*Expr).Pow,
120 }
121
122 var precedence = [...]map[int]bool{
123 {_SHL: true, _SHR: true},
124 {'|': true},
125 {'^': true},
126 {'&': true},
127 {'+': true, '-': true},
128 {'*': true, '/': true, '%': true},
129 {_POW: true},
130 }
131
132 func (self *Parser) ch() rune {
133 return self.src[self.pos]
134 }
135
136 func (self *Parser) eof() bool {
137 return self.pos >= len(self.src)
138 }
139
140 func (self *Parser) rch() (v rune) {
141 v, self.pos = self.src[self.pos], self.pos+1
142 return
143 }
144
145 func (self *Parser) hex(ss []rune) bool {
146 if len(ss) == 1 && ss[0] == '0' {
147 return unicode.ToLower(self.ch()) == 'x'
148 } else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' {
149 return unicode.IsDigit(self.ch())
150 } else {
151 return ishexdigit(self.ch())
152 }
153 }
154
155 func (self *Parser) int(p int, ss []rune) (_Token, error) {
156 var err error
157 var val uint64
158
159 /* find all the digits */
160 for !self.eof() && self.hex(ss) {
161 ss = append(ss, self.rch())
162 }
163
164 /* parse the value */
165 if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil {
166 return _Token{}, err
167 } else {
168 return tokenInt(p, val), nil
169 }
170 }
171
172 func (self *Parser) name(p int, ss []rune) _Token {
173 for !self.eof() && isident(self.ch()) {
174 ss = append(ss, self.rch())
175 }
176 return tokenName(p, ss)
177 }
178
179 func (self *Parser) read(p int, ch rune) (_Token, error) {
180 if isdigit(ch) {
181 return self.int(p, []rune{ch})
182 } else if isident0(ch) {
183 return self.name(p, []rune{ch}), nil
184 } else if isop2ch(ch) && !self.eof() && self.ch() == ch {
185 return tokenPunc(p, _OP2|self.rch()), nil
186 } else if isop1ch(ch) {
187 return tokenPunc(p, ch), nil
188 } else {
189 return _Token{}, newSyntaxError(self.pos, "invalid character "+strconv.QuoteRuneToASCII(ch))
190 }
191 }
192
193 func (self *Parser) next() (_Token, error) {
194 for {
195 var p int
196 var c rune
197
198 /* check for EOF */
199 if self.eof() {
200 return tokenEnd(self.pos), nil
201 }
202
203 /* read the next char */
204 p = self.pos
205 c = self.rch()
206
207 /* parse the token if not a space */
208 if !unicode.IsSpace(c) {
209 return self.read(p, c)
210 }
211 }
212 }
213
214 func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) {
215 if repo == nil {
216 return nil, newSyntaxError(tk.pos, "unresolved symbol: "+tk.str())
217 } else if term, err := repo.Get(tk.str()); err != nil {
218 return nil, err
219 } else {
220 return Ref(term), nil
221 }
222 }
223
224 func (self *Parser) nest(nest int, repo Repository) (*Expr, error) {
225 var err error
226 var ret *Expr
227 var ntk _Token
228
229 /* evaluate the nested expression */
230 if ret, err = self.expr(0, nest+1, repo); err != nil {
231 return nil, err
232 }
233
234 /* must follows with a ')' */
235 if ntk, err = self.next(); err != nil {
236 return nil, err
237 } else if ntk.tag != _T_punc || ntk.u64 != ')' {
238 return nil, newSyntaxError(ntk.pos, "')' expected")
239 } else {
240 return ret, nil
241 }
242 }
243
244 func (self *Parser) unit(nest int, repo Repository) (*Expr, error) {
245 if tk, err := self.next(); err != nil {
246 return nil, err
247 } else if tk.tag == _T_int {
248 return Int(int64(tk.u64)), nil
249 } else if tk.tag == _T_name {
250 return self.grab(tk, repo)
251 } else if tk.tag == _T_punc && tk.u64 == '(' {
252 return self.nest(nest, repo)
253 } else if tk.tag == _T_punc && tk.u64 == '+' {
254 return self.unit(nest, repo)
255 } else if tk.tag == _T_punc && tk.u64 == '-' {
256 return neg2(self.unit(nest, repo))
257 } else if tk.tag == _T_punc && tk.u64 == '~' {
258 return not2(self.unit(nest, repo))
259 } else {
260 return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected")
261 }
262 }
263
264 func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) {
265 var err error
266 var val *Expr
267
268 /* parse the LHS operand */
269 if val, err = self.expr(prec+1, nest, repo); err != nil {
270 return nil, err
271 }
272
273 /* parse all the operators of the same precedence */
274 for {
275 var op int
276 var rv *Expr
277 var tk _Token
278
279 /* peek the next token */
280 pp := self.pos
281 tk, err = self.next()
282
283 /* check for errors */
284 if err != nil {
285 return nil, err
286 }
287
288 /* encountered EOF */
289 if tk.tag == _T_end {
290 return val, nil
291 }
292
293 /* must be an operator */
294 if tk.tag != _T_punc {
295 return nil, newSyntaxError(tk.pos, "operators expected")
296 }
297
298 /* check for the operator precedence */
299 if op = int(tk.u64); !precedence[prec][op] {
300 self.pos = pp
301 return val, nil
302 }
303
304 /* evaluate the RHS operand, and combine the value */
305 if rv, err = self.expr(prec+1, nest, repo); err != nil {
306 return nil, err
307 } else {
308 val = binaryOps[op](val, rv)
309 }
310 }
311 }
312
313 func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) {
314 if prec >= len(precedence) {
315 return self.unit(nest, repo)
316 } else {
317 return self.term(prec, nest, repo)
318 }
319 }
320
321 // Parse parses the expression, and returns it's AST tree.
322 func (self *Parser) Parse(repo Repository) (*Expr, error) {
323 return self.expr(0, 0, repo)
324 }
325
326 // SetSource resets the expression parser and sets the expression source.
327 func (self *Parser) SetSource(src string) *Parser {
328 self.pos = 0
329 self.src = []rune(src)
330 return self
331 }
332