parser.go raw

   1  //
   2  // Copyright 2024 CloudWeGo Authors
   3  //
   4  // Licensed under the Apache License, Version 2.0 (the "License");
   5  // you may not use this file except in compliance with the License.
   6  // You may obtain a copy of the License at
   7  //
   8  //     http://www.apache.org/licenses/LICENSE-2.0
   9  //
  10  // Unless required by applicable law or agreed to in writing, software
  11  // distributed under the License is distributed on an "AS IS" BASIS,
  12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  // See the License for the specific language governing permissions and
  14  // limitations under the License.
  15  //
  16  
  17  package expr
  18  
  19  import (
  20  	"strconv"
  21  	"unicode"
  22  	"unsafe"
  23  )
  24  
  25  type _TokenKind uint8
  26  
  27  const (
  28  	_T_end _TokenKind = iota + 1
  29  	_T_int
  30  	_T_punc
  31  	_T_name
  32  )
  33  
  34  const (
  35  	_OP2 = 0x80
  36  	_POW = _OP2 | '*'
  37  	_SHL = _OP2 | '<'
  38  	_SHR = _OP2 | '>'
  39  )
  40  
  41  type _Slice struct {
  42  	p unsafe.Pointer
  43  	n int
  44  	c int
  45  }
  46  
  47  type _Token struct {
  48  	pos int
  49  	ptr *rune
  50  	u64 uint64
  51  	tag _TokenKind
  52  }
  53  
  54  func (self _Token) str() (v string) {
  55  	return string(self.rbuf())
  56  }
  57  
  58  func (self _Token) rbuf() (v []rune) {
  59  	(*_Slice)(unsafe.Pointer(&v)).c = int(self.u64)
  60  	(*_Slice)(unsafe.Pointer(&v)).n = int(self.u64)
  61  	(*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr)
  62  	return
  63  }
  64  
  65  func tokenEnd(p int) _Token {
  66  	return _Token{
  67  		pos: p,
  68  		tag: _T_end,
  69  	}
  70  }
  71  
  72  func tokenInt(p int, v uint64) _Token {
  73  	return _Token{
  74  		pos: p,
  75  		u64: v,
  76  		tag: _T_int,
  77  	}
  78  }
  79  
  80  func tokenPunc(p int, v rune) _Token {
  81  	return _Token{
  82  		pos: p,
  83  		tag: _T_punc,
  84  		u64: uint64(v),
  85  	}
  86  }
  87  
  88  func tokenName(p int, v []rune) _Token {
  89  	return _Token{
  90  		pos: p,
  91  		ptr: &v[0],
  92  		tag: _T_name,
  93  		u64: uint64(len(v)),
  94  	}
  95  }
  96  
  97  // Repository represents a repository of Term's.
  98  type Repository interface {
  99  	Get(name string) (Term, error)
 100  }
 101  
 102  // Parser parses an expression string to it's AST representation.
 103  type Parser struct {
 104  	pos int
 105  	src []rune
 106  }
 107  
 108  var binaryOps = [...]func(*Expr, *Expr) *Expr{
 109  	'+':  (*Expr).Add,
 110  	'-':  (*Expr).Sub,
 111  	'*':  (*Expr).Mul,
 112  	'/':  (*Expr).Div,
 113  	'%':  (*Expr).Mod,
 114  	'&':  (*Expr).And,
 115  	'^':  (*Expr).Xor,
 116  	'|':  (*Expr).Or,
 117  	_SHL: (*Expr).Shl,
 118  	_SHR: (*Expr).Shr,
 119  	_POW: (*Expr).Pow,
 120  }
 121  
 122  var precedence = [...]map[int]bool{
 123  	{_SHL: true, _SHR: true},
 124  	{'|': true},
 125  	{'^': true},
 126  	{'&': true},
 127  	{'+': true, '-': true},
 128  	{'*': true, '/': true, '%': true},
 129  	{_POW: true},
 130  }
 131  
 132  func (self *Parser) ch() rune {
 133  	return self.src[self.pos]
 134  }
 135  
 136  func (self *Parser) eof() bool {
 137  	return self.pos >= len(self.src)
 138  }
 139  
 140  func (self *Parser) rch() (v rune) {
 141  	v, self.pos = self.src[self.pos], self.pos+1
 142  	return
 143  }
 144  
 145  func (self *Parser) hex(ss []rune) bool {
 146  	if len(ss) == 1 && ss[0] == '0' {
 147  		return unicode.ToLower(self.ch()) == 'x'
 148  	} else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' {
 149  		return unicode.IsDigit(self.ch())
 150  	} else {
 151  		return ishexdigit(self.ch())
 152  	}
 153  }
 154  
 155  func (self *Parser) int(p int, ss []rune) (_Token, error) {
 156  	var err error
 157  	var val uint64
 158  
 159  	/* find all the digits */
 160  	for !self.eof() && self.hex(ss) {
 161  		ss = append(ss, self.rch())
 162  	}
 163  
 164  	/* parse the value */
 165  	if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil {
 166  		return _Token{}, err
 167  	} else {
 168  		return tokenInt(p, val), nil
 169  	}
 170  }
 171  
 172  func (self *Parser) name(p int, ss []rune) _Token {
 173  	for !self.eof() && isident(self.ch()) {
 174  		ss = append(ss, self.rch())
 175  	}
 176  	return tokenName(p, ss)
 177  }
 178  
 179  func (self *Parser) read(p int, ch rune) (_Token, error) {
 180  	if isdigit(ch) {
 181  		return self.int(p, []rune{ch})
 182  	} else if isident0(ch) {
 183  		return self.name(p, []rune{ch}), nil
 184  	} else if isop2ch(ch) && !self.eof() && self.ch() == ch {
 185  		return tokenPunc(p, _OP2|self.rch()), nil
 186  	} else if isop1ch(ch) {
 187  		return tokenPunc(p, ch), nil
 188  	} else {
 189  		return _Token{}, newSyntaxError(self.pos, "invalid character "+strconv.QuoteRuneToASCII(ch))
 190  	}
 191  }
 192  
 193  func (self *Parser) next() (_Token, error) {
 194  	for {
 195  		var p int
 196  		var c rune
 197  
 198  		/* check for EOF */
 199  		if self.eof() {
 200  			return tokenEnd(self.pos), nil
 201  		}
 202  
 203  		/* read the next char */
 204  		p = self.pos
 205  		c = self.rch()
 206  
 207  		/* parse the token if not a space */
 208  		if !unicode.IsSpace(c) {
 209  			return self.read(p, c)
 210  		}
 211  	}
 212  }
 213  
 214  func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) {
 215  	if repo == nil {
 216  		return nil, newSyntaxError(tk.pos, "unresolved symbol: "+tk.str())
 217  	} else if term, err := repo.Get(tk.str()); err != nil {
 218  		return nil, err
 219  	} else {
 220  		return Ref(term), nil
 221  	}
 222  }
 223  
 224  func (self *Parser) nest(nest int, repo Repository) (*Expr, error) {
 225  	var err error
 226  	var ret *Expr
 227  	var ntk _Token
 228  
 229  	/* evaluate the nested expression */
 230  	if ret, err = self.expr(0, nest+1, repo); err != nil {
 231  		return nil, err
 232  	}
 233  
 234  	/* must follows with a ')' */
 235  	if ntk, err = self.next(); err != nil {
 236  		return nil, err
 237  	} else if ntk.tag != _T_punc || ntk.u64 != ')' {
 238  		return nil, newSyntaxError(ntk.pos, "')' expected")
 239  	} else {
 240  		return ret, nil
 241  	}
 242  }
 243  
 244  func (self *Parser) unit(nest int, repo Repository) (*Expr, error) {
 245  	if tk, err := self.next(); err != nil {
 246  		return nil, err
 247  	} else if tk.tag == _T_int {
 248  		return Int(int64(tk.u64)), nil
 249  	} else if tk.tag == _T_name {
 250  		return self.grab(tk, repo)
 251  	} else if tk.tag == _T_punc && tk.u64 == '(' {
 252  		return self.nest(nest, repo)
 253  	} else if tk.tag == _T_punc && tk.u64 == '+' {
 254  		return self.unit(nest, repo)
 255  	} else if tk.tag == _T_punc && tk.u64 == '-' {
 256  		return neg2(self.unit(nest, repo))
 257  	} else if tk.tag == _T_punc && tk.u64 == '~' {
 258  		return not2(self.unit(nest, repo))
 259  	} else {
 260  		return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected")
 261  	}
 262  }
 263  
 264  func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) {
 265  	var err error
 266  	var val *Expr
 267  
 268  	/* parse the LHS operand */
 269  	if val, err = self.expr(prec+1, nest, repo); err != nil {
 270  		return nil, err
 271  	}
 272  
 273  	/* parse all the operators of the same precedence */
 274  	for {
 275  		var op int
 276  		var rv *Expr
 277  		var tk _Token
 278  
 279  		/* peek the next token */
 280  		pp := self.pos
 281  		tk, err = self.next()
 282  
 283  		/* check for errors */
 284  		if err != nil {
 285  			return nil, err
 286  		}
 287  
 288  		/* encountered EOF */
 289  		if tk.tag == _T_end {
 290  			return val, nil
 291  		}
 292  
 293  		/* must be an operator */
 294  		if tk.tag != _T_punc {
 295  			return nil, newSyntaxError(tk.pos, "operators expected")
 296  		}
 297  
 298  		/* check for the operator precedence */
 299  		if op = int(tk.u64); !precedence[prec][op] {
 300  			self.pos = pp
 301  			return val, nil
 302  		}
 303  
 304  		/* evaluate the RHS operand, and combine the value */
 305  		if rv, err = self.expr(prec+1, nest, repo); err != nil {
 306  			return nil, err
 307  		} else {
 308  			val = binaryOps[op](val, rv)
 309  		}
 310  	}
 311  }
 312  
 313  func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) {
 314  	if prec >= len(precedence) {
 315  		return self.unit(nest, repo)
 316  	} else {
 317  		return self.term(prec, nest, repo)
 318  	}
 319  }
 320  
 321  // Parse parses the expression, and returns it's AST tree.
 322  func (self *Parser) Parse(repo Repository) (*Expr, error) {
 323  	return self.expr(0, 0, repo)
 324  }
 325  
 326  // SetSource resets the expression parser and sets the expression source.
 327  func (self *Parser) SetSource(src string) *Parser {
 328  	self.pos = 0
 329  	self.src = []rune(src)
 330  	return self
 331  }
 332