const.go raw

   1  package cgo
   2  
   3  // This file implements a parser of a subset of the C language, just enough to
   4  // parse common #define statements to Go constant expressions.
   5  
   6  import (
   7  	"fmt"
   8  	"go/ast"
   9  	"go/scanner"
  10  	"go/token"
  11  	"strings"
  12  )
  13  
var (
	// prefixParseFns maps a token kind to the parse function used when that
	// token appears at the start of a (sub)expression. It is filled in by
	// the init function below to avoid an initialization order failure.
	prefixParseFns map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error)
	// precedences maps each supported binary operator to its precedence
	// level; higher values bind tighter. Tokens missing from this map get
	// the zero value, which is lower than precedenceLowest and therefore
	// ends expression parsing.
	precedences = map[token.Token]int{
		token.OR:  precedenceOr,
		token.XOR: precedenceXor,
		token.AND: precedenceAnd,
		token.SHL: precedenceShift,
		token.SHR: precedenceShift,
		token.ADD: precedenceAdd,
		token.SUB: precedenceAdd,
		token.MUL: precedenceMul,
		token.QUO: precedenceMul,
		token.REM: precedenceMul,
	}
)
  29  
// Binary operator precedence levels, from loosest to tightest binding.
// See: https://en.cppreference.com/w/c/language/operator_precedence
const (
	precedenceLowest = iota + 1 // starting precedence for a full expression
	precedenceOr                // |
	precedenceXor               // ^
	precedenceAnd               // &
	precedenceShift             // << >>
	precedenceAdd               // + -
	precedenceMul               // * / %
	precedencePrefix            // unary prefix operators such as -
)
  41  
  42  func init() {
  43  	// This must be done in an init function to avoid an initialization order
  44  	// failure.
  45  	prefixParseFns = map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error){
  46  		token.IDENT:  parseIdent,
  47  		token.INT:    parseBasicLit,
  48  		token.FLOAT:  parseBasicLit,
  49  		token.STRING: parseBasicLit,
  50  		token.CHAR:   parseBasicLit,
  51  		token.LPAREN: parseParenExpr,
  52  		token.SUB:    parseUnaryExpr,
  53  	}
  54  }
  55  
  56  // parseConst parses the given string as a C constant.
  57  func parseConst(pos token.Pos, fset *token.FileSet, value string, params []ast.Expr, callerPos token.Pos, f *cgoFile) (ast.Expr, *scanner.Error) {
  58  	t := newTokenizer(pos, fset, value, f)
  59  
  60  	// If params is non-nil (could be a zero length slice), this const is
  61  	// actually a function-call like expression from another macro.
  62  	// This means we have to parse a string like "(a, b) (a+b)".
  63  	// We do this by parsing the parameters at the start and then treating the
  64  	// following like a normal constant expression.
  65  	if params != nil {
  66  		// Parse opening paren.
  67  		if t.curToken != token.LPAREN {
  68  			return nil, unexpectedToken(t, token.LPAREN)
  69  		}
  70  		t.Next()
  71  
  72  		// Parse parameters (identifiers) and closing paren.
  73  		var paramIdents []string
  74  		for i := 0; ; i++ {
  75  			if i == 0 && t.curToken == token.RPAREN {
  76  				// No parameters, break early.
  77  				t.Next()
  78  				break
  79  			}
  80  
  81  			// Read the parameter name.
  82  			if t.curToken != token.IDENT {
  83  				return nil, unexpectedToken(t, token.IDENT)
  84  			}
  85  			paramIdents = append(paramIdents, t.curValue)
  86  			t.Next()
  87  
  88  			// Read the next token: either a continuation (comma) or end of list
  89  			// (rparen).
  90  			if t.curToken == token.RPAREN {
  91  				// End of parameter list.
  92  				t.Next()
  93  				break
  94  			} else if t.curToken == token.COMMA {
  95  				// Comma, so there will be another parameter name.
  96  				t.Next()
  97  			} else {
  98  				return nil, &scanner.Error{
  99  					Pos: t.fset.Position(t.curPos),
 100  					Msg: "unexpected token " + t.curToken.String() + " inside macro parameters, expected ',' or ')'",
 101  				}
 102  			}
 103  		}
 104  
 105  		// Report an error if there is a mismatch in parameter length.
 106  		// The error is reported at the location of the closing paren from the
 107  		// caller location.
 108  		if len(params) != len(paramIdents) {
 109  			return nil, &scanner.Error{
 110  				Pos: t.fset.Position(callerPos),
 111  				Msg: fmt.Sprintf("unexpected number of parameters: expected %d, got %d", len(paramIdents), len(params)),
 112  			}
 113  		}
 114  
 115  		// Assign values to the parameters.
 116  		// These parameter names are closer in 'scope' than other identifiers so
 117  		// will be used first when parsing an identifier.
 118  		for i, name := range paramIdents {
 119  			t.params[name] = params[i]
 120  		}
 121  	}
 122  
 123  	expr, err := parseConstExpr(t, precedenceLowest)
 124  	t.Next()
 125  	if t.curToken != token.EOF {
 126  		return nil, &scanner.Error{
 127  			Pos: t.fset.Position(t.curPos),
 128  			Msg: "unexpected token " + t.curToken.String() + ", expected end of expression",
 129  		}
 130  	}
 131  	return expr, err
 132  }
 133  
 134  // parseConstExpr parses a stream of C tokens to a Go expression.
 135  func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
 136  	if t.curToken == token.EOF {
 137  		return nil, &scanner.Error{
 138  			Pos: t.fset.Position(t.curPos),
 139  			Msg: "empty constant",
 140  		}
 141  	}
 142  	prefix := prefixParseFns[t.curToken]
 143  	if prefix == nil {
 144  		return nil, &scanner.Error{
 145  			Pos: t.fset.Position(t.curPos),
 146  			Msg: fmt.Sprintf("unexpected token %s", t.curToken),
 147  		}
 148  	}
 149  	leftExpr, err := prefix(t)
 150  
 151  	for t.peekToken != token.EOF && precedence < precedences[t.peekToken] {
 152  		switch t.peekToken {
 153  		case token.OR, token.XOR, token.AND, token.SHL, token.SHR, token.ADD, token.SUB, token.MUL, token.QUO, token.REM:
 154  			t.Next()
 155  			leftExpr, err = parseBinaryExpr(t, leftExpr)
 156  		}
 157  	}
 158  
 159  	return leftExpr, err
 160  }
 161  
// parseIdent parses the identifier at the current token. The identifier may
// resolve, in order of preference, to: a parameter of the function-like macro
// currently being expanded, a nested macro "call" (recursing into parseConst),
// a name known to the cgo file, or a plain "C.<name>" fallback.
func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
	// If the identifier is one of the parameters of this function-like macro,
	// use the parameter value.
	if val, ok := t.params[t.curValue]; ok {
		return val, nil
	}

	if t.f != nil {
		// Check whether this identifier is actually a macro "call" with
		// parameters. In that case, we should parse the parameters and pass it
		// on to a new invocation of parseConst.
		if t.peekToken == token.LPAREN {
			if cursor, ok := t.f.names[t.curValue]; ok && t.f.isFunctionLikeMacro(cursor) {
				// We know the current and peek tokens (the peek one is the '('
				// token). So skip ahead until the current token is the first
				// unknown token.
				t.Next()
				t.Next()

				// Parse the list of parameters until ')' (rparen) is found.
				// Note: this must stay a non-nil (possibly empty) slice,
				// because parseConst uses `params != nil` to detect a
				// function-like macro expansion.
				params := []ast.Expr{}
				for i := 0; ; i++ {
					if i == 0 && t.curToken == token.RPAREN {
						// Empty argument list: NAME().
						break
					}
					x, err := parseConstExpr(t, precedenceLowest)
					if err != nil {
						return nil, err
					}
					params = append(params, x)
					t.Next()
					if t.curToken == token.COMMA {
						t.Next()
					} else if t.curToken == token.RPAREN {
						break
					} else {
						return nil, &scanner.Error{
							Pos: t.fset.Position(t.curPos),
							Msg: "unexpected token " + t.curToken.String() + ", ',' or ')'",
						}
					}
				}

				// Evaluate the macro value and use it as the identifier value.
				rparen := t.curPos
				pos, text := t.f.getMacro(cursor)
				return parseConst(pos, t.fset, text, params, rparen, t.f)
			}
		}

		// Normally the name is something defined in the file (like another
		// macro) which we get the declaration from using getASTDeclName.
		// This ensures that names that are only referenced inside a macro are
		// still getting defined.
		if cursor, ok := t.f.names[t.curValue]; ok {
			return &ast.Ident{
				NamePos: t.curPos,
				Name:    t.f.getASTDeclName(t.curValue, cursor, false),
			}, nil
		}
	}

	// t.f is nil during testing. This is a fallback.
	return &ast.Ident{
		NamePos: t.curPos,
		Name:    "C." + t.curValue,
	}, nil
}
 230  
 231  func parseBasicLit(t *tokenizer) (ast.Expr, *scanner.Error) {
 232  	return &ast.BasicLit{
 233  		ValuePos: t.curPos,
 234  		Kind:     t.curToken,
 235  		Value:    t.curValue,
 236  	}, nil
 237  }
 238  
 239  func parseParenExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
 240  	lparen := t.curPos
 241  	t.Next()
 242  	x, err := parseConstExpr(t, precedenceLowest)
 243  	if err != nil {
 244  		return nil, err
 245  	}
 246  	t.Next()
 247  	if t.curToken != token.RPAREN {
 248  		return nil, unexpectedToken(t, token.RPAREN)
 249  	}
 250  	expr := &ast.ParenExpr{
 251  		Lparen: lparen,
 252  		X:      x,
 253  		Rparen: t.curPos,
 254  	}
 255  	return expr, nil
 256  }
 257  
 258  func parseBinaryExpr(t *tokenizer, left ast.Expr) (ast.Expr, *scanner.Error) {
 259  	expression := &ast.BinaryExpr{
 260  		X:     left,
 261  		Op:    t.curToken,
 262  		OpPos: t.curPos,
 263  	}
 264  	precedence := precedences[t.curToken]
 265  	t.Next()
 266  	right, err := parseConstExpr(t, precedence)
 267  	expression.Y = right
 268  	return expression, err
 269  }
 270  
 271  func parseUnaryExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
 272  	expression := &ast.UnaryExpr{
 273  		OpPos: t.curPos,
 274  		Op:    t.curToken,
 275  	}
 276  	t.Next()
 277  	x, err := parseConstExpr(t, precedencePrefix)
 278  	expression.X = x
 279  	return expression, err
 280  }
 281  
 282  // unexpectedToken returns an error of the form "unexpected token FOO, expected
 283  // BAR".
 284  func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
 285  	return &scanner.Error{
 286  		Pos: t.fset.Position(t.curPos),
 287  		Msg: fmt.Sprintf("unexpected token %s, expected %s", t.curToken, expected),
 288  	}
 289  }
 290  
// tokenizer reads C source code and converts it to Go tokens. It keeps one
// token of lookahead: cur* describe the current token and peek* the next one.
type tokenizer struct {
	f                   *cgoFile            // used to resolve identifiers; nil during testing
	curPos, peekPos     token.Pos           // positions of the current and lookahead token
	fset                *token.FileSet      // converts positions for error messages
	curToken, peekToken token.Token         // kinds of the current and lookahead token
	curValue, peekValue string              // raw text of the current and lookahead token
	buf                 string              // remaining unscanned input
	params              map[string]ast.Expr // macro parameter values, keyed by parameter name
}
 301  
 302  // newTokenizer initializes a new tokenizer, positioned at the first token in
 303  // the string.
 304  func newTokenizer(start token.Pos, fset *token.FileSet, buf string, f *cgoFile) *tokenizer {
 305  	t := &tokenizer{
 306  		f:         f,
 307  		peekPos:   start,
 308  		fset:      fset,
 309  		buf:       buf,
 310  		peekToken: token.ILLEGAL,
 311  		params:    make(map[string]ast.Expr),
 312  	}
 313  	// Parse the first two tokens (cur and peek).
 314  	t.Next()
 315  	t.Next()
 316  	return t
 317  }
 318  
 319  // Next consumes the next token in the stream. There is no return value, read
 320  // the next token from the pos, token and value properties.
 321  func (t *tokenizer) Next() {
 322  	// The previous peek is now the current token.
 323  	t.curPos = t.peekPos
 324  	t.curToken = t.peekToken
 325  	t.curValue = t.peekValue
 326  
 327  	// Parse the next peek token.
 328  	if t.peekPos != token.NoPos {
 329  		t.peekPos += token.Pos(len(t.curValue))
 330  	}
 331  	for {
 332  		if len(t.buf) == 0 {
 333  			t.peekToken = token.EOF
 334  			return
 335  		}
 336  		c := t.buf[0]
 337  		switch {
 338  		case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
 339  			// Skip whitespace.
 340  			// Based on this source, not sure whether it represents C whitespace:
 341  			// https://en.cppreference.com/w/cpp/string/byte/isspace
 342  			if t.peekPos != token.NoPos {
 343  				t.peekPos++
 344  			}
 345  			t.buf = t.buf[1:]
 346  		case len(t.buf) >= 2 && (string(t.buf[:2]) == "||" || string(t.buf[:2]) == "&&" || string(t.buf[:2]) == "<<" || string(t.buf[:2]) == ">>"):
 347  			// Two-character tokens.
 348  			switch c {
 349  			case '&':
 350  				t.peekToken = token.LAND
 351  			case '|':
 352  				t.peekToken = token.LOR
 353  			case '<':
 354  				t.peekToken = token.SHL
 355  			case '>':
 356  				t.peekToken = token.SHR
 357  			default:
 358  				panic("unreachable")
 359  			}
 360  			t.peekValue = t.buf[:2]
 361  			t.buf = t.buf[2:]
 362  			return
 363  		case c == '(' || c == ')' || c == ',' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
 364  			// Single-character tokens.
 365  			// TODO: ++ (increment) and -- (decrement) operators.
 366  			switch c {
 367  			case '(':
 368  				t.peekToken = token.LPAREN
 369  			case ')':
 370  				t.peekToken = token.RPAREN
 371  			case ',':
 372  				t.peekToken = token.COMMA
 373  			case '+':
 374  				t.peekToken = token.ADD
 375  			case '-':
 376  				t.peekToken = token.SUB
 377  			case '*':
 378  				t.peekToken = token.MUL
 379  			case '/':
 380  				t.peekToken = token.QUO
 381  			case '%':
 382  				t.peekToken = token.REM
 383  			case '&':
 384  				t.peekToken = token.AND
 385  			case '|':
 386  				t.peekToken = token.OR
 387  			case '^':
 388  				t.peekToken = token.XOR
 389  			}
 390  			t.peekValue = t.buf[:1]
 391  			t.buf = t.buf[1:]
 392  			return
 393  		case c >= '0' && c <= '9':
 394  			// Numeric constant (int, float, etc.).
 395  			// Find the last non-numeric character.
 396  			tokenLen := len(t.buf)
 397  			hasDot := false
 398  			for i, c := range t.buf {
 399  				if c == '.' {
 400  					hasDot = true
 401  				}
 402  				if c >= '0' && c <= '9' || c == '.' || c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' {
 403  					tokenLen = i + 1
 404  				} else {
 405  					break
 406  				}
 407  			}
 408  			t.peekValue = t.buf[:tokenLen]
 409  			t.buf = t.buf[tokenLen:]
 410  			if hasDot {
 411  				// Integer constants are more complicated than this but this is
 412  				// a close approximation.
 413  				// https://en.cppreference.com/w/cpp/language/integer_literal
 414  				t.peekToken = token.FLOAT
 415  				t.peekValue = strings.TrimRight(t.peekValue, "f")
 416  			} else {
 417  				t.peekToken = token.INT
 418  				t.peekValue = strings.TrimRight(t.peekValue, "uUlL")
 419  			}
 420  			return
 421  		case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_':
 422  			// Identifier. Find all remaining tokens that are part of this
 423  			// identifier.
 424  			tokenLen := len(t.buf)
 425  			for i, c := range t.buf {
 426  				if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_' {
 427  					tokenLen = i + 1
 428  				} else {
 429  					break
 430  				}
 431  			}
 432  			t.peekValue = t.buf[:tokenLen]
 433  			t.buf = t.buf[tokenLen:]
 434  			t.peekToken = token.IDENT
 435  			return
 436  		case c == '"':
 437  			// String constant. Find the first '"' character that is not
 438  			// preceded by a backslash.
 439  			escape := false
 440  			tokenLen := len(t.buf)
 441  			for i, c := range t.buf {
 442  				if i != 0 && c == '"' && !escape {
 443  					tokenLen = i + 1
 444  					break
 445  				}
 446  				if !escape {
 447  					escape = c == '\\'
 448  				}
 449  			}
 450  			t.peekToken = token.STRING
 451  			t.peekValue = t.buf[:tokenLen]
 452  			t.buf = t.buf[tokenLen:]
 453  			return
 454  		case c == '\'':
 455  			// Char (rune) constant. Find the first '\'' character that is not
 456  			// preceded by a backslash.
 457  			escape := false
 458  			tokenLen := len(t.buf)
 459  			for i, c := range t.buf {
 460  				if i != 0 && c == '\'' && !escape {
 461  					tokenLen = i + 1
 462  					break
 463  				}
 464  				if !escape {
 465  					escape = c == '\\'
 466  				}
 467  			}
 468  			t.peekToken = token.CHAR
 469  			t.peekValue = t.buf[:tokenLen]
 470  			t.buf = t.buf[tokenLen:]
 471  			return
 472  		default:
 473  			t.peekToken = token.ILLEGAL
 474  			return
 475  		}
 476  	}
 477  }
 478