// const.go
1 package cgo
2
3 // This file implements a parser of a subset of the C language, just enough to
4 // parse common #define statements to Go constant expressions.
5
6 import (
7 "fmt"
8 "go/ast"
9 "go/scanner"
10 "go/token"
11 "strings"
12 )
13
var (
	// prefixParseFns maps a token type to the function that parses an
	// expression starting with that token. It is populated in init below
	// (see the comment there for why).
	prefixParseFns map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error)
	// precedences maps each supported binary operator to its precedence
	// level. Tokens missing from the map get the zero value, which is lower
	// than precedenceLowest, so they never bind in parseConstExpr.
	precedences = map[token.Token]int{
		token.OR:  precedenceOr,
		token.XOR: precedenceXor,
		token.AND: precedenceAnd,
		token.SHL: precedenceShift,
		token.SHR: precedenceShift,
		token.ADD: precedenceAdd,
		token.SUB: precedenceAdd,
		token.MUL: precedenceMul,
		token.QUO: precedenceMul,
		token.REM: precedenceMul,
	}
)
29
30 // See: https://en.cppreference.com/w/c/language/operator_precedence
// Binary/unary operator precedence levels, from lowest to highest binding.
// See: https://en.cppreference.com/w/c/language/operator_precedence
const (
	precedenceLowest = iota + 1
	precedenceOr
	precedenceXor
	precedenceAnd
	precedenceShift
	precedenceAdd
	precedenceMul
	precedencePrefix
)
41
func init() {
	// This must be done in an init function to avoid an initialization order
	// failure: parseParenExpr and parseUnaryExpr call parseConstExpr, which
	// reads prefixParseFns, so a package-level initializer would create a
	// reference cycle.
	prefixParseFns = map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error){
		token.IDENT:  parseIdent,
		token.INT:    parseBasicLit,
		token.FLOAT:  parseBasicLit,
		token.STRING: parseBasicLit,
		token.CHAR:   parseBasicLit,
		token.LPAREN: parseParenExpr,
		token.SUB:    parseUnaryExpr,
	}
}
55
56 // parseConst parses the given string as a C constant.
57 func parseConst(pos token.Pos, fset *token.FileSet, value string, params []ast.Expr, callerPos token.Pos, f *cgoFile) (ast.Expr, *scanner.Error) {
58 t := newTokenizer(pos, fset, value, f)
59
60 // If params is non-nil (could be a zero length slice), this const is
61 // actually a function-call like expression from another macro.
62 // This means we have to parse a string like "(a, b) (a+b)".
63 // We do this by parsing the parameters at the start and then treating the
64 // following like a normal constant expression.
65 if params != nil {
66 // Parse opening paren.
67 if t.curToken != token.LPAREN {
68 return nil, unexpectedToken(t, token.LPAREN)
69 }
70 t.Next()
71
72 // Parse parameters (identifiers) and closing paren.
73 var paramIdents []string
74 for i := 0; ; i++ {
75 if i == 0 && t.curToken == token.RPAREN {
76 // No parameters, break early.
77 t.Next()
78 break
79 }
80
81 // Read the parameter name.
82 if t.curToken != token.IDENT {
83 return nil, unexpectedToken(t, token.IDENT)
84 }
85 paramIdents = append(paramIdents, t.curValue)
86 t.Next()
87
88 // Read the next token: either a continuation (comma) or end of list
89 // (rparen).
90 if t.curToken == token.RPAREN {
91 // End of parameter list.
92 t.Next()
93 break
94 } else if t.curToken == token.COMMA {
95 // Comma, so there will be another parameter name.
96 t.Next()
97 } else {
98 return nil, &scanner.Error{
99 Pos: t.fset.Position(t.curPos),
100 Msg: "unexpected token " + t.curToken.String() + " inside macro parameters, expected ',' or ')'",
101 }
102 }
103 }
104
105 // Report an error if there is a mismatch in parameter length.
106 // The error is reported at the location of the closing paren from the
107 // caller location.
108 if len(params) != len(paramIdents) {
109 return nil, &scanner.Error{
110 Pos: t.fset.Position(callerPos),
111 Msg: fmt.Sprintf("unexpected number of parameters: expected %d, got %d", len(paramIdents), len(params)),
112 }
113 }
114
115 // Assign values to the parameters.
116 // These parameter names are closer in 'scope' than other identifiers so
117 // will be used first when parsing an identifier.
118 for i, name := range paramIdents {
119 t.params[name] = params[i]
120 }
121 }
122
123 expr, err := parseConstExpr(t, precedenceLowest)
124 t.Next()
125 if t.curToken != token.EOF {
126 return nil, &scanner.Error{
127 Pos: t.fset.Position(t.curPos),
128 Msg: "unexpected token " + t.curToken.String() + ", expected end of expression",
129 }
130 }
131 return expr, err
132 }
133
134 // parseConstExpr parses a stream of C tokens to a Go expression.
135 func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
136 if t.curToken == token.EOF {
137 return nil, &scanner.Error{
138 Pos: t.fset.Position(t.curPos),
139 Msg: "empty constant",
140 }
141 }
142 prefix := prefixParseFns[t.curToken]
143 if prefix == nil {
144 return nil, &scanner.Error{
145 Pos: t.fset.Position(t.curPos),
146 Msg: fmt.Sprintf("unexpected token %s", t.curToken),
147 }
148 }
149 leftExpr, err := prefix(t)
150
151 for t.peekToken != token.EOF && precedence < precedences[t.peekToken] {
152 switch t.peekToken {
153 case token.OR, token.XOR, token.AND, token.SHL, token.SHR, token.ADD, token.SUB, token.MUL, token.QUO, token.REM:
154 t.Next()
155 leftExpr, err = parseBinaryExpr(t, leftExpr)
156 }
157 }
158
159 return leftExpr, err
160 }
161
// parseIdent parses an identifier in a constant expression. The identifier
// may be a bound macro parameter, a function-like macro "call" (which is
// expanded recursively via parseConst), or a reference to another name known
// in the file.
func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
	// If the identifier is one of the parameters of this function-like macro,
	// use the parameter value.
	if val, ok := t.params[t.curValue]; ok {
		return val, nil
	}

	if t.f != nil {
		// Check whether this identifier is actually a macro "call" with
		// parameters. In that case, we should parse the parameters and pass it
		// on to a new invocation of parseConst.
		if t.peekToken == token.LPAREN {
			if cursor, ok := t.f.names[t.curValue]; ok && t.f.isFunctionLikeMacro(cursor) {
				// We know the current and peek tokens (the peek one is the '('
				// token). So skip ahead until the current token is the first
				// unknown token.
				t.Next()
				t.Next()

				// Parse the list of parameters until ')' (rparen) is found.
				params := []ast.Expr{}
				for i := 0; ; i++ {
					if i == 0 && t.curToken == token.RPAREN {
						// Empty parameter list: "()" with nothing inside.
						break
					}
					// Parse one argument expression.
					x, err := parseConstExpr(t, precedenceLowest)
					if err != nil {
						return nil, err
					}
					params = append(params, x)
					t.Next()
					if t.curToken == token.COMMA {
						// More arguments follow.
						t.Next()
					} else if t.curToken == token.RPAREN {
						// End of the argument list.
						break
					} else {
						return nil, &scanner.Error{
							Pos: t.fset.Position(t.curPos),
							Msg: "unexpected token " + t.curToken.String() + ", ',' or ')'",
						}
					}
				}

				// Evaluate the macro value and use it as the identifier value.
				rparen := t.curPos
				pos, text := t.f.getMacro(cursor)
				return parseConst(pos, t.fset, text, params, rparen, t.f)
			}
		}

		// Normally the name is something defined in the file (like another
		// macro) which we get the declaration from using getASTDeclName.
		// This ensures that names that are only referenced inside a macro are
		// still getting defined.
		if cursor, ok := t.f.names[t.curValue]; ok {
			return &ast.Ident{
				NamePos: t.curPos,
				Name:    t.f.getASTDeclName(t.curValue, cursor, false),
			}, nil
		}
	}

	// t.f is nil during testing. This is a fallback.
	return &ast.Ident{
		NamePos: t.curPos,
		Name:    "C." + t.curValue,
	}, nil
}
230
231 func parseBasicLit(t *tokenizer) (ast.Expr, *scanner.Error) {
232 return &ast.BasicLit{
233 ValuePos: t.curPos,
234 Kind: t.curToken,
235 Value: t.curValue,
236 }, nil
237 }
238
239 func parseParenExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
240 lparen := t.curPos
241 t.Next()
242 x, err := parseConstExpr(t, precedenceLowest)
243 if err != nil {
244 return nil, err
245 }
246 t.Next()
247 if t.curToken != token.RPAREN {
248 return nil, unexpectedToken(t, token.RPAREN)
249 }
250 expr := &ast.ParenExpr{
251 Lparen: lparen,
252 X: x,
253 Rparen: t.curPos,
254 }
255 return expr, nil
256 }
257
258 func parseBinaryExpr(t *tokenizer, left ast.Expr) (ast.Expr, *scanner.Error) {
259 expression := &ast.BinaryExpr{
260 X: left,
261 Op: t.curToken,
262 OpPos: t.curPos,
263 }
264 precedence := precedences[t.curToken]
265 t.Next()
266 right, err := parseConstExpr(t, precedence)
267 expression.Y = right
268 return expression, err
269 }
270
271 func parseUnaryExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
272 expression := &ast.UnaryExpr{
273 OpPos: t.curPos,
274 Op: t.curToken,
275 }
276 t.Next()
277 x, err := parseConstExpr(t, precedencePrefix)
278 expression.X = x
279 return expression, err
280 }
281
282 // unexpectedToken returns an error of the form "unexpected token FOO, expected
283 // BAR".
284 func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
285 return &scanner.Error{
286 Pos: t.fset.Position(t.curPos),
287 Msg: fmt.Sprintf("unexpected token %s, expected %s", t.curToken, expected),
288 }
289 }
290
// tokenizer reads C source code and converts it to Go tokens. It keeps a
// one-token lookahead: the cur* fields describe the current token and the
// peek* fields the one after it.
type tokenizer struct {
	f                   *cgoFile  // may be nil (it is during testing)
	curPos, peekPos     token.Pos // positions of the current and next token
	fset                *token.FileSet
	curToken, peekToken token.Token
	curValue, peekValue string
	buf                 string              // remaining unscanned input
	params              map[string]ast.Expr // macro parameter values, by name
}
301
302 // newTokenizer initializes a new tokenizer, positioned at the first token in
303 // the string.
304 func newTokenizer(start token.Pos, fset *token.FileSet, buf string, f *cgoFile) *tokenizer {
305 t := &tokenizer{
306 f: f,
307 peekPos: start,
308 fset: fset,
309 buf: buf,
310 peekToken: token.ILLEGAL,
311 params: make(map[string]ast.Expr),
312 }
313 // Parse the first two tokens (cur and peek).
314 t.Next()
315 t.Next()
316 return t
317 }
318
319 // Next consumes the next token in the stream. There is no return value, read
320 // the next token from the pos, token and value properties.
321 func (t *tokenizer) Next() {
322 // The previous peek is now the current token.
323 t.curPos = t.peekPos
324 t.curToken = t.peekToken
325 t.curValue = t.peekValue
326
327 // Parse the next peek token.
328 if t.peekPos != token.NoPos {
329 t.peekPos += token.Pos(len(t.curValue))
330 }
331 for {
332 if len(t.buf) == 0 {
333 t.peekToken = token.EOF
334 return
335 }
336 c := t.buf[0]
337 switch {
338 case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
339 // Skip whitespace.
340 // Based on this source, not sure whether it represents C whitespace:
341 // https://en.cppreference.com/w/cpp/string/byte/isspace
342 if t.peekPos != token.NoPos {
343 t.peekPos++
344 }
345 t.buf = t.buf[1:]
346 case len(t.buf) >= 2 && (string(t.buf[:2]) == "||" || string(t.buf[:2]) == "&&" || string(t.buf[:2]) == "<<" || string(t.buf[:2]) == ">>"):
347 // Two-character tokens.
348 switch c {
349 case '&':
350 t.peekToken = token.LAND
351 case '|':
352 t.peekToken = token.LOR
353 case '<':
354 t.peekToken = token.SHL
355 case '>':
356 t.peekToken = token.SHR
357 default:
358 panic("unreachable")
359 }
360 t.peekValue = t.buf[:2]
361 t.buf = t.buf[2:]
362 return
363 case c == '(' || c == ')' || c == ',' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
364 // Single-character tokens.
365 // TODO: ++ (increment) and -- (decrement) operators.
366 switch c {
367 case '(':
368 t.peekToken = token.LPAREN
369 case ')':
370 t.peekToken = token.RPAREN
371 case ',':
372 t.peekToken = token.COMMA
373 case '+':
374 t.peekToken = token.ADD
375 case '-':
376 t.peekToken = token.SUB
377 case '*':
378 t.peekToken = token.MUL
379 case '/':
380 t.peekToken = token.QUO
381 case '%':
382 t.peekToken = token.REM
383 case '&':
384 t.peekToken = token.AND
385 case '|':
386 t.peekToken = token.OR
387 case '^':
388 t.peekToken = token.XOR
389 }
390 t.peekValue = t.buf[:1]
391 t.buf = t.buf[1:]
392 return
393 case c >= '0' && c <= '9':
394 // Numeric constant (int, float, etc.).
395 // Find the last non-numeric character.
396 tokenLen := len(t.buf)
397 hasDot := false
398 for i, c := range t.buf {
399 if c == '.' {
400 hasDot = true
401 }
402 if c >= '0' && c <= '9' || c == '.' || c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' {
403 tokenLen = i + 1
404 } else {
405 break
406 }
407 }
408 t.peekValue = t.buf[:tokenLen]
409 t.buf = t.buf[tokenLen:]
410 if hasDot {
411 // Integer constants are more complicated than this but this is
412 // a close approximation.
413 // https://en.cppreference.com/w/cpp/language/integer_literal
414 t.peekToken = token.FLOAT
415 t.peekValue = strings.TrimRight(t.peekValue, "f")
416 } else {
417 t.peekToken = token.INT
418 t.peekValue = strings.TrimRight(t.peekValue, "uUlL")
419 }
420 return
421 case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_':
422 // Identifier. Find all remaining tokens that are part of this
423 // identifier.
424 tokenLen := len(t.buf)
425 for i, c := range t.buf {
426 if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_' {
427 tokenLen = i + 1
428 } else {
429 break
430 }
431 }
432 t.peekValue = t.buf[:tokenLen]
433 t.buf = t.buf[tokenLen:]
434 t.peekToken = token.IDENT
435 return
436 case c == '"':
437 // String constant. Find the first '"' character that is not
438 // preceded by a backslash.
439 escape := false
440 tokenLen := len(t.buf)
441 for i, c := range t.buf {
442 if i != 0 && c == '"' && !escape {
443 tokenLen = i + 1
444 break
445 }
446 if !escape {
447 escape = c == '\\'
448 }
449 }
450 t.peekToken = token.STRING
451 t.peekValue = t.buf[:tokenLen]
452 t.buf = t.buf[tokenLen:]
453 return
454 case c == '\'':
455 // Char (rune) constant. Find the first '\'' character that is not
456 // preceded by a backslash.
457 escape := false
458 tokenLen := len(t.buf)
459 for i, c := range t.buf {
460 if i != 0 && c == '\'' && !escape {
461 tokenLen = i + 1
462 break
463 }
464 if !escape {
465 escape = c == '\\'
466 }
467 }
468 t.peekToken = token.CHAR
469 t.peekValue = t.buf[:tokenLen]
470 t.buf = t.buf[tokenLen:]
471 return
472 default:
473 t.peekToken = token.ILLEGAL
474 return
475 }
476 }
477 }
478