token.mx raw
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Package token defines constants representing the lexical tokens of the Go
6 // programming language and basic operations on tokens (printing, predicates).
7 package token
8
9 import (
10 "strconv"
11 "unicode"
12 "unicode/utf8"
13 )
14
// Token is the set of lexical tokens of the Go programming language.
// It indexes the tokens table below; the zero value is ILLEGAL.
type Token int
17
// The list of tokens.
//
// The unexported *_beg/*_end constants bracket the literal, operator,
// and keyword ranges so the predicate methods (IsLiteral, IsOperator,
// IsKeyword) can test membership with two comparisons. Do not reorder:
// the values are assigned by iota and the ranges must stay contiguous.
const (
	// Special tokens
	ILLEGAL Token = iota
	EOF
	COMMENT

	literal_beg
	// Identifiers and basic type literals
	// (these tokens stand for classes of literals)
	IDENT  // main
	INT    // 12345
	FLOAT  // 123.45
	IMAG   // 123.45i
	CHAR   // 'a'
	STRING // "abc"
	literal_end

	operator_beg
	// Operators and delimiters
	ADD // +
	SUB // -
	MUL // *
	QUO // /
	REM // %

	AND     // &
	OR      // |
	XOR     // ^
	SHL     // <<
	SHR     // >>
	AND_NOT // &^

	ADD_ASSIGN // +=
	SUB_ASSIGN // -=
	MUL_ASSIGN // *=
	QUO_ASSIGN // /=
	REM_ASSIGN // %=

	AND_ASSIGN     // &=
	OR_ASSIGN      // |=
	XOR_ASSIGN     // ^=
	SHL_ASSIGN     // <<=
	SHR_ASSIGN     // >>=
	AND_NOT_ASSIGN // &^=

	LAND  // &&
	LOR   // ||
	ARROW // <-
	INC   // ++
	DEC   // --

	EQL    // ==
	LSS    // <
	GTR    // >
	ASSIGN // =
	NOT    // !

	NEQ      // !=
	LEQ      // <=
	GEQ      // >=
	DEFINE   // :=
	ELLIPSIS // ...

	LPAREN // (
	LBRACK // [
	LBRACE // {
	COMMA  // ,
	PERIOD // .

	RPAREN    // )
	RBRACK    // ]
	RBRACE    // }
	SEMICOLON // ;
	COLON     // :
	operator_end

	keyword_beg
	// Keywords
	BREAK
	CASE
	CHAN
	CONST
	CONTINUE

	DEFAULT
	DEFER
	ELSE
	FALLTHROUGH
	FOR

	FUNC
	GO
	GOTO
	IF
	IMPORT

	INTERFACE
	MAP
	PACKAGE
	RANGE
	RETURN

	SELECT
	STRUCT
	SWITCH
	TYPE
	VAR
	keyword_end

	additional_beg
	// additional tokens, handled in an ad-hoc manner
	TILDE
	additional_end
)
133
134 var tokens = [...][]byte{
135 ILLEGAL: "ILLEGAL",
136
137 EOF: "EOF",
138 COMMENT: "COMMENT",
139
140 IDENT: "IDENT",
141 INT: "INT",
142 FLOAT: "FLOAT",
143 IMAG: "IMAG",
144 CHAR: "CHAR",
145 STRING: "STRING",
146
147 ADD: "+",
148 SUB: "-",
149 MUL: "*",
150 QUO: "/",
151 REM: "%",
152
153 AND: "&",
154 OR: "|",
155 XOR: "^",
156 SHL: "<<",
157 SHR: ">>",
158 AND_NOT: "&^",
159
160 ADD_ASSIGN: "+=",
161 SUB_ASSIGN: "-=",
162 MUL_ASSIGN: "*=",
163 QUO_ASSIGN: "/=",
164 REM_ASSIGN: "%=",
165
166 AND_ASSIGN: "&=",
167 OR_ASSIGN: "|=",
168 XOR_ASSIGN: "^=",
169 SHL_ASSIGN: "<<=",
170 SHR_ASSIGN: ">>=",
171 AND_NOT_ASSIGN: "&^=",
172
173 LAND: "&&",
174 LOR: "||",
175 ARROW: "<-",
176 INC: "++",
177 DEC: "--",
178
179 EQL: "==",
180 LSS: "<",
181 GTR: ">",
182 ASSIGN: "=",
183 NOT: "!",
184
185 NEQ: "!=",
186 LEQ: "<=",
187 GEQ: ">=",
188 DEFINE: ":=",
189 ELLIPSIS: "...",
190
191 LPAREN: "(",
192 LBRACK: "[",
193 LBRACE: "{",
194 COMMA: ",",
195 PERIOD: ".",
196
197 RPAREN: ")",
198 RBRACK: "]",
199 RBRACE: "}",
200 SEMICOLON: ";",
201 COLON: ":",
202
203 BREAK: "break",
204 CASE: "case",
205 CHAN: "chan",
206 CONST: "const",
207 CONTINUE: "continue",
208
209 DEFAULT: "default",
210 DEFER: "defer",
211 ELSE: "else",
212 FALLTHROUGH: "fallthrough",
213 FOR: "for",
214
215 FUNC: "func",
216 GO: "go",
217 GOTO: "goto",
218 IF: "if",
219 IMPORT: "import",
220
221 INTERFACE: "interface",
222 MAP: "map",
223 PACKAGE: "package",
224 RANGE: "range",
225 RETURN: "return",
226
227 SELECT: "select",
228 STRUCT: "struct",
229 SWITCH: "switch",
230 TYPE: "type",
231 VAR: "var",
232
233 TILDE: "~",
234 }
235
236 // String returns the string corresponding to the token tok.
237 // For operators, delimiters, and keywords the string is the actual
238 // token character sequence (e.g., for the token [ADD], the string is
239 // "+"). For all other tokens the string corresponds to the token
240 // constant name (e.g. for the token [IDENT], the string is "IDENT").
241 func (tok Token) String() string {
242 s := ""
243 if 0 <= tok && tok < Token(len(tokens)) {
244 s = tokens[tok]
245 }
246 if s == "" {
247 s = "token(" + strconv.Itoa(int(tok)) + ")"
248 }
249 return s
250 }
251
// A set of constants for precedence-based expression parsing.
// Non-operators have lowest precedence, followed by operators
// starting with precedence 1 up to unary operators. The highest
// precedence serves as "catch-all" precedence for selector,
// indexing, and other operator and delimiter tokens.
const (
	LowestPrec  = 0 // non-operators
	UnaryPrec   = 6 // precedence of unary operators
	HighestPrec = 7 // catch-all for selector, indexing, and other delimiter tokens
)
262
263 // Precedence returns the operator precedence of the binary
264 // operator op. If op is not a binary operator, the result
265 // is LowestPrecedence.
266 func (op Token) Precedence() int {
267 switch op {
268 case LOR:
269 return 1
270 case LAND:
271 return 2
272 case EQL, NEQ, LSS, LEQ, GTR, GEQ:
273 return 3
274 case ADD, SUB, OR, XOR:
275 return 4
276 case MUL, QUO, REM, SHL, SHR, AND, AND_NOT:
277 return 5
278 }
279 return LowestPrec
280 }
281
282 var keywords map[string]Token
283
284 func init() {
285 keywords = map[string]Token{}
286 for i := keyword_beg + 1; i < keyword_end; i++ {
287 keywords[tokens[i]] = i
288 }
289 }
290
291 // Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
292 func Lookup(ident []byte) Token {
293 if tok, is_keyword := keywords[ident]; is_keyword {
294 return tok
295 }
296 return IDENT
297 }
298
299 // Predicates
300
301 // IsLiteral returns true for tokens corresponding to identifiers
302 // and basic type literals; it returns false otherwise.
303 func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end }
304
305 // IsOperator returns true for tokens corresponding to operators and
306 // delimiters; it returns false otherwise.
307 func (tok Token) IsOperator() bool {
308 return (operator_beg < tok && tok < operator_end) || tok == TILDE
309 }
310
311 // IsKeyword returns true for tokens corresponding to keywords;
312 // it returns false otherwise.
313 func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
314
// IsExported reports whether name starts with an upper-case letter.
//
// name must be a string: utf8.DecodeRuneInString accepts only a
// string argument. For an empty name it yields utf8.RuneError, which
// unicode.IsUpper rejects, so "" reports false.
func IsExported(name string) bool {
	ch, _ := utf8.DecodeRuneInString(name)
	return unicode.IsUpper(ch)
}
320
321 // IsKeyword reports whether name is a Go keyword, such as "func" or "return".
322 func IsKeyword(name []byte) bool {
323 // TODO: opt: use a perfect hash function instead of a global map.
324 _, ok := keywords[name]
325 return ok
326 }
327
328 // IsIdentifier reports whether name is a Go identifier, that is, a non-empty
329 // string made up of letters, digits, and underscores, where the first character
330 // is not a digit. Keywords are not identifiers.
331 func IsIdentifier(name []byte) bool {
332 if name == "" || IsKeyword(name) {
333 return false
334 }
335 for i, c := range name {
336 if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
337 return false
338 }
339 }
340 return true
341 }
342