token.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package token defines constants representing the lexical tokens of the Go
   6  // programming language and basic operations on tokens (printing, predicates).
   7  package token
   8  
   9  import (
  10  	"strconv"
  11  	"unicode"
  12  	"unicode/utf8"
  13  )
  14  
// Token is the set of lexical tokens of the Go programming language.
// Each token is identified by a small integer; the valid values are the
// constants declared in this package.
type Token int
  17  
// The list of tokens.
//
// The unexported *_beg and *_end constants are range markers rather than
// real tokens: IsLiteral, IsOperator and IsKeyword test whether a token
// lies strictly between the matching pair, so the declaration order in
// this block is significant.
const (
	// Special tokens
	ILLEGAL Token = iota
	EOF
	COMMENT

	// literal_beg and literal_end bracket the tokens reported by IsLiteral.
	literal_beg
	// Identifiers and basic type literals
	// (these tokens stand for classes of literals)
	IDENT  // main
	INT    // 12345
	FLOAT  // 123.45
	IMAG   // 123.45i
	CHAR   // 'a'
	STRING // "abc"
	literal_end

	// operator_beg and operator_end bracket the tokens reported by IsOperator.
	operator_beg
	// Operators and delimiters
	ADD // +
	SUB // -
	MUL // *
	QUO // /
	REM // %

	AND     // &
	OR      // |
	XOR     // ^
	SHL     // <<
	SHR     // >>
	AND_NOT // &^

	ADD_ASSIGN // +=
	SUB_ASSIGN // -=
	MUL_ASSIGN // *=
	QUO_ASSIGN // /=
	REM_ASSIGN // %=

	AND_ASSIGN     // &=
	OR_ASSIGN      // |=
	XOR_ASSIGN     // ^=
	SHL_ASSIGN     // <<=
	SHR_ASSIGN     // >>=
	AND_NOT_ASSIGN // &^=

	LAND  // &&
	LOR   // ||
	ARROW // <-
	INC   // ++
	DEC   // --

	EQL    // ==
	LSS    // <
	GTR    // >
	ASSIGN // =
	NOT    // !

	NEQ      // !=
	LEQ      // <=
	GEQ      // >=
	DEFINE   // :=
	ELLIPSIS // ...

	LPAREN // (
	LBRACK // [
	LBRACE // {
	COMMA  // ,
	PERIOD // .

	RPAREN    // )
	RBRACK    // ]
	RBRACE    // }
	SEMICOLON // ;
	COLON     // :
	operator_end

	// keyword_beg and keyword_end bracket the tokens reported by the
	// IsKeyword method; the same range is used to populate the keyword map.
	keyword_beg
	// Keywords
	BREAK
	CASE
	CHAN
	CONST
	CONTINUE

	DEFAULT
	DEFER
	ELSE
	FALLTHROUGH
	FOR

	FUNC
	GO
	GOTO
	IF
	IMPORT

	INTERFACE
	MAP
	PACKAGE
	RANGE
	RETURN

	SELECT
	STRUCT
	SWITCH
	TYPE
	VAR
	keyword_end

	additional_beg
	// additional tokens, handled in an ad-hoc manner
	// (TILDE lies outside the operator range; IsOperator checks for it
	// explicitly).
	TILDE // ~
	additional_end
)
 133  
 134  var tokens = [...][]byte{
 135  	ILLEGAL: "ILLEGAL",
 136  
 137  	EOF:     "EOF",
 138  	COMMENT: "COMMENT",
 139  
 140  	IDENT:  "IDENT",
 141  	INT:    "INT",
 142  	FLOAT:  "FLOAT",
 143  	IMAG:   "IMAG",
 144  	CHAR:   "CHAR",
 145  	STRING: "STRING",
 146  
 147  	ADD: "+",
 148  	SUB: "-",
 149  	MUL: "*",
 150  	QUO: "/",
 151  	REM: "%",
 152  
 153  	AND:     "&",
 154  	OR:      "|",
 155  	XOR:     "^",
 156  	SHL:     "<<",
 157  	SHR:     ">>",
 158  	AND_NOT: "&^",
 159  
 160  	ADD_ASSIGN: "+=",
 161  	SUB_ASSIGN: "-=",
 162  	MUL_ASSIGN: "*=",
 163  	QUO_ASSIGN: "/=",
 164  	REM_ASSIGN: "%=",
 165  
 166  	AND_ASSIGN:     "&=",
 167  	OR_ASSIGN:      "|=",
 168  	XOR_ASSIGN:     "^=",
 169  	SHL_ASSIGN:     "<<=",
 170  	SHR_ASSIGN:     ">>=",
 171  	AND_NOT_ASSIGN: "&^=",
 172  
 173  	LAND:  "&&",
 174  	LOR:   "||",
 175  	ARROW: "<-",
 176  	INC:   "++",
 177  	DEC:   "--",
 178  
 179  	EQL:    "==",
 180  	LSS:    "<",
 181  	GTR:    ">",
 182  	ASSIGN: "=",
 183  	NOT:    "!",
 184  
 185  	NEQ:      "!=",
 186  	LEQ:      "<=",
 187  	GEQ:      ">=",
 188  	DEFINE:   ":=",
 189  	ELLIPSIS: "...",
 190  
 191  	LPAREN: "(",
 192  	LBRACK: "[",
 193  	LBRACE: "{",
 194  	COMMA:  ",",
 195  	PERIOD: ".",
 196  
 197  	RPAREN:    ")",
 198  	RBRACK:    "]",
 199  	RBRACE:    "}",
 200  	SEMICOLON: ";",
 201  	COLON:     ":",
 202  
 203  	BREAK:    "break",
 204  	CASE:     "case",
 205  	CHAN:     "chan",
 206  	CONST:    "const",
 207  	CONTINUE: "continue",
 208  
 209  	DEFAULT:     "default",
 210  	DEFER:       "defer",
 211  	ELSE:        "else",
 212  	FALLTHROUGH: "fallthrough",
 213  	FOR:         "for",
 214  
 215  	FUNC:   "func",
 216  	GO:     "go",
 217  	GOTO:   "goto",
 218  	IF:     "if",
 219  	IMPORT: "import",
 220  
 221  	INTERFACE: "interface",
 222  	MAP:       "map",
 223  	PACKAGE:   "package",
 224  	RANGE:     "range",
 225  	RETURN:    "return",
 226  
 227  	SELECT: "select",
 228  	STRUCT: "struct",
 229  	SWITCH: "switch",
 230  	TYPE:   "type",
 231  	VAR:    "var",
 232  
 233  	TILDE: "~",
 234  }
 235  
 236  // String returns the string corresponding to the token tok.
 237  // For operators, delimiters, and keywords the string is the actual
 238  // token character sequence (e.g., for the token [ADD], the string is
 239  // "+"). For all other tokens the string corresponds to the token
 240  // constant name (e.g. for the token [IDENT], the string is "IDENT").
 241  func (tok Token) String() string {
 242  	s := ""
 243  	if 0 <= tok && tok < Token(len(tokens)) {
 244  		s = tokens[tok]
 245  	}
 246  	if s == "" {
 247  		s = "token(" + strconv.Itoa(int(tok)) + ")"
 248  	}
 249  	return s
 250  }
 251  
// A set of constants for precedence-based expression parsing.
// Non-operators have lowest precedence, followed by operators
// starting with precedence 1 up to unary operators. The highest
// precedence serves as "catch-all" precedence for selector,
// indexing, and other operator and delimiter tokens.
const (
	LowestPrec  = 0 // non-operators
	UnaryPrec   = 6 // one above the highest binary precedence (5) returned by Precedence
	HighestPrec = 7 // catch-all, above unary operators
)
 262  
 263  // Precedence returns the operator precedence of the binary
 264  // operator op. If op is not a binary operator, the result
 265  // is LowestPrecedence.
 266  func (op Token) Precedence() int {
 267  	switch op {
 268  	case LOR:
 269  		return 1
 270  	case LAND:
 271  		return 2
 272  	case EQL, NEQ, LSS, LEQ, GTR, GEQ:
 273  		return 3
 274  	case ADD, SUB, OR, XOR:
 275  		return 4
 276  	case MUL, QUO, REM, SHL, SHR, AND, AND_NOT:
 277  		return 5
 278  	}
 279  	return LowestPrec
 280  }
 281  
 282  var keywords map[string]Token
 283  
 284  func init() {
 285  	keywords = map[string]Token{}
 286  	for i := keyword_beg + 1; i < keyword_end; i++ {
 287  		keywords[tokens[i]] = i
 288  	}
 289  }
 290  
 291  // Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
 292  func Lookup(ident []byte) Token {
 293  	if tok, is_keyword := keywords[ident]; is_keyword {
 294  		return tok
 295  	}
 296  	return IDENT
 297  }
 298  
 299  // Predicates
 300  
 301  // IsLiteral returns true for tokens corresponding to identifiers
 302  // and basic type literals; it returns false otherwise.
 303  func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end }
 304  
 305  // IsOperator returns true for tokens corresponding to operators and
 306  // delimiters; it returns false otherwise.
 307  func (tok Token) IsOperator() bool {
 308  	return (operator_beg < tok && tok < operator_end) || tok == TILDE
 309  }
 310  
 311  // IsKeyword returns true for tokens corresponding to keywords;
 312  // it returns false otherwise.
 313  func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
 314  
 315  // IsExported reports whether name starts with an upper-case letter.
 316  func IsExported(name []byte) bool {
 317  	ch, _ := utf8.DecodeRuneInString(name)
 318  	return unicode.IsUpper(ch)
 319  }
 320  
 321  // IsKeyword reports whether name is a Go keyword, such as "func" or "return".
 322  func IsKeyword(name []byte) bool {
 323  	// TODO: opt: use a perfect hash function instead of a global map.
 324  	_, ok := keywords[name]
 325  	return ok
 326  }
 327  
 328  // IsIdentifier reports whether name is a Go identifier, that is, a non-empty
 329  // string made up of letters, digits, and underscores, where the first character
 330  // is not a digit. Keywords are not identifiers.
 331  func IsIdentifier(name []byte) bool {
 332  	if name == "" || IsKeyword(name) {
 333  		return false
 334  	}
 335  	for i, c := range name {
 336  		if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
 337  			return false
 338  		}
 339  	}
 340  	return true
 341  }
 342