package main import ( "fmt" "io" "unicode" "unicode/utf8" ) const ( comments uint = 1 << iota directives ) type Scanner struct { Source mode uint nlsemi bool Line, Col uint32 Blank bool Tok Token Lit string Bad bool Kind LitKind Op Operator Prec int32 keywordMap [1 << 6]Token keywordsReady bool } func (s *Scanner) Init(src io.Reader, errh func(line, col uint32, msg string), mode uint) { s.Source.init(src, errh) s.mode = mode s.nlsemi = false s.initKeywords() } func (s *Scanner) Errorf(format string, args ...interface{}) { s.error(fmt.Sprintf(format, args...)) } func (s *Scanner) ErrorAtf(offset int32, format string, args ...interface{}) { s.errh(s.line, s.col+uint32(offset), fmt.Sprintf(format, args...)) } func (s *Scanner) SetLit(kind LitKind, ok bool) { s.nlsemi = true s.Tok = Literal s.Lit = string(s.segmentCopy()) s.Bad = !ok s.Kind = kind } func (s *Scanner) Next() { nlsemi := s.nlsemi s.nlsemi = false redo: s.stop() startLine, startCol := s.pos() for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !nlsemi || s.ch == '\r' { s.nextch() } s.Line, s.Col = s.pos() s.Blank = s.line > startLine || startCol == Colbase s.start() if IsLetter(s.ch) || s.ch >= utf8.RuneSelf && s.AtIdentChar(true) { s.nextch() s.Ident() return } switch s.ch { case -1: if nlsemi { s.Lit = "EOF" s.Tok = Semi break } s.Tok = EOF case '\n': s.nextch() s.Lit = "newline" s.Tok = Semi case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': s.Number(false) case '"': s.stdString() case '`': s.rawString() case '\'': s.rune() case '(': s.nextch() s.Tok = Lparen case '[': s.nextch() s.Tok = Lbrack case '{': s.nextch() s.Tok = Lbrace case ',': s.nextch() s.Tok = Comma case ';': s.nextch() s.Lit = "semicolon" s.Tok = Semi case ')': s.nextch() s.nlsemi = true s.Tok = Rparen case ']': s.nextch() s.nlsemi = true s.Tok = Rbrack case '}': s.nextch() s.nlsemi = true s.Tok = Rbrace case ':': s.nextch() if s.ch == '=' { s.nextch() s.Tok = Define break } s.Tok = Colon case '.': s.nextch() if IsDecimal(s.ch) { s.Number(true) break } if s.ch == '.' { s.nextch() if s.ch == '.' { s.nextch() s.Tok = DotDotDot break } s.rewind() s.nextch() } s.Tok = Dot case '+': s.nextch() s.Op, s.Prec = Add, PrecAdd if s.ch != '+' { goto assignop } s.nextch() s.nlsemi = true s.Tok = IncOp case '-': s.nextch() s.Op, s.Prec = Sub, PrecAdd if s.ch != '-' { goto assignop } s.nextch() s.nlsemi = true s.Tok = IncOp case '*': s.nextch() s.Op, s.Prec = Mul, PrecMul if s.ch == '=' { s.nextch() s.Tok = AssignOp break } s.Tok = Star case '/': s.nextch() if s.ch == '/' { s.nextch() s.lineComment() goto redo } if s.ch == '*' { s.nextch() s.fullComment() if line, _ := s.pos(); line > s.Line && nlsemi { s.Lit = "newline" s.Tok = Semi break } goto redo } s.Op, s.Prec = Div, PrecMul goto assignop case '%': s.nextch() s.Op, s.Prec = Rem, PrecMul goto assignop case '&': s.nextch() if s.ch == '&' { s.nextch() s.Op, s.Prec = AndAnd, PrecAndAnd s.Tok = OperatorType break } s.Op, s.Prec = And, PrecMul if s.ch == '^' { s.nextch() s.Op = AndNot } goto assignop case '|': s.nextch() if s.ch == '|' { s.nextch() s.Op, s.Prec = OrOr, PrecOrOr s.Tok = OperatorType break } s.Op, s.Prec = Or, PrecAdd goto assignop case '^': s.nextch() s.Op, s.Prec = Xor, PrecAdd goto assignop case '<': s.nextch() if s.ch == '=' { s.nextch() s.Op, s.Prec = Leq, PrecCmp s.Tok = OperatorType break } if s.ch == '<' { s.nextch() s.Op, s.Prec = Shl, PrecMul goto assignop } if s.ch == '-' { s.nextch() s.Tok = Arrow break } s.Op, s.Prec = Lss, PrecCmp s.Tok = OperatorType case '>': s.nextch() if s.ch == '=' { s.nextch() s.Op, s.Prec = Geq, PrecCmp s.Tok = OperatorType break } if s.ch == '>' { s.nextch() s.Op, s.Prec = Shr, PrecMul goto assignop } s.Op, s.Prec = Gtr, PrecCmp s.Tok = OperatorType case '=': s.nextch() if s.ch == '=' { s.nextch() s.Op, s.Prec = Eql, PrecCmp s.Tok = OperatorType break } s.Tok = Assign case '!': s.nextch() if s.ch == '=' { s.nextch() s.Op, s.Prec = Neq, PrecCmp s.Tok = OperatorType break } s.Op, s.Prec = Not, 0 s.Tok = OperatorType case '~': s.nextch() s.Op, s.Prec = Tilde, 0 s.Tok = OperatorType default: s.Errorf("invalid character %#U", s.ch) s.nextch() goto redo } return assignop: if s.ch == '=' { s.nextch() s.Tok = AssignOp return } s.Tok = OperatorType } func (s *Scanner) Ident() { for IsLetter(s.ch) || IsDecimal(s.ch) { s.nextch() } if s.ch >= utf8.RuneSelf { for s.AtIdentChar(false) { s.nextch() } } lit := s.segment() if len(lit) >= 2 { h := (uint(lit[0])<<4 ^ uint(lit[1]) + uint(len(lit))) & 63 if tok := s.keywordMap[h]; tok != 0 && tokStrFast(tok) == string(lit) { s.nlsemi = contains(1<= utf8.RuneSelf: s.Errorf("invalid character %#U in identifier", s.ch) default: return false } return true } func (s *Scanner) initKeywords() { if s.keywordsReady { return } s.keywordsReady = true for tok := Break; tok <= Var; tok++ { b := []byte(tok.String()) h := (uint(b[0])<<4 ^ uint(b[1]) + uint(len(b))) & 63 if s.keywordMap[h] != 0 { panic("imperfect hash") } s.keywordMap[h] = tok } } func Lower(ch rune) rune { return ('a' - 'A') | ch } func IsLetter(ch rune) bool { return 'a' <= Lower(ch) && Lower(ch) <= 'z' || ch == '_' } func IsDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } func IsHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= Lower(ch) && Lower(ch) <= 'f' } func (s *Scanner) Digits(base int32, invalid *int32) (digsep int32) { if base <= 10 { max := rune('0' + base) for IsDecimal(s.ch) || s.ch == '_' { ds := int32(1) if s.ch == '_' { ds = 2 } else if s.ch >= max && *invalid < 0 { _, col := s.pos() *invalid = int32(col - s.col) } digsep |= ds s.nextch() } } else { for IsHex(s.ch) || s.ch == '_' { ds := int32(1) if s.ch == '_' { ds = 2 } digsep |= ds s.nextch() } } return } func (s *Scanner) Number(seenPoint bool) { ok := true kind := IntLit base := int32(10) prefix := rune(0) digsep := int32(0) invalid := int32(-1) if !seenPoint { if s.ch == '0' { s.nextch() switch Lower(s.ch) { case 'x': s.nextch() base, prefix = 16, 'x' case 'o': s.nextch() base, prefix = 8, 'o' case 'b': s.nextch() base, prefix = 2, 'b' default: base, prefix = 8, '0' digsep = 1 } } digsep |= s.Digits(base, &invalid) if s.ch == '.' { if prefix == 'o' || prefix == 'b' { s.Errorf("invalid radix point in %s literal", baseName(base)) ok = false } s.nextch() seenPoint = true } } if seenPoint { kind = FloatLit digsep |= s.Digits(base, &invalid) } if digsep&1 == 0 && ok { s.Errorf("%s literal has no digits", baseName(base)) ok = false } if e := Lower(s.ch); e == 'e' || e == 'p' { if ok { switch { case e == 'e' && prefix != 0 && prefix != '0': s.Errorf("%q exponent requires decimal mantissa", s.ch) ok = false case e == 'p' && prefix != 'x': s.Errorf("%q exponent requires hexadecimal mantissa", s.ch) ok = false } } s.nextch() kind = FloatLit if s.ch == '+' || s.ch == '-' { s.nextch() } digsep = s.Digits(10, nil) | digsep&2 if digsep&1 == 0 && ok { s.Errorf("exponent has no digits") ok = false } } else if prefix == 'x' && kind == FloatLit && ok { s.Errorf("hexadecimal mantissa requires a 'p' exponent") ok = false } if s.ch == 'i' { kind = ImagLit s.nextch() } s.SetLit(kind, ok) if kind == IntLit && invalid >= 0 && ok { s.ErrorAtf(invalid, "invalid digit %q in %s literal", s.Lit[invalid], baseName(base)) ok = false } if digsep&2 != 0 && ok { if i := invalidSep(s.Lit); i >= 0 { s.ErrorAtf(i, "'_' must separate successive digits") ok = false } } s.Bad = !ok } func baseName(base int32) string { switch base { case 2: return "binary" case 8: return "octal" case 10: return "decimal" case 16: return "hexadecimal" } panic("invalid base") } func invalidSep(x string) int32 { x1 := ' ' d := '.' i := int32(0) if len(x) >= 2 && x[0] == '0' { x1 = Lower(rune(x[1])) if x1 == 'x' || x1 == 'o' || x1 == 'b' { d = '0' i = 2 } } for ; i < int32(len(x)); i++ { p := d d = rune(x[i]) switch { case d == '_': if p != '0' { return i } case IsDecimal(d) || x1 == 'x' && IsHex(d): d = '0' default: if p == '_' { return i - 1 } d = '.' } } if d == '_' { return int32(len(x)) - 1 } return -1 } func (s *Scanner) rune() { ok := true s.nextch() n := 0 for ; ; n++ { if s.ch == '\'' { if ok { if n == 0 { s.Errorf("empty rune literal or unescaped '") ok = false } else if n != 1 { s.ErrorAtf(0, "more than one character in rune literal") ok = false } } s.nextch() break } if s.ch == '\\' { s.nextch() if !s.escape('\'') { ok = false } continue } if s.ch == '\n' { if ok { s.Errorf("newline in rune literal") ok = false } break } if s.ch < 0 { if ok { s.ErrorAtf(0, "rune literal not terminated") ok = false } break } s.nextch() } s.SetLit(RuneLit, ok) } func (s *Scanner) stdString() { ok := true s.nextch() for { if s.ch == '"' { s.nextch() break } if s.ch == '\\' { s.nextch() if !s.escape('"') { ok = false } continue } if s.ch == '\n' { s.Errorf("newline in string") ok = false break } if s.ch < 0 { s.ErrorAtf(0, "string not terminated") ok = false break } s.nextch() } s.SetLit(StringLit, ok) } func (s *Scanner) rawString() { ok := true s.nextch() for { if s.ch == '`' { s.nextch() break } if s.ch < 0 { s.ErrorAtf(0, "string not terminated") ok = false break } s.nextch() } s.SetLit(StringLit, ok) } func (s *Scanner) comment(text string) { s.ErrorAtf(0, "%s", text) } func (s *Scanner) skipLine() { for s.ch >= 0 && s.ch != '\n' { s.nextch() } } func (s *Scanner) lineComment() { if s.mode&comments != 0 { s.skipLine() s.comment(string(s.segment())) return } if s.mode&directives == 0 || (s.ch != 'g' && s.ch != 'l') { s.stop() s.skipLine() return } prefix := "go:" if s.ch == 'l' { prefix = "line " } for _, r := range prefix { if s.ch != rune(r) { s.stop() s.skipLine() return } s.nextch() } s.skipLine() s.comment(string(s.segment())) } func (s *Scanner) skipComment() bool { for s.ch >= 0 { for s.ch == '*' { s.nextch() if s.ch == '/' { s.nextch() return true } } s.nextch() } s.ErrorAtf(0, "comment not terminated") return false } func (s *Scanner) fullComment() { if s.mode&comments != 0 { if s.skipComment() { s.comment(string(s.segment())) } return } if s.mode&directives == 0 || s.ch != 'l' { s.stop() s.skipComment() return } const prefix = "line " for _, r := range prefix { if s.ch != rune(r) { s.stop() s.skipComment() return } s.nextch() } if s.skipComment() { s.comment(string(s.segment())) } } func (s *Scanner) escape(quote rune) bool { var n int32 var base, max uint32 switch s.ch { case quote, 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\': s.nextch() return true case '0', '1', '2', '3', '4', '5', '6', '7': n, base, max = 3, 8, 255 case 'x': s.nextch() n, base, max = 2, 16, 255 case 'u': s.nextch() n, base, max = 4, 16, unicode.MaxRune case 'U': s.nextch() n, base, max = 8, 16, unicode.MaxRune default: if s.ch < 0 { return true } s.Errorf("unknown escape") return false } var x uint32 for i := n; i > 0; i-- { if s.ch < 0 { return true } d := base if IsDecimal(s.ch) { d = uint32(s.ch) - '0' } else if 'a' <= Lower(s.ch) && Lower(s.ch) <= 'f' { d = uint32(Lower(s.ch)) - 'a' + 10 } if d >= base { s.Errorf("invalid character %q in %s escape", s.ch, baseName(int32(base))) return false } x = x*base + d s.nextch() } if x > max && base == 8 { s.Errorf("octal escape value %d > 255", x) return false } if x > max || 0xD800 <= x && x < 0xE000 { s.Errorf("escape is invalid Unicode code point %#U", x) return false } return true } func String(n Node) string { return fmt.Sprintf("%T", n) } func StartPos(n Node) Pos { return n.Pos() }