lexer.go raw
1 package pattern
2
3 import (
4 "fmt"
5 "go/token"
6 "unicode"
7 "unicode/utf8"
8 )
9
10 type lexer struct {
11 f *token.File
12
13 input string
14 start int
15 pos int
16 width int
17 items chan item
18 }
19
// itemType identifies the kind of an item produced by the lexer.
type itemType int

// eof is the sentinel returned by the lexer's next method once the input is
// exhausted. It is deliberately untyped so it can be compared with runes.
const eof = -1

// The item kinds, numbered consecutively starting at zero.
const (
	itemError        itemType = iota // lexing failed; the item value is the message
	itemLeftParen                    // (
	itemRightParen                   // )
	itemLeftBracket                  // [
	itemRightBracket                 // ]
	itemTypeName                     // identifier starting with an upper-case letter
	itemVariable                     // identifier starting with a lower-case letter
	itemAt                           // @
	itemColon                        // :
	itemBlank                        // _
	itemString                       // double-quoted string literal
	itemEOF                          // end of input
)
38
39 func (typ itemType) String() string {
40 switch typ {
41 case itemError:
42 return "ERROR"
43 case itemLeftParen:
44 return "("
45 case itemRightParen:
46 return ")"
47 case itemLeftBracket:
48 return "["
49 case itemRightBracket:
50 return "]"
51 case itemTypeName:
52 return "TYPE"
53 case itemVariable:
54 return "VAR"
55 case itemAt:
56 return "@"
57 case itemColon:
58 return ":"
59 case itemBlank:
60 return "_"
61 case itemString:
62 return "STRING"
63 case itemEOF:
64 return "EOF"
65 default:
66 return fmt.Sprintf("itemType(%d)", typ)
67 }
68 }
69
70 type item struct {
71 typ itemType
72 val string
73 pos int
74 }
75
76 type stateFn func(*lexer) stateFn
77
78 func (l *lexer) run() {
79 for state := lexStart; state != nil; {
80 state = state(l)
81 }
82 close(l.items)
83 }
84
85 func (l *lexer) emitValue(t itemType, value string) {
86 l.items <- item{t, value, l.start}
87 l.start = l.pos
88 }
89
90 func (l *lexer) emit(t itemType) {
91 l.items <- item{t, l.input[l.start:l.pos], l.start}
92 l.start = l.pos
93 }
94
95 func lexStart(l *lexer) stateFn {
96 switch r := l.next(); {
97 case r == eof:
98 l.emit(itemEOF)
99 return nil
100 case unicode.IsSpace(r):
101 l.ignore()
102 case r == '(':
103 l.emit(itemLeftParen)
104 case r == ')':
105 l.emit(itemRightParen)
106 case r == '[':
107 l.emit(itemLeftBracket)
108 case r == ']':
109 l.emit(itemRightBracket)
110 case r == '@':
111 l.emit(itemAt)
112 case r == ':':
113 l.emit(itemColon)
114 case r == '_':
115 l.emit(itemBlank)
116 case r == '"':
117 l.backup()
118 return lexString
119 case unicode.IsUpper(r):
120 l.backup()
121 return lexType
122 case unicode.IsLower(r):
123 l.backup()
124 return lexVariable
125 default:
126 return l.errorf("unexpected character %c", r)
127 }
128 return lexStart
129 }
130
131 func (l *lexer) next() (r rune) {
132 if l.pos >= len(l.input) {
133 l.width = 0
134 return eof
135 }
136 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
137
138 if r == '\n' {
139 l.f.AddLine(l.pos)
140 }
141
142 l.pos += l.width
143
144 return r
145 }
146
147 func (l *lexer) ignore() {
148 l.start = l.pos
149 }
150
151 func (l *lexer) backup() {
152 l.pos -= l.width
153 }
154
155 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
156 // TODO(dh): emit position information in errors
157 l.items <- item{
158 itemError,
159 fmt.Sprintf(format, args...),
160 l.start,
161 }
162 return nil
163 }
164
// isAlphaNumeric reports whether r is an ASCII digit or an ASCII letter.
func isAlphaNumeric(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	}
	return false
}
170
171 func lexString(l *lexer) stateFn {
172 l.next() // skip quote
173 escape := false
174
175 var runes []rune
176 for {
177 switch r := l.next(); r {
178 case eof:
179 return l.errorf("unterminated string")
180 case '"':
181 if !escape {
182 l.emitValue(itemString, string(runes))
183 return lexStart
184 } else {
185 runes = append(runes, '"')
186 escape = false
187 }
188 case '\\':
189 if escape {
190 runes = append(runes, '\\')
191 escape = false
192 } else {
193 escape = true
194 }
195 default:
196 runes = append(runes, r)
197 }
198 }
199 }
200
201 func lexType(l *lexer) stateFn {
202 l.next()
203 for {
204 if !isAlphaNumeric(l.next()) {
205 l.backup()
206 l.emit(itemTypeName)
207 return lexStart
208 }
209 }
210 }
211
212 func lexVariable(l *lexer) stateFn {
213 l.next()
214 for {
215 if !isAlphaNumeric(l.next()) {
216 l.backup()
217 l.emit(itemVariable)
218 return lexStart
219 }
220 }
221 }
222