1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // Package scanner provides a scanner and tokenizer for UTF-8-encoded text.
6 // It takes an io.Reader providing the source, which then can be tokenized
7 // through repeated calls to the Scan function. For compatibility with
8 // existing tools, the NUL character is not allowed. If the first character
9 // in the source is a UTF-8 encoded byte order mark (BOM), it is discarded.
10 //
11 // By default, a [Scanner] skips white space and Go comments and recognizes all
12 // literals as defined by the Go language specification. It may be
13 // customized to recognize only a subset of those literals and to recognize
14 // different identifier and white space characters.
15 package scanner
16 17 import (
18 "bytes"
19 "fmt"
20 "io"
21 "os"
22 "unicode"
23 "unicode/utf8"
24 )
// Position is a value that represents a source position.
// A position is valid if Line > 0.
type Position struct {
	Filename []byte // filename, if any
	Offset   int    // byte offset, starting at 0
	Line     int    // line number, starting at 1
	Column   int    // column number, starting at 1 (character count per line)
}

// IsValid reports whether the position is valid.
func (pos *Position) IsValid() bool { return pos.Line > 0 }

// String formats the position as "filename:line:column". A nil or empty
// Filename is rendered as "<input>", and the ":line:column" suffix is
// omitted when the position is not valid.
func (pos Position) String() string {
	// Filename is a byte slice, so it has to be converted explicitly;
	// it cannot be compared with or assigned from a string.
	s := "<input>"
	if len(pos.Filename) > 0 {
		s = string(pos.Filename)
	}
	if pos.IsValid() {
		s += fmt.Sprintf(":%d:%d", pos.Line, pos.Column)
	}
	return s
}
// Predefined mode bits to control recognition of tokens. For instance,
// to configure a [Scanner] such that it only recognizes (Go) identifiers,
// integers, and skips comments, set the Scanner's Mode field to:
//
//	ScanIdents | ScanInts | ScanComments | SkipComments
//
// With the exceptions of comments, which are skipped if SkipComments is
// set, unrecognized tokens are not ignored. Instead, the scanner simply
// returns the respective individual characters (or possibly sub-tokens).
// For instance, if the mode is ScanIdents (not ScanStrings), the string
// "foo" is scanned as the token sequence '"' [Ident] '"'.
//
// Use GoTokens to configure the Scanner such that it accepts all Go
// literal tokens including Go identifiers. Comments will be skipped.
//
// Each bit is derived from the corresponding (negative) token value t
// as 1<<-t.
const (
	ScanIdents     = 1 << -Ident
	ScanInts       = 1 << -Int
	ScanFloats     = 1 << -Float // includes Ints and hexadecimal floats
	ScanChars      = 1 << -Char
	ScanStrings    = 1 << -String
	ScanRawStrings = 1 << -RawString
	ScanComments   = 1 << -Comment
	SkipComments   = 1 << -skipComment // if set with ScanComments, comments become white space
	GoTokens       = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
)
// The result of Scan is one of these tokens or a Unicode character.
// Token values are negative (EOF is -1, Ident is -2, and so on).
const (
	EOF = -(iota + 1)
	Ident
	Int
	Float
	Char
	String
	RawString
	Comment

	// internal use only
	skipComment // has no token of its own; only used to derive the SkipComments mode bit
)
89 90 var tokenString = map[rune][]byte{
91 EOF: "EOF",
92 Ident: "Ident",
93 Int: "Int",
94 Float: "Float",
95 Char: "Char",
96 String: "String",
97 RawString: "RawString",
98 Comment: "Comment",
99 }
100 101 // TokenString returns a printable string for a token or Unicode character.
102 func TokenString(tok rune) []byte {
103 if s, found := tokenString[tok]; found {
104 return s
105 }
106 return fmt.Sprintf("%q", []byte(tok))
107 }
// GoWhitespace is the default value for the [Scanner]'s Whitespace field.
// Its value selects Go's white space characters: tab, newline, carriage
// return, and space, each represented by the bit whose index is the
// character's code.
const GoWhitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
// bufLen is the size of the Scanner's source buffer, not counting the
// extra sentinel byte.
const bufLen = 1024 // at least utf8.UTFMax
// A Scanner implements reading of Unicode characters and tokens from an [io.Reader].
type Scanner struct {
	// Input
	src io.Reader

	// Source buffer
	srcBuf [bufLen + 1]byte // +1 for sentinel for common case of s.next()
	srcPos int              // reading position (srcBuf index)
	srcEnd int              // source end (srcBuf index)

	// Source position
	srcBufOffset int // byte offset of srcBuf[0] in source
	line         int // line count
	column       int // character count
	lastLineLen  int // length of last line in characters (for correct column reporting)
	lastCharLen  int // length of last character in bytes

	// Token text buffer
	// Typically, token text is stored completely in srcBuf, but in general
	// the token text's head may be buffered in tokBuf while the token text's
	// tail is stored in srcBuf.
	tokBuf bytes.Buffer // token text head that is not in srcBuf anymore
	tokPos int          // token text tail position (srcBuf index); valid if >= 0
	tokEnd int          // token text tail end (srcBuf index)

	// One character look-ahead
	ch rune // character before current srcPos

	// Error is called for each error encountered. If no Error
	// function is set, the error is reported to os.Stderr.
	// The message is passed as a byte slice.
	Error func(s *Scanner, msg []byte)

	// ErrorCount is incremented by one for each error encountered.
	ErrorCount int

	// The Mode field controls which tokens are recognized. For instance,
	// to recognize Ints, set the ScanInts bit in Mode. The field may be
	// changed at any time.
	Mode uint

	// The Whitespace field controls which characters are recognized
	// as white space. To recognize a character ch <= ' ' as white space,
	// set the ch'th bit in Whitespace (the Scanner's behavior is undefined
	// for values ch > ' '). The field may be changed at any time.
	Whitespace uint64

	// IsIdentRune is a predicate controlling the characters accepted
	// as the ith rune in an identifier. The set of valid characters
	// must not intersect with the set of white space characters.
	// If no IsIdentRune function is set, regular Go identifiers are
	// accepted instead. The field may be changed at any time.
	IsIdentRune func(ch rune, i int) bool

	// Start position of most recently scanned token; set by Scan.
	// Calling Init or Next invalidates the position (Line == 0).
	// The Filename field is always left untouched by the Scanner.
	// If an error is reported (via Error) and Position is invalid,
	// the scanner is not inside a token. Call Pos to obtain an error
	// position in that case, or to obtain the position immediately
	// after the most recently scanned token.
	Position
}
177 178 // Init initializes a [Scanner] with a new source and returns s.
179 // [Scanner.Error] is set to nil, [Scanner.ErrorCount] is set to 0, [Scanner.Mode] is set to [GoTokens],
180 // and [Scanner.Whitespace] is set to [GoWhitespace].
181 func (s *Scanner) Init(src io.Reader) *Scanner {
182 s.src = src
183 184 // initialize source buffer
185 // (the first call to next() will fill it by calling src.Read)
186 s.srcBuf[0] = utf8.RuneSelf // sentinel
187 s.srcPos = 0
188 s.srcEnd = 0
189 190 // initialize source position
191 s.srcBufOffset = 0
192 s.line = 1
193 s.column = 0
194 s.lastLineLen = 0
195 s.lastCharLen = 0
196 197 // initialize token text buffer
198 // (required for first call to next()).
199 s.tokPos = -1
200 201 // initialize one character look-ahead
202 s.ch = -2 // no char read yet, not EOF
203 204 // initialize public fields
205 s.Error = nil
206 s.ErrorCount = 0
207 s.Mode = GoTokens
208 s.Whitespace = GoWhitespace
209 s.Line = 0 // invalidate token position
210 211 return s
212 }
213 214 // next reads and returns the next Unicode character. It is designed such
215 // that only a minimal amount of work needs to be done in the common ASCII
216 // case (one test to check for both ASCII and end-of-buffer, and one test
217 // to check for newlines).
218 func (s *Scanner) next() rune {
219 ch, width := rune(s.srcBuf[s.srcPos]), 1
220 221 if ch >= utf8.RuneSelf {
222 // uncommon case: not ASCII or not enough bytes
223 for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
224 // not enough bytes: read some more, but first
225 // save away token text if any
226 if s.tokPos >= 0 {
227 s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
228 s.tokPos = 0
229 // s.tokEnd is set by Scan()
230 }
231 // move unread bytes to beginning of buffer
232 copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
233 s.srcBufOffset += s.srcPos
234 // read more bytes
235 // (an io.Reader must return io.EOF when it reaches
236 // the end of what it is reading - simply returning
237 // n == 0 will make this loop retry forever; but the
238 // error is in the reader implementation in that case)
239 i := s.srcEnd - s.srcPos
240 n, err := s.src.Read(s.srcBuf[i:bufLen])
241 s.srcPos = 0
242 s.srcEnd = i + n
243 s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
244 if err != nil {
245 if err != io.EOF {
246 s.error(err.Error())
247 }
248 if s.srcEnd == 0 {
249 if s.lastCharLen > 0 {
250 // previous character was not EOF
251 s.column++
252 }
253 s.lastCharLen = 0
254 return EOF
255 }
256 // If err == EOF, we won't be getting more
257 // bytes; break to avoid infinite loop. If
258 // err is something else, we don't know if
259 // we can get more bytes; thus also break.
260 break
261 }
262 }
263 // at least one byte
264 ch = rune(s.srcBuf[s.srcPos])
265 if ch >= utf8.RuneSelf {
266 // uncommon case: not ASCII
267 ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
268 if ch == utf8.RuneError && width == 1 {
269 // advance for correct error position
270 s.srcPos += width
271 s.lastCharLen = width
272 s.column++
273 s.error("invalid UTF-8 encoding")
274 return ch
275 }
276 }
277 }
278 279 // advance
280 s.srcPos += width
281 s.lastCharLen = width
282 s.column++
283 284 // special situations
285 switch ch {
286 case 0:
287 // for compatibility with other tools
288 s.error("invalid character NUL")
289 case '\n':
290 s.line++
291 s.lastLineLen = s.column
292 s.column = 0
293 }
294 295 return ch
296 }
297 298 // Next reads and returns the next Unicode character.
299 // It returns [EOF] at the end of the source. It reports
300 // a read error by calling s.Error, if not nil; otherwise
301 // it prints an error message to [os.Stderr]. Next does not
302 // update the [Scanner.Position] field; use [Scanner.Pos]() to
303 // get the current position.
304 func (s *Scanner) Next() rune {
305 s.tokPos = -1 // don't collect token text
306 s.Line = 0 // invalidate token position
307 ch := s.Peek()
308 if ch != EOF {
309 s.ch = s.next()
310 }
311 return ch
312 }
313 314 // Peek returns the next Unicode character in the source without advancing
315 // the scanner. It returns [EOF] if the scanner's position is at the last
316 // character of the source.
317 func (s *Scanner) Peek() rune {
318 if s.ch == -2 {
319 // this code is only run for the very first character
320 s.ch = s.next()
321 if s.ch == '\uFEFF' {
322 s.ch = s.next() // ignore BOM
323 }
324 }
325 return s.ch
326 }
327 328 func (s *Scanner) error(msg []byte) {
329 s.tokEnd = s.srcPos - s.lastCharLen // make sure token text is terminated
330 s.ErrorCount++
331 if s.Error != nil {
332 s.Error(s, msg)
333 return
334 }
335 pos := s.Position
336 if !pos.IsValid() {
337 pos = s.Pos()
338 }
339 fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
340 }
341 342 func (s *Scanner) errorf(format []byte, args ...any) {
343 s.error(fmt.Sprintf(format, args...))
344 }
345 346 func (s *Scanner) isIdentRune(ch rune, i int) bool {
347 if s.IsIdentRune != nil {
348 return ch != EOF && s.IsIdentRune(ch, i)
349 }
350 return ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) && i > 0
351 }
352 353 func (s *Scanner) scanIdentifier() rune {
354 // we know the zero'th rune is OK; start scanning at the next one
355 ch := s.next()
356 for i := 1; s.isIdentRune(ch, i); i++ {
357 ch = s.next()
358 }
359 return ch
360 }
// lower sets the ASCII case bit of ch. The result is the lower-case form
// of ch iff ch is an ASCII letter.
func lower(ch rune) rune { return ch | ('a' - 'A') }

// isDecimal reports whether ch is an ASCII decimal digit.
func isDecimal(ch rune) bool { return ch >= '0' && ch <= '9' }

// isHex reports whether ch is an ASCII hexadecimal digit of either case.
func isHex(ch rune) bool {
	if isDecimal(ch) {
		return true
	}
	l := lower(ch)
	return l >= 'a' && l <= 'f'
}
365 366 // digits accepts the sequence { digit | '_' } starting with ch0.
367 // If base <= 10, digits accepts any decimal digit but records
368 // the first invalid digit >= base in *invalid if *invalid == 0.
369 // digits returns the first rune that is not part of the sequence
370 // anymore, and a bitset describing whether the sequence contained
371 // digits (bit 0 is set), or separators '_' (bit 1 is set).
372 func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int) {
373 ch = ch0
374 if base <= 10 {
375 max := rune('0' + base)
376 for isDecimal(ch) || ch == '_' {
377 ds := 1
378 if ch == '_' {
379 ds = 2
380 } else if ch >= max && *invalid == 0 {
381 *invalid = ch
382 }
383 digsep |= ds
384 ch = s.next()
385 }
386 } else {
387 for isHex(ch) || ch == '_' {
388 ds := 1
389 if ch == '_' {
390 ds = 2
391 }
392 digsep |= ds
393 ch = s.next()
394 }
395 }
396 return
397 }
398 399 func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) {
400 base := 10 // number base
401 prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
402 digsep := 0 // bit 0: digit present, bit 1: '_' present
403 invalid := rune(0) // invalid digit in literal, or 0
404 405 // integer part
406 var tok rune
407 var ds int
408 if !seenDot {
409 tok = Int
410 if ch == '0' {
411 ch = s.next()
412 switch lower(ch) {
413 case 'x':
414 ch = s.next()
415 base, prefix = 16, 'x'
416 case 'o':
417 ch = s.next()
418 base, prefix = 8, 'o'
419 case 'b':
420 ch = s.next()
421 base, prefix = 2, 'b'
422 default:
423 base, prefix = 8, '0'
424 digsep = 1 // leading 0
425 }
426 }
427 ch, ds = s.digits(ch, base, &invalid)
428 digsep |= ds
429 if ch == '.' && s.Mode&ScanFloats != 0 {
430 ch = s.next()
431 seenDot = true
432 }
433 }
434 435 // fractional part
436 if seenDot {
437 tok = Float
438 if prefix == 'o' || prefix == 'b' {
439 s.error("invalid radix point in " + litname(prefix))
440 }
441 ch, ds = s.digits(ch, base, &invalid)
442 digsep |= ds
443 }
444 445 if digsep&1 == 0 {
446 s.error(litname(prefix) + " has no digits")
447 }
448 449 // exponent
450 if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 {
451 switch {
452 case e == 'e' && prefix != 0 && prefix != '0':
453 s.errorf("%q exponent requires decimal mantissa", ch)
454 case e == 'p' && prefix != 'x':
455 s.errorf("%q exponent requires hexadecimal mantissa", ch)
456 }
457 ch = s.next()
458 tok = Float
459 if ch == '+' || ch == '-' {
460 ch = s.next()
461 }
462 ch, ds = s.digits(ch, 10, nil)
463 digsep |= ds
464 if ds&1 == 0 {
465 s.error("exponent has no digits")
466 }
467 } else if prefix == 'x' && tok == Float {
468 s.error("hexadecimal mantissa requires a 'p' exponent")
469 }
470 471 if tok == Int && invalid != 0 {
472 s.errorf("invalid digit %q in %s", invalid, litname(prefix))
473 }
474 475 if digsep&2 != 0 {
476 s.tokEnd = s.srcPos - s.lastCharLen // make sure token text is terminated
477 if i := invalidSep(s.TokenText()); i >= 0 {
478 s.error("'_' must separate successive digits")
479 }
480 }
481 482 return tok, ch
483 }
// litname returns the human-readable name of the literal kind selected by
// prefix: 'x' is hexadecimal, 'o' and '0' are octal, 'b' is binary, and
// anything else is decimal. Each call returns a newly allocated slice.
func litname(prefix rune) []byte {
	switch prefix {
	case 'x':
		return []byte("hexadecimal literal")
	case 'o', '0':
		return []byte("octal literal")
	case 'b':
		return []byte("binary literal")
	}
	return []byte("decimal literal")
}
497 498 // invalidSep returns the index of the first invalid separator in x, or -1.
499 func invalidSep(x []byte) int {
500 x1 := ' ' // prefix char, we only care if it's 'x'
501 d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else)
502 i := 0
503 504 // a prefix counts as a digit
505 if len(x) >= 2 && x[0] == '0' {
506 x1 = lower(rune(x[1]))
507 if x1 == 'x' || x1 == 'o' || x1 == 'b' {
508 d = '0'
509 i = 2
510 }
511 }
512 513 // mantissa and exponent
514 for ; i < len(x); i++ {
515 p := d // previous digit
516 d = rune(x[i])
517 switch {
518 case d == '_':
519 if p != '0' {
520 return i
521 }
522 case isDecimal(d) || x1 == 'x' && isHex(d):
523 d = '0'
524 default:
525 if p == '_' {
526 return i - 1
527 }
528 d = '.'
529 }
530 }
531 if d == '_' {
532 return len(x) - 1
533 }
534 535 return -1
536 }
537 538 func digitVal(ch rune) int {
539 switch {
540 case '0' <= ch && ch <= '9':
541 return int(ch - '0')
542 case 'a' <= lower(ch) && lower(ch) <= 'f':
543 return int(lower(ch) - 'a' + 10)
544 }
545 return 16 // larger than any legal digit val
546 }
547 548 func (s *Scanner) scanDigits(ch rune, base, n int) rune {
549 for n > 0 && digitVal(ch) < base {
550 ch = s.next()
551 n--
552 }
553 if n > 0 {
554 s.error("invalid char escape")
555 }
556 return ch
557 }
558 559 func (s *Scanner) scanEscape(quote rune) rune {
560 ch := s.next() // read character after '/'
561 switch ch {
562 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
563 // nothing to do
564 ch = s.next()
565 case '0', '1', '2', '3', '4', '5', '6', '7':
566 ch = s.scanDigits(ch, 8, 3)
567 case 'x':
568 ch = s.scanDigits(s.next(), 16, 2)
569 case 'u':
570 ch = s.scanDigits(s.next(), 16, 4)
571 case 'U':
572 ch = s.scanDigits(s.next(), 16, 8)
573 default:
574 s.error("invalid char escape")
575 }
576 return ch
577 }
578 579 func (s *Scanner) scanString(quote rune) (n int) {
580 ch := s.next() // read character after quote
581 for ch != quote {
582 if ch == '\n' || ch < 0 {
583 s.error("literal not terminated")
584 return
585 }
586 if ch == '\\' {
587 ch = s.scanEscape(quote)
588 } else {
589 ch = s.next()
590 }
591 n++
592 }
593 return
594 }
595 596 func (s *Scanner) scanRawString() {
597 ch := s.next() // read character after '`'
598 for ch != '`' {
599 if ch < 0 {
600 s.error("literal not terminated")
601 return
602 }
603 ch = s.next()
604 }
605 }
606 607 func (s *Scanner) scanChar() {
608 if s.scanString('\'') != 1 {
609 s.error("invalid char literal")
610 }
611 }
612 613 func (s *Scanner) scanComment(ch rune) rune {
614 // ch == '/' || ch == '*'
615 if ch == '/' {
616 // line comment
617 ch = s.next() // read character after "//"
618 for ch != '\n' && ch >= 0 {
619 ch = s.next()
620 }
621 return ch
622 }
623 624 // general comment
625 ch = s.next() // read character after "/*"
626 for {
627 if ch < 0 {
628 s.error("comment not terminated")
629 break
630 }
631 ch0 := ch
632 ch = s.next()
633 if ch0 == '*' && ch == '/' {
634 ch = s.next()
635 break
636 }
637 }
638 return ch
639 }
// Scan reads the next token or Unicode character from source and returns it.
// It only recognizes tokens t for which the respective [Scanner.Mode] bit (1<<-t) is set.
// It returns [EOF] at the end of the source. It reports scanner errors (read and
// token errors) by calling s.Error, if not nil; otherwise it prints an error
// message to [os.Stderr].
//
// Scan also sets the embedded Position to the start of the returned token
// and records the token's extent for TokenText.
func (s *Scanner) Scan() rune {
	ch := s.Peek()

	// reset token text position
	s.tokPos = -1
	s.Line = 0

redo:
	// skip white space
	for s.Whitespace&(1<<uint(ch)) != 0 {
		ch = s.next()
	}

	// start collecting token text
	s.tokBuf.Reset()
	s.tokPos = s.srcPos - s.lastCharLen

	// set token position
	// (this is a slightly optimized version of the code in Pos())
	s.Offset = s.srcBufOffset + s.tokPos
	if s.column > 0 {
		// common case: last character was not a '\n'
		s.Line = s.line
		s.Column = s.column
	} else {
		// last character was a '\n'
		// (we cannot be at the beginning of the source
		// since we have called next() at least once)
		s.Line = s.line - 1
		s.Column = s.lastLineLen
	}

	// determine token value
	// (unless a token is recognized below, the character itself is returned)
	tok := ch
	switch {
	case s.isIdentRune(ch, 0):
		if s.Mode&ScanIdents != 0 {
			tok = Ident
			ch = s.scanIdentifier()
		} else {
			ch = s.next()
		}
	case isDecimal(ch):
		if s.Mode&(ScanInts|ScanFloats) != 0 {
			tok, ch = s.scanNumber(ch, false)
		} else {
			ch = s.next()
		}
	default:
		switch ch {
		case EOF:
			// nothing to consume; tok is already EOF
			break
		case '"':
			if s.Mode&ScanStrings != 0 {
				s.scanString('"')
				tok = String
			}
			// advance past the quote that was read last
			ch = s.next()
		case '\'':
			if s.Mode&ScanChars != 0 {
				s.scanChar()
				tok = Char
			}
			ch = s.next()
		case '.':
			ch = s.next()
			if isDecimal(ch) && s.Mode&ScanFloats != 0 {
				// a '.' followed by a digit starts a float literal
				tok, ch = s.scanNumber(ch, true)
			}
		case '/':
			ch = s.next()
			if (ch == '/' || ch == '*') && s.Mode&ScanComments != 0 {
				if s.Mode&SkipComments != 0 {
					s.tokPos = -1 // don't collect token text
					ch = s.scanComment(ch)
					// treat the comment like white space and start over
					goto redo
				}
				ch = s.scanComment(ch)
				tok = Comment
			}
		case '`':
			if s.Mode&ScanRawStrings != 0 {
				s.scanRawString()
				tok = RawString
			}
			ch = s.next()
		default:
			ch = s.next()
		}
	}

	// end of token text
	s.tokEnd = s.srcPos - s.lastCharLen

	// ch is the look-ahead character for the next call
	s.ch = ch
	return tok
}
743 744 // Pos returns the position of the character immediately after
745 // the character or token returned by the last call to [Scanner.Next] or [Scanner.Scan].
746 // Use the [Scanner.Position] field for the start position of the most
747 // recently scanned token.
748 func (s *Scanner) Pos() (pos Position) {
749 pos.Filename = s.Filename
750 pos.Offset = s.srcBufOffset + s.srcPos - s.lastCharLen
751 switch {
752 case s.column > 0:
753 // common case: last character was not a '\n'
754 pos.Line = s.line
755 pos.Column = s.column
756 case s.lastLineLen > 0:
757 // last character was a '\n'
758 pos.Line = s.line - 1
759 pos.Column = s.lastLineLen
760 default:
761 // at the beginning of the source
762 pos.Line = 1
763 pos.Column = 1
764 }
765 return
766 }
767 768 // TokenText returns the string corresponding to the most recently scanned token.
769 // Valid after calling [Scanner.Scan] and in calls of [Scanner.Error].
770 func (s *Scanner) TokenText() []byte {
771 if s.tokPos < 0 {
772 // no token text
773 return ""
774 }
775 776 if s.tokEnd < s.tokPos {
777 // if EOF was reached, s.tokEnd is set to -1 (s.srcPos == 0)
778 s.tokEnd = s.tokPos
779 }
780 // s.tokEnd >= s.tokPos
781 782 if s.tokBuf.Len() == 0 {
783 // common case: the entire token text is still in srcBuf
784 return []byte(s.srcBuf[s.tokPos:s.tokEnd])
785 }
786 787 // part of the token text was saved in tokBuf: save the rest in
788 // tokBuf as well and return its content
789 s.tokBuf.Write(s.srcBuf[s.tokPos:s.tokEnd])
790 s.tokPos = s.tokEnd // ensure idempotency of TokenText() call
791 return s.tokBuf.String()
792 }
793