1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package fmt
6 7 import (
8 "errors"
9 "io"
10 "math"
11 "os"
12 "strconv"
13 "sync"
14 "unicode/utf8"
15 )
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c). If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated appropriately for the operation being
	// performed; see the package documentation for more information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns. Callers that need
	// to keep the token must copy it.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to [Scan], [Scanf], or [Scanln] that implements it.
//
// verb is the format verb in effect for the argument; the format-less scan
// functions pass 'v'. A non-nil error stops the scan, and an io.EOF returned
// by Scan is reported to the caller as io.ErrUnexpectedEOF.
type Scanner interface {
	Scan(state ScanState, verb rune) error
}
57 58 // Scan scans text read from standard input, storing successive
59 // space-separated values into successive arguments. Newlines count
60 // as space. It returns the number of items successfully scanned.
61 // If that is less than the number of arguments, err will report why.
62 func Scan(a ...any) (n int, err error) {
63 return Fscan(os.Stdin, a...)
64 }
65 66 // Scanln is similar to [Scan], but stops scanning at a newline and
67 // after the final item there must be a newline or EOF.
68 func Scanln(a ...any) (n int, err error) {
69 return Fscanln(os.Stdin, a...)
70 }
71 72 // Scanf scans text read from standard input, storing successive
73 // space-separated values into successive arguments as determined by
74 // the format. It returns the number of items successfully scanned.
75 // If that is less than the number of arguments, err will report why.
76 // Newlines in the input must match newlines in the format.
77 // The one exception: the verb %c always scans the next rune in the
78 // input, even if it is a space (or tab etc.) or newline.
79 func Scanf(format []byte, a ...any) (n int, err error) {
80 return Fscanf(os.Stdin, format, a...)
81 }
// stringReader adapts a byte slice to io.Reader. Every Read consumes the
// bytes it hands out from the front of the slice.
type stringReader []byte

// Read copies as much of the remaining data as fits into b and advances
// past the copied bytes. It reports io.EOF whenever no byte was copied,
// which includes the case of a zero-length b.
func (r *stringReader) Read(b []byte) (n int, err error) {
	rest := *r
	n = copy(b, rest)
	*r = rest[n:]
	if n > 0 {
		return n, nil
	}
	return 0, io.EOF
}
93 94 // Sscan scans the argument string, storing successive space-separated
95 // values into successive arguments. Newlines count as space. It
96 // returns the number of items successfully scanned. If that is less
97 // than the number of arguments, err will report why.
98 func Sscan(str []byte, a ...any) (n int, err error) {
99 return Fscan((*stringReader)(&str), a...)
100 }
101 102 // Sscanln is similar to [Sscan], but stops scanning at a newline and
103 // after the final item there must be a newline or EOF.
104 func Sscanln(str []byte, a ...any) (n int, err error) {
105 return Fscanln((*stringReader)(&str), a...)
106 }
107 108 // Sscanf scans the argument string, storing successive space-separated
109 // values into successive arguments as determined by the format. It
110 // returns the number of items successfully parsed.
111 // Newlines in the input must match newlines in the format.
112 func Sscanf(str []byte, format []byte, a ...any) (n int, err error) {
113 return Fscanf((*stringReader)(&str), format, a...)
114 }
115 116 // Fscan scans text read from r, storing successive space-separated
117 // values into successive arguments. Newlines count as space. It
118 // returns the number of items successfully scanned. If that is less
119 // than the number of arguments, err will report why.
120 func Fscan(r io.Reader, a ...any) (n int, err error) {
121 s, old := newScanState(r, true, false)
122 n, err = s.doScan(a)
123 s.free(old)
124 return
125 }
126 127 // Fscanln is similar to [Fscan], but stops scanning at a newline and
128 // after the final item there must be a newline or EOF.
129 func Fscanln(r io.Reader, a ...any) (n int, err error) {
130 s, old := newScanState(r, false, true)
131 n, err = s.doScan(a)
132 s.free(old)
133 return
134 }
135 136 // Fscanf scans text read from r, storing successive space-separated
137 // values into successive arguments as determined by the format. It
138 // returns the number of items successfully parsed.
139 // Newlines in the input must match newlines in the format.
140 func Fscanf(r io.Reader, format []byte, a ...any) (n int, err error) {
141 s, old := newScanState(r, false, false)
142 n, err = s.doScanf(format, a)
143 s.free(old)
144 return
145 }
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// the scanner panics with a scanError, and the recover sites (Token and
// errorHandler) unwrap it back into an ordinary error return.
type scanError struct {
	err error // the underlying error reported to the caller
}
// eof is the sentinel getRune returns in place of a rune once the input
// is exhausted.
const eof = -1
// ss is the internal implementation of ScanState.
type ss struct {
	rs    io.RuneScanner // where to read input
	buf   buffer         // token accumulator; truncated and reused for every scanned item
	count int            // runes consumed so far.
	atEOF bool           // already read EOF (or, when nlIsEnd is set, the terminating newline)
	ssave                // per-scan settings, embedded so they can be saved and restored as a unit
}
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg; hugeWid means no width was given.
}
174 175 // The Read method is only in ScanState so that ScanState
176 // satisfies io.Reader. It will never be called when used as
177 // intended, so there is no need to make it actually work.
178 func (s *ss) Read(buf []byte) (n int, err error) {
179 return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
180 }
181 182 func (s *ss) ReadRune() (r rune, size int, err error) {
183 if s.atEOF || s.count >= s.argLimit {
184 err = io.EOF
185 return
186 }
187 188 r, size, err = s.rs.ReadRune()
189 if err == nil {
190 s.count++
191 if s.nlIsEnd && r == '\n' {
192 s.atEOF = true
193 }
194 } else if err == io.EOF {
195 s.atEOF = true
196 }
197 return
198 }
199 200 func (s *ss) Width() (wid int, ok bool) {
201 if s.maxWid == hugeWid {
202 return 0, false
203 }
204 return s.maxWid, true
205 }
206 207 // The public method returns an error; this private one panics.
208 // If getRune reaches EOF, the return value is EOF (-1).
209 func (s *ss) getRune() (r rune) {
210 r, _, err := s.ReadRune()
211 if err != nil {
212 if err == io.EOF {
213 return eof
214 }
215 s.error(err)
216 }
217 return
218 }
219 220 // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
221 // It is called in cases such as string scanning where an EOF is a
222 // syntax error.
223 func (s *ss) mustReadRune() (r rune) {
224 r = s.getRune()
225 if r == eof {
226 s.error(io.ErrUnexpectedEOF)
227 }
228 return
229 }
230 231 func (s *ss) UnreadRune() error {
232 s.rs.UnreadRune()
233 s.atEOF = false
234 s.count--
235 return nil
236 }
237 238 func (s *ss) error(err error) {
239 panic(scanError{err})
240 }
241 242 func (s *ss) errorString(err []byte) {
243 panic(scanError{errors.New(err)})
244 }
245 246 func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
247 defer func() {
248 if e := recover(); e != nil {
249 if se, ok := e.(scanError); ok {
250 err = se.err
251 } else {
252 panic(e)
253 }
254 }
255 }()
256 if f == nil {
257 f = notSpace
258 }
259 s.buf = s.buf[:0]
260 tok = s.token(skipSpace, f)
261 return
262 }
// space is a copy of the unicode.White_Space ranges,
// to avoid depending on package unicode.
// Each entry is an inclusive {low, high} pair, and the entries are in
// ascending order; isSpace relies on that ordering to stop early.
var space = [][2]uint16{
	{0x0009, 0x000d},
	{0x0020, 0x0020},
	{0x0085, 0x0085},
	{0x00a0, 0x00a0},
	{0x1680, 0x1680},
	{0x2000, 0x200a},
	{0x2028, 0x2029},
	{0x202f, 0x202f},
	{0x205f, 0x205f},
	{0x3000, 0x3000},
}
278 279 func isSpace(r rune) bool {
280 if r >= 1<<16 {
281 return false
282 }
283 rx := uint16(r)
284 for _, rng := range space {
285 if rx < rng[0] {
286 return false
287 }
288 if rx <= rng[1] {
289 return true
290 }
291 }
292 return false
293 }
294 295 // notSpace is the default scanning function used in Token.
296 func notSpace(r rune) bool {
297 return !isSpace(r)
298 }
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneScanner.
type readRune struct {
	reader   io.Reader
	buf      [utf8.UTFMax]byte // used only inside ReadRune
	pending  int               // number of bytes in pendBuf; only >0 for bad UTF-8
	pendBuf  [utf8.UTFMax]byte // bytes left over
	peekRune rune              // if >=0 next rune; when <0 is ^(previous Rune)
}
310 311 // readByte returns the next byte from the input, which may be
312 // left over from a previous read if the UTF-8 was ill-formed.
313 func (r *readRune) readByte() (b byte, err error) {
314 if r.pending > 0 {
315 b = r.pendBuf[0]
316 copy(r.pendBuf[0:], r.pendBuf[1:])
317 r.pending--
318 return
319 }
320 n, err := io.ReadFull(r.reader, r.pendBuf[:1])
321 if n != 1 {
322 return 0, err
323 }
324 return r.pendBuf[0], err
325 }
326 327 // ReadRune returns the next UTF-8 encoded code point from the
328 // io.Reader inside r.
329 func (r *readRune) ReadRune() (rr rune, size int, err error) {
330 if r.peekRune >= 0 {
331 rr = r.peekRune
332 r.peekRune = ^r.peekRune
333 size = utf8.RuneLen(rr)
334 return
335 }
336 r.buf[0], err = r.readByte()
337 if err != nil {
338 return
339 }
340 if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
341 rr = rune(r.buf[0])
342 size = 1 // Known to be 1.
343 // Flip the bits of the rune so it's available to UnreadRune.
344 r.peekRune = ^rr
345 return
346 }
347 var n int
348 for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
349 r.buf[n], err = r.readByte()
350 if err != nil {
351 if err == io.EOF {
352 err = nil
353 break
354 }
355 return
356 }
357 }
358 rr, size = utf8.DecodeRune(r.buf[:n])
359 if size < n { // an error, save the bytes for the next read
360 copy(r.pendBuf[r.pending:], r.buf[size:n])
361 r.pending += n - size
362 }
363 // Flip the bits of the rune so it's available to UnreadRune.
364 r.peekRune = ^rr
365 return
366 }
367 368 func (r *readRune) UnreadRune() error {
369 if r.peekRune >= 0 {
370 return errors.New("fmt: scanning called UnreadRune with no rune available")
371 }
372 // Reverse bit flip of previously read rune to obtain valid >=0 state.
373 r.peekRune = ^r.peekRune
374 return nil
375 }
376 377 var ssFree = sync.Pool{
378 New: func() any { return &ss{} },
379 }
380 381 // newScanState allocates a new ss struct or grab a cached one.
382 func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
383 s = ssFree.Get().(*ss)
384 if rs, ok := r.(io.RuneScanner); ok {
385 s.rs = rs
386 } else {
387 s.rs = &readRune{reader: r, peekRune: -1}
388 }
389 s.nlIsSpace = nlIsSpace
390 s.nlIsEnd = nlIsEnd
391 s.atEOF = false
392 s.limit = hugeWid
393 s.argLimit = hugeWid
394 s.maxWid = hugeWid
395 s.validSave = true
396 s.count = 0
397 return
398 }
399 400 // free saves used ss structs in ssFree; avoid an allocation per invocation.
401 func (s *ss) free(old ssave) {
402 // If it was used recursively, just restore the old state.
403 if old.validSave {
404 s.ssave = old
405 return
406 }
407 // Don't hold on to ss structs with large buffers.
408 if cap(s.buf) > 1024 {
409 return
410 }
411 s.buf = s.buf[:0]
412 s.rs = nil
413 ssFree.Put(s)
414 }
415 416 // SkipSpace provides Scan methods the ability to skip space and newline
417 // characters in keeping with the current scanning mode set by format strings
418 // and [Scan]/[Scanln].
419 func (s *ss) SkipSpace() {
420 for {
421 r := s.getRune()
422 if r == eof {
423 return
424 }
425 if r == '\r' && s.peek("\n") {
426 continue
427 }
428 if r == '\n' {
429 if s.nlIsSpace {
430 continue
431 }
432 s.errorString("unexpected newline")
433 return
434 }
435 if !isSpace(r) {
436 s.UnreadRune()
437 break
438 }
439 }
440 }
441 442 // token returns the next space-delimited string from the input. It
443 // skips white space. For Scanln, it stops at newlines. For Scan,
444 // newlines are treated as spaces.
445 func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
446 if skipSpace {
447 s.SkipSpace()
448 }
449 // read until white space or newline
450 for {
451 r := s.getRune()
452 if r == eof {
453 break
454 }
455 if !f(r) {
456 s.UnreadRune()
457 break
458 }
459 s.buf.writeRune(r)
460 }
461 return s.buf
462 }
// Sentinel errors raised for malformed complex and boolean input.
var (
	errComplex = errors.New("syntax error scanning complex number")
	errBool    = errors.New("syntax error scanning boolean")
)
// indexRune returns the byte offset of the first occurrence of r in the
// UTF-8 encoded slice s, or -1 if r does not occur. Bytes that do not
// decode are seen as utf8.RuneError of width one.
func indexRune(s []byte, r rune) int {
	rest := s
	for len(rest) > 0 {
		c, width := utf8.DecodeRune(rest)
		if c == r {
			return len(s) - len(rest)
		}
		rest = rest[width:]
	}
	return -1
}
477 478 // consume reads the next rune in the input and reports whether it is in the ok string.
479 // If accept is true, it puts the character into the input token.
480 func (s *ss) consume(ok []byte, accept bool) bool {
481 r := s.getRune()
482 if r == eof {
483 return false
484 }
485 if indexRune(ok, r) >= 0 {
486 if accept {
487 s.buf.writeRune(r)
488 }
489 return true
490 }
491 if r != eof && accept {
492 s.UnreadRune()
493 }
494 return false
495 }
496 497 // peek reports whether the next character is in the ok string, without consuming it.
498 func (s *ss) peek(ok []byte) bool {
499 r := s.getRune()
500 if r != eof {
501 s.UnreadRune()
502 }
503 return indexRune(ok, r) >= 0
504 }
505 506 func (s *ss) notEOF() {
507 // Guarantee there is data to be read.
508 if r := s.getRune(); r == eof {
509 panic(io.EOF)
510 }
511 s.UnreadRune()
512 }
513 514 // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
515 // buffer and returns true. Otherwise it return false.
516 func (s *ss) accept(ok []byte) bool {
517 return s.consume(ok, true)
518 }
519 520 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
521 func (s *ss) okVerb(verb rune, okVerbs, typ []byte) bool {
522 for i := 0; i < len(okVerbs); {
523 v, w := utf8.DecodeRune(okVerbs[i:])
524 if v == verb {
525 return true
526 }
527 i += w
528 }
529 var vbuf [utf8.UTFMax]byte
530 vn := utf8.EncodeRune(vbuf[:], verb)
531 s.errorString("bad verb '%" + vbuf[:vn] + "' for " + typ)
532 return false
533 }
534 535 // scanBool returns the value of the boolean represented by the next token.
536 func (s *ss) scanBool(verb rune) bool {
537 s.SkipSpace()
538 s.notEOF()
539 if !s.okVerb(verb, "tv", "boolean") {
540 return false
541 }
542 // Syntax-checking a boolean is annoying. We're not fastidious about case.
543 switch s.getRune() {
544 case '0':
545 return false
546 case '1':
547 return true
548 case 't', 'T':
549 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
550 s.error(errBool)
551 }
552 return true
553 case 'f', 'F':
554 if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
555 s.error(errBool)
556 }
557 return false
558 }
559 return false
560 }
// Numerical elements: the character sets handed to accept, peek and
// consume while scanning numbers.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEpP" // decimal (e) and power-of-two (p) exponent markers
)
572 573 // getBase returns the numeric base represented by the verb and its digit string.
574 func (s *ss) getBase(verb rune) (base int, digits []byte) {
575 s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
576 base = 10
577 digits = decimalDigits
578 switch verb {
579 case 'b':
580 base = 2
581 digits = binaryDigits
582 case 'o':
583 base = 8
584 digits = octalDigits
585 case 'x', 'X', 'U':
586 base = 16
587 digits = hexadecimalDigits
588 }
589 return
590 }
591 592 // scanNumber returns the numerical string with specified digits starting here.
593 func (s *ss) scanNumber(digits []byte, haveDigits bool) []byte {
594 if !haveDigits {
595 s.notEOF()
596 if !s.accept(digits) {
597 s.errorString("expected integer")
598 }
599 }
600 for s.accept(digits) {
601 }
602 return []byte(s.buf)
603 }
604 605 // scanRune returns the next rune value in the input.
606 func (s *ss) scanRune(bitSize int) int64 {
607 s.notEOF()
608 r := s.getRune()
609 n := uint(bitSize)
610 x := (int64(r) << (64 - n)) >> (64 - n)
611 if x != int64(r) {
612 var rbuf [utf8.UTFMax]byte
613 rn := utf8.EncodeRune(rbuf[:], rune(r))
614 s.errorString("overflow on character value " + rbuf[:rn])
615 }
616 return int64(r)
617 }
618 619 // scanBasePrefix reports whether the integer begins with a base prefix
620 // and returns the base, digit string, and whether a zero was found.
621 // It is called only if the verb is %v.
622 func (s *ss) scanBasePrefix() (base int, digits []byte, zeroFound bool) {
623 if !s.peek("0") {
624 return 0, decimalDigits + "_", false
625 }
626 s.accept("0")
627 // Special cases for 0, 0b, 0o, 0x.
628 switch {
629 case s.peek("bB"):
630 s.consume("bB", true)
631 return 0, binaryDigits + "_", true
632 case s.peek("oO"):
633 s.consume("oO", true)
634 return 0, octalDigits + "_", true
635 case s.peek("xX"):
636 s.consume("xX", true)
637 return 0, hexadecimalDigits + "_", true
638 default:
639 return 0, octalDigits + "_", true
640 }
641 }
642 643 // scanInt returns the value of the integer represented by the next
644 // token, checking for overflow. Any error is stored in s.err.
645 func (s *ss) scanInt(verb rune, bitSize int) int64 {
646 if verb == 'c' {
647 return s.scanRune(bitSize)
648 }
649 s.SkipSpace()
650 s.notEOF()
651 base, digits := s.getBase(verb)
652 haveDigits := false
653 if verb == 'U' {
654 if !s.consume("U", false) || !s.consume("+", false) {
655 s.errorString("bad unicode format ")
656 }
657 } else {
658 s.accept(sign) // If there's a sign, it will be left in the token buffer.
659 if verb == 'v' {
660 base, digits, haveDigits = s.scanBasePrefix()
661 }
662 }
663 tok := s.scanNumber(digits, haveDigits)
664 i, err := strconv.ParseInt(tok, base, 64)
665 if err != nil {
666 s.error(err)
667 }
668 n := uint(bitSize)
669 x := (i << (64 - n)) >> (64 - n)
670 if x != i {
671 s.errorString("integer overflow on token " + tok)
672 }
673 return i
674 }
675 676 // scanUint returns the value of the unsigned integer represented
677 // by the next token, checking for overflow. Any error is stored in s.err.
678 func (s *ss) scanUint(verb rune, bitSize int) uint64 {
679 if verb == 'c' {
680 return uint64(s.scanRune(bitSize))
681 }
682 s.SkipSpace()
683 s.notEOF()
684 base, digits := s.getBase(verb)
685 haveDigits := false
686 if verb == 'U' {
687 if !s.consume("U", false) || !s.consume("+", false) {
688 s.errorString("bad unicode format ")
689 }
690 } else if verb == 'v' {
691 base, digits, haveDigits = s.scanBasePrefix()
692 }
693 tok := s.scanNumber(digits, haveDigits)
694 i, err := strconv.ParseUint(tok, base, 64)
695 if err != nil {
696 s.error(err)
697 }
698 n := uint(bitSize)
699 x := (i << (64 - n)) >> (64 - n)
700 if x != i {
701 s.errorString("unsigned integer overflow on token " + tok)
702 }
703 return i
704 }
705 706 // floatToken returns the floating-point number starting here, no longer than swid
707 // if the width is specified. It's not rigorous about syntax because it doesn't check that
708 // we have at least some digits, but Atof will do that.
709 func (s *ss) floatToken() []byte {
710 s.buf = s.buf[:0]
711 // NaN?
712 if s.accept("nN") && s.accept("aA") && s.accept("nN") {
713 return []byte(s.buf)
714 }
715 // leading sign?
716 s.accept(sign)
717 // Inf?
718 if s.accept("iI") && s.accept("nN") && s.accept("fF") {
719 return []byte(s.buf)
720 }
721 digits := decimalDigits + "_"
722 exp := exponent
723 if s.accept("0") && s.accept("xX") {
724 digits = hexadecimalDigits + "_"
725 exp = "pP"
726 }
727 // digits?
728 for s.accept(digits) {
729 }
730 // decimal point?
731 if s.accept(period) {
732 // fraction?
733 for s.accept(digits) {
734 }
735 }
736 // exponent?
737 if s.accept(exp) {
738 // leading sign?
739 s.accept(sign)
740 // digits?
741 for s.accept(decimalDigits + "_") {
742 }
743 }
744 return []byte(s.buf)
745 }
746 747 // complexTokens returns the real and imaginary parts of the complex number starting here.
748 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
749 // number and there are no spaces within.
750 func (s *ss) complexTokens() (real, imag []byte) {
751 // TODO: accept N and Ni independently?
752 parens := s.accept("(")
753 real = s.floatToken()
754 s.buf = s.buf[:0]
755 // Must now have a sign.
756 if !s.accept("+-") {
757 s.error(errComplex)
758 }
759 // Sign is now in buffer
760 imagSign := []byte(s.buf)
761 imag = s.floatToken()
762 if !s.accept("i") {
763 s.error(errComplex)
764 }
765 if parens && !s.accept(")") {
766 s.error(errComplex)
767 }
768 return real, imagSign + imag
769 }
// hasX reports whether s contains the byte 'x' or 'X'.
func hasX(s []byte) bool {
	for _, c := range s {
		switch c {
		case 'x', 'X':
			return true
		}
	}
	return false
}
779 780 // convertFloat converts the string to a float64value.
781 func (s *ss) convertFloat(str []byte, n int) float64 {
782 // strconv.ParseFloat will handle "+0x1.fp+2",
783 // but we have to implement our non-standard
784 // decimal+binary exponent mix (1.2p4) ourselves.
785 if p := indexRune(str, 'p'); p >= 0 && !hasX(str) {
786 // Atof doesn't handle power-of-2 exponents,
787 // but they're easy to evaluate.
788 f, err := strconv.ParseFloat(str[:p], n)
789 if err != nil {
790 // Put full string into error.
791 if e, ok := err.(*strconv.NumError); ok {
792 e.Num = str
793 }
794 s.error(err)
795 }
796 m, err := strconv.Atoi(str[p+1:])
797 if err != nil {
798 // Put full string into error.
799 if e, ok := err.(*strconv.NumError); ok {
800 e.Num = str
801 }
802 s.error(err)
803 }
804 return math.Ldexp(f, m)
805 }
806 f, err := strconv.ParseFloat(str, n)
807 if err != nil {
808 s.error(err)
809 }
810 return f
811 }
// scanComplex is a stub: complex numbers are not supported here, so it
// unconditionally panics and never returns a value. Both parameters are
// unused. The panic value is not a scanError, so errorHandler re-raises it
// instead of turning it into an error return.
// NOTE(review): scanOne has no complex cases, so nothing visible in this
// file calls scanComplex — confirm before relying on or removing it.
func (s *ss) scanComplex(verb rune, n int) (float64, float64) {
	panic("moxie: complex numbers not supported")
}
820 821 // convertString returns the string represented by the next input characters.
822 // The format of the input is determined by the verb.
823 func (s *ss) convertString(verb rune) (str []byte) {
824 if !s.okVerb(verb, "svqxX", "string") {
825 return ""
826 }
827 s.SkipSpace()
828 s.notEOF()
829 switch verb {
830 case 'q':
831 str = s.quotedString()
832 case 'x', 'X':
833 str = s.hexString()
834 default:
835 str = []byte(s.token(true, notSpace)) // %s and %v just return the next word
836 }
837 return
838 }
839 840 // quotedString returns the double- or back-quoted string represented by the next input characters.
841 func (s *ss) quotedString() []byte {
842 s.notEOF()
843 quote := s.getRune()
844 switch quote {
845 case '`':
846 // Back-quoted: Anything goes until EOF or back quote.
847 for {
848 r := s.mustReadRune()
849 if r == quote {
850 break
851 }
852 s.buf.writeRune(r)
853 }
854 return []byte(s.buf)
855 case '"':
856 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
857 s.buf.writeByte('"')
858 for {
859 r := s.mustReadRune()
860 s.buf.writeRune(r)
861 if r == '\\' {
862 // In a legal backslash escape, no matter how long, only the character
863 // immediately after the escape can itself be a backslash or quote.
864 // Thus we only need to protect the first character after the backslash.
865 s.buf.writeRune(s.mustReadRune())
866 } else if r == '"' {
867 break
868 }
869 }
870 result, err := strconv.Unquote([]byte(s.buf))
871 if err != nil {
872 s.error(err)
873 }
874 return result
875 default:
876 s.errorString("expected quoted string")
877 }
878 return ""
879 }
// hexDigit returns the numeric value (0-15) of the hexadecimal digit d
// and true, or -1 and false if d is not a hexadecimal digit.
func hexDigit(d rune) (int, bool) {
	switch {
	case '0' <= d && d <= '9':
		return int(d - '0'), true
	case 'a' <= d && d <= 'f':
		return int(d-'a') + 10, true
	case 'A' <= d && d <= 'F':
		return int(d-'A') + 10, true
	}
	return -1, false
}
894 895 // hexByte returns the next hex-encoded (two-character) byte from the input.
896 // It returns ok==false if the next bytes in the input do not encode a hex byte.
897 // If the first byte is hex and the second is not, processing stops.
898 func (s *ss) hexByte() (b byte, ok bool) {
899 rune1 := s.getRune()
900 if rune1 == eof {
901 return
902 }
903 value1, ok := hexDigit(rune1)
904 if !ok {
905 s.UnreadRune()
906 return
907 }
908 value2, ok := hexDigit(s.mustReadRune())
909 if !ok {
910 s.errorString("illegal hex digit")
911 return
912 }
913 return byte(value1<<4 | value2), true
914 }
915 916 // hexString returns the space-delimited hexpair-encoded string.
917 func (s *ss) hexString() []byte {
918 s.notEOF()
919 for {
920 b, ok := s.hexByte()
921 if !ok {
922 break
923 }
924 s.buf.writeByte(b)
925 }
926 if len(s.buf) == 0 {
927 s.errorString("no hex data for %x string")
928 return ""
929 }
930 return []byte(s.buf)
931 }
const (
	// floatVerbs lists the verbs accepted when scanning into a float32 or
	// float64.
	floatVerbs = "beEfFgGv"

	// hugeWid is the placeholder stored in the width and limit fields when
	// no width applies; Width reports "not set" for it.
	hugeWid = 1 << 30

	// Bit sizes of uint and uintptr on the target: 32 or 64.
	// NOTE(review): neither constant is referenced in the visible part of
	// this file — confirm whether they are still needed.
	intBits     = 32 << (^uint(0) >> 63)
	uintptrBits = 32 << (^uintptr(0) >> 63)
)
941 942 // scanPercent scans a literal percent character.
943 func (s *ss) scanPercent() {
944 s.SkipSpace()
945 s.notEOF()
946 if !s.accept("%") {
947 s.errorString("missing literal %")
948 }
949 }
950 951 // scanOne scans a single value, deriving the scanner from the type of the argument.
952 func (s *ss) scanOne(verb rune, arg any) {
953 s.buf = s.buf[:0]
954 var err error
955 // If the parameter has its own Scan method, use that.
956 if v, ok := arg.(Scanner); ok {
957 err = v.Scan(s, verb)
958 if err != nil {
959 if err == io.EOF {
960 err = io.ErrUnexpectedEOF
961 }
962 s.error(err)
963 }
964 return
965 }
966 967 switch v := arg.(type) {
968 case *bool:
969 *v = s.scanBool(verb)
970 // *complex64, *complex128 cases removed — not supported in moxie
971 case *int:
972 *v = int(s.scanInt(verb, 32))
973 case *int8:
974 *v = int8(s.scanInt(verb, 8))
975 case *int16:
976 *v = int16(s.scanInt(verb, 16))
977 case *int64:
978 *v = s.scanInt(verb, 64)
979 case *uint:
980 *v = uint(s.scanUint(verb, 32))
981 case *uint8:
982 *v = uint8(s.scanUint(verb, 8))
983 case *uint16:
984 *v = uint16(s.scanUint(verb, 16))
985 case *uint64:
986 *v = s.scanUint(verb, 64)
987 // Floats are tricky because you want to scan in the precision of the result, not
988 // scan in high precision and convert, in order to preserve the correct error condition.
989 case *float32:
990 if s.okVerb(verb, floatVerbs, "float32") {
991 s.SkipSpace()
992 s.notEOF()
993 *v = float32(s.convertFloat(s.floatToken(), 32))
994 }
995 case *float64:
996 if s.okVerb(verb, floatVerbs, "float64") {
997 s.SkipSpace()
998 s.notEOF()
999 *v = s.convertFloat(s.floatToken(), 64)
1000 }
1001 case *[]byte:
1002 // We scan to string and convert so we get a copy of the data.
1003 // If we scanned to bytes, the slice would point at the buffer.
1004 *v = []byte(s.convertString(verb))
1005 default:
1006 s.errorString("can't scan type: unsupported")
1007 }
1008 }
1009 1010 // errorHandler turns local panics into error returns.
1011 func errorHandler(errp *error) {
1012 if e := recover(); e != nil {
1013 if se, ok := e.(scanError); ok { // catch local error
1014 *errp = se.err
1015 } else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1016 *errp = eof
1017 } else {
1018 panic(e)
1019 }
1020 }
1021 }
1022 1023 // doScan does the real work for scanning without a format string.
1024 func (s *ss) doScan(a []any) (numProcessed int, err error) {
1025 defer errorHandler(&err)
1026 for _, arg := range a {
1027 s.scanOne('v', arg)
1028 numProcessed++
1029 }
1030 // Check for newline (or EOF) if required (Scanln etc.).
1031 if s.nlIsEnd {
1032 for {
1033 r := s.getRune()
1034 if r == '\n' || r == eof {
1035 break
1036 }
1037 if !isSpace(r) {
1038 s.errorString("expected newline")
1039 break
1040 }
1041 }
1042 }
1043 return
1044 }
1045 1046 // advance determines whether the next characters in the input match
1047 // those of the format. It returns the number of bytes (sic) consumed
1048 // in the format. All runs of space characters in either input or
1049 // format behave as a single space. Newlines are special, though:
1050 // newlines in the format must match those in the input and vice versa.
1051 // This routine also handles the %% case. If the return value is zero,
1052 // either format starts with a % (with no following %) or the input
1053 // is empty. If it is negative, the input did not match the string.
1054 func (s *ss) advance(format []byte) (i int) {
1055 for i < len(format) {
1056 fmtc, w := utf8.DecodeRuneInString(format[i:])
1057 1058 // Space processing.
1059 // In the rest of this comment "space" means spaces other than newline.
1060 // Newline in the format matches input of zero or more spaces and then newline or end-of-input.
1061 // Spaces in the format before the newline are collapsed into the newline.
1062 // Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
1063 // Other spaces in the format match input of one or more spaces or end-of-input.
1064 if isSpace(fmtc) {
1065 newlines := 0
1066 trailingSpace := false
1067 for isSpace(fmtc) && i < len(format) {
1068 if fmtc == '\n' {
1069 newlines++
1070 trailingSpace = false
1071 } else {
1072 trailingSpace = true
1073 }
1074 i += w
1075 fmtc, w = utf8.DecodeRuneInString(format[i:])
1076 }
1077 for j := 0; j < newlines; j++ {
1078 inputc := s.getRune()
1079 for isSpace(inputc) && inputc != '\n' {
1080 inputc = s.getRune()
1081 }
1082 if inputc != '\n' && inputc != eof {
1083 s.errorString("newline in format does not match input")
1084 }
1085 }
1086 if trailingSpace {
1087 inputc := s.getRune()
1088 if newlines == 0 {
1089 // If the trailing space stood alone (did not follow a newline),
1090 // it must find at least one space to consume.
1091 if !isSpace(inputc) && inputc != eof {
1092 s.errorString("expected space in input to match format")
1093 }
1094 if inputc == '\n' {
1095 s.errorString("newline in input does not match format")
1096 }
1097 }
1098 for isSpace(inputc) && inputc != '\n' {
1099 inputc = s.getRune()
1100 }
1101 if inputc != eof {
1102 s.UnreadRune()
1103 }
1104 }
1105 continue
1106 }
1107 1108 // Verbs.
1109 if fmtc == '%' {
1110 // % at end of string is an error.
1111 if i+w == len(format) {
1112 s.errorString("missing verb: % at end of format string")
1113 }
1114 // %% acts like a real percent
1115 nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
1116 if nextc != '%' {
1117 return
1118 }
1119 i += w // skip the first %
1120 }
1121 1122 // Literals.
1123 inputc := s.mustReadRune()
1124 if fmtc != inputc {
1125 s.UnreadRune()
1126 return -1
1127 }
1128 i += w
1129 }
1130 return
1131 }
1132 1133 // doScanf does the real work when scanning with a format string.
1134 // At the moment, it handles only pointers to basic types.
1135 func (s *ss) doScanf(format []byte, a []any) (numProcessed int, err error) {
1136 defer errorHandler(&err)
1137 end := len(format) - 1
1138 // We process one item per non-trivial format
1139 for i := 0; i <= end; {
1140 w := s.advance(format[i:])
1141 if w > 0 {
1142 i += w
1143 continue
1144 }
1145 // Either we failed to advance, we have a percent character, or we ran out of input.
1146 if format[i] != '%' {
1147 // Can't advance format. Why not?
1148 if w < 0 {
1149 s.errorString("input does not match format")
1150 }
1151 // Otherwise at EOF; "too many operands" error handled below
1152 break
1153 }
1154 i++ // % is one byte
1155 1156 // do we have 20 (width)?
1157 var widPresent bool
1158 s.maxWid, widPresent, i = parsenum(format, i, end)
1159 if !widPresent {
1160 s.maxWid = hugeWid
1161 }
1162 1163 c, w := utf8.DecodeRuneInString(format[i:])
1164 i += w
1165 1166 if c != 'c' {
1167 s.SkipSpace()
1168 }
1169 if c == '%' {
1170 s.scanPercent()
1171 continue // Do not consume an argument.
1172 }
1173 s.argLimit = s.limit
1174 if f := s.count + s.maxWid; f < s.argLimit {
1175 s.argLimit = f
1176 }
1177 1178 if numProcessed >= len(a) { // out of operands
1179 s.errorString("too few operands for format '%" + format[i-w:] + "'")
1180 break
1181 }
1182 arg := a[numProcessed]
1183 1184 s.scanOne(c, arg)
1185 numProcessed++
1186 s.argLimit = s.limit
1187 }
1188 if numProcessed < len(a) {
1189 s.errorString("too many operands")
1190 }
1191 return
1192 }
1193