scan.mx raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package fmt
   6  
   7  import (
   8  	"errors"
   9  	"io"
  10  	"math"
  11  	"os"
  12  	"strconv"
  13  	"sync"
  14  	"unicode/utf8"
  15  )
  16  
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c). If f is nil, a
	// "not a space" predicate is used; that is, the token will hold
	// non-space characters. Newlines are treated appropriately for the
	// operation being performed; see the package documentation for more
	// information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Read exists only so that a ScanState also satisfies io.Reader.
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}
  49  
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to [Scan], [Scanf], or [Scanln] that implements it.
//
// state gives access to the input being scanned and verb is the format
// verb in effect for the argument ('v' when no format string is used).
type Scanner interface {
	Scan(state ScanState, verb rune) error
}
  57  
  58  // Scan scans text read from standard input, storing successive
  59  // space-separated values into successive arguments. Newlines count
  60  // as space. It returns the number of items successfully scanned.
  61  // If that is less than the number of arguments, err will report why.
  62  func Scan(a ...any) (n int, err error) {
  63  	return Fscan(os.Stdin, a...)
  64  }
  65  
  66  // Scanln is similar to [Scan], but stops scanning at a newline and
  67  // after the final item there must be a newline or EOF.
  68  func Scanln(a ...any) (n int, err error) {
  69  	return Fscanln(os.Stdin, a...)
  70  }
  71  
  72  // Scanf scans text read from standard input, storing successive
  73  // space-separated values into successive arguments as determined by
  74  // the format. It returns the number of items successfully scanned.
  75  // If that is less than the number of arguments, err will report why.
  76  // Newlines in the input must match newlines in the format.
  77  // The one exception: the verb %c always scans the next rune in the
  78  // input, even if it is a space (or tab etc.) or newline.
  79  func Scanf(format []byte, a ...any) (n int, err error) {
  80  	return Fscanf(os.Stdin, format, a...)
  81  }
  82  
  83  type stringReader []byte
  84  
  85  func (r *stringReader) Read(b []byte) (n int, err error) {
  86  	n = copy(b, *r)
  87  	*r = (*r)[n:]
  88  	if n == 0 {
  89  		err = io.EOF
  90  	}
  91  	return
  92  }
  93  
  94  // Sscan scans the argument string, storing successive space-separated
  95  // values into successive arguments. Newlines count as space. It
  96  // returns the number of items successfully scanned. If that is less
  97  // than the number of arguments, err will report why.
  98  func Sscan(str []byte, a ...any) (n int, err error) {
  99  	return Fscan((*stringReader)(&str), a...)
 100  }
 101  
 102  // Sscanln is similar to [Sscan], but stops scanning at a newline and
 103  // after the final item there must be a newline or EOF.
 104  func Sscanln(str []byte, a ...any) (n int, err error) {
 105  	return Fscanln((*stringReader)(&str), a...)
 106  }
 107  
 108  // Sscanf scans the argument string, storing successive space-separated
 109  // values into successive arguments as determined by the format. It
 110  // returns the number of items successfully parsed.
 111  // Newlines in the input must match newlines in the format.
 112  func Sscanf(str []byte, format []byte, a ...any) (n int, err error) {
 113  	return Fscanf((*stringReader)(&str), format, a...)
 114  }
 115  
 116  // Fscan scans text read from r, storing successive space-separated
 117  // values into successive arguments. Newlines count as space. It
 118  // returns the number of items successfully scanned. If that is less
 119  // than the number of arguments, err will report why.
 120  func Fscan(r io.Reader, a ...any) (n int, err error) {
 121  	s, old := newScanState(r, true, false)
 122  	n, err = s.doScan(a)
 123  	s.free(old)
 124  	return
 125  }
 126  
 127  // Fscanln is similar to [Fscan], but stops scanning at a newline and
 128  // after the final item there must be a newline or EOF.
 129  func Fscanln(r io.Reader, a ...any) (n int, err error) {
 130  	s, old := newScanState(r, false, true)
 131  	n, err = s.doScan(a)
 132  	s.free(old)
 133  	return
 134  }
 135  
 136  // Fscanf scans text read from r, storing successive space-separated
 137  // values into successive arguments as determined by the format. It
 138  // returns the number of items successfully parsed.
 139  // Newlines in the input must match newlines in the format.
 140  func Fscanf(r io.Reader, format []byte, a ...any) (n int, err error) {
 141  	s, old := newScanState(r, false, false)
 142  	n, err = s.doScanf(format, a)
 143  	s.free(old)
 144  	return
 145  }
 146  
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// scanning code panics with a scanError and the recovering code unwraps
// the err field, while any other panic value is treated as foreign.
type scanError struct {
	err error // the underlying error to hand back to the caller
}
 152  
// eof is the sentinel rune value returned by getRune when the input is
// exhausted. It is negative, so it cannot collide with a decoded rune.
const eof = -1
 154  
// ss is the internal implementation of ScanState.
// The embedded ssave carries the per-scan settings (newline handling and
// width limits) so that they can be saved and restored as a unit.
type ss struct {
	rs    io.RuneScanner // where to read input
	buf   buffer         // token accumulator
	count int            // runes consumed so far.
	atEOF bool           // already read EOF
	ssave
}
 163  
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
// The three numeric fields are set to hugeWid when no limit applies.
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg.
}
 174  
 175  // The Read method is only in ScanState so that ScanState
 176  // satisfies io.Reader. It will never be called when used as
 177  // intended, so there is no need to make it actually work.
 178  func (s *ss) Read(buf []byte) (n int, err error) {
 179  	return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
 180  }
 181  
 182  func (s *ss) ReadRune() (r rune, size int, err error) {
 183  	if s.atEOF || s.count >= s.argLimit {
 184  		err = io.EOF
 185  		return
 186  	}
 187  
 188  	r, size, err = s.rs.ReadRune()
 189  	if err == nil {
 190  		s.count++
 191  		if s.nlIsEnd && r == '\n' {
 192  			s.atEOF = true
 193  		}
 194  	} else if err == io.EOF {
 195  		s.atEOF = true
 196  	}
 197  	return
 198  }
 199  
 200  func (s *ss) Width() (wid int, ok bool) {
 201  	if s.maxWid == hugeWid {
 202  		return 0, false
 203  	}
 204  	return s.maxWid, true
 205  }
 206  
 207  // The public method returns an error; this private one panics.
 208  // If getRune reaches EOF, the return value is EOF (-1).
 209  func (s *ss) getRune() (r rune) {
 210  	r, _, err := s.ReadRune()
 211  	if err != nil {
 212  		if err == io.EOF {
 213  			return eof
 214  		}
 215  		s.error(err)
 216  	}
 217  	return
 218  }
 219  
 220  // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
 221  // It is called in cases such as string scanning where an EOF is a
 222  // syntax error.
 223  func (s *ss) mustReadRune() (r rune) {
 224  	r = s.getRune()
 225  	if r == eof {
 226  		s.error(io.ErrUnexpectedEOF)
 227  	}
 228  	return
 229  }
 230  
 231  func (s *ss) UnreadRune() error {
 232  	s.rs.UnreadRune()
 233  	s.atEOF = false
 234  	s.count--
 235  	return nil
 236  }
 237  
 238  func (s *ss) error(err error) {
 239  	panic(scanError{err})
 240  }
 241  
 242  func (s *ss) errorString(err []byte) {
 243  	panic(scanError{errors.New(err)})
 244  }
 245  
 246  func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
 247  	defer func() {
 248  		if e := recover(); e != nil {
 249  			if se, ok := e.(scanError); ok {
 250  				err = se.err
 251  			} else {
 252  				panic(e)
 253  			}
 254  		}
 255  	}()
 256  	if f == nil {
 257  		f = notSpace
 258  	}
 259  	s.buf = s.buf[:0]
 260  	tok = s.token(skipSpace, f)
 261  	return
 262  }
 263  
 264  // space is a copy of the unicode.White_Space ranges,
 265  // to avoid depending on package unicode.
 266  var space = [][2]uint16{
 267  	{0x0009, 0x000d},
 268  	{0x0020, 0x0020},
 269  	{0x0085, 0x0085},
 270  	{0x00a0, 0x00a0},
 271  	{0x1680, 0x1680},
 272  	{0x2000, 0x200a},
 273  	{0x2028, 0x2029},
 274  	{0x202f, 0x202f},
 275  	{0x205f, 0x205f},
 276  	{0x3000, 0x3000},
 277  }
 278  
 279  func isSpace(r rune) bool {
 280  	if r >= 1<<16 {
 281  		return false
 282  	}
 283  	rx := uint16(r)
 284  	for _, rng := range space {
 285  		if rx < rng[0] {
 286  			return false
 287  		}
 288  		if rx <= rng[1] {
 289  			return true
 290  		}
 291  	}
 292  	return false
 293  }
 294  
 295  // notSpace is the default scanning function used in Token.
 296  func notSpace(r rune) bool {
 297  	return !isSpace(r)
 298  }
 299  
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneScanner.
//
// peekRune doubles as the one-rune unread buffer: a non-negative value is
// a rune waiting to be returned again, and a negative value is the
// bit-flipped copy of the rune most recently returned.
type readRune struct {
	reader   io.Reader
	buf      [utf8.UTFMax]byte // used only inside ReadRune
	pending  int               // number of bytes in pendBuf; only >0 for bad UTF-8
	pendBuf  [utf8.UTFMax]byte // bytes left over
	peekRune rune              // if >=0 next rune; when <0 is ^(previous Rune)
}
 310  
 311  // readByte returns the next byte from the input, which may be
 312  // left over from a previous read if the UTF-8 was ill-formed.
 313  func (r *readRune) readByte() (b byte, err error) {
 314  	if r.pending > 0 {
 315  		b = r.pendBuf[0]
 316  		copy(r.pendBuf[0:], r.pendBuf[1:])
 317  		r.pending--
 318  		return
 319  	}
 320  	n, err := io.ReadFull(r.reader, r.pendBuf[:1])
 321  	if n != 1 {
 322  		return 0, err
 323  	}
 324  	return r.pendBuf[0], err
 325  }
 326  
// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r, together with its size in bytes.
// A rune pushed back by UnreadRune is returned first. Otherwise bytes are
// read one at a time until they form a full rune or the input ends; bytes
// read beyond what the decoder consumed are kept for later reads.
func (r *readRune) ReadRune() (rr rune, size int, err error) {
	if r.peekRune >= 0 {
		// An unread rune is waiting: return it and flip it back to the
		// "previous rune" (negative) form.
		rr = r.peekRune
		r.peekRune = ^r.peekRune
		size = utf8.RuneLen(rr)
		return
	}
	r.buf[0], err = r.readByte()
	if err != nil {
		return
	}
	if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
		rr = rune(r.buf[0])
		size = 1 // Known to be 1.
		// Flip the bits of the rune so it's available to UnreadRune.
		r.peekRune = ^rr
		return
	}
	var n int
	// Keep adding bytes until the decoder can make a decision.
	for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
		r.buf[n], err = r.readByte()
		if err != nil {
			if err == io.EOF {
				// Input ended mid-sequence: decode what we have.
				err = nil
				break
			}
			return
		}
	}
	rr, size = utf8.DecodeRune(r.buf[:n])
	if size < n { // an error, save the bytes for the next read
		copy(r.pendBuf[r.pending:], r.buf[size:n])
		r.pending += n - size
	}
	// Flip the bits of the rune so it's available to UnreadRune.
	r.peekRune = ^rr
	return
}
 367  
 368  func (r *readRune) UnreadRune() error {
 369  	if r.peekRune >= 0 {
 370  		return errors.New("fmt: scanning called UnreadRune with no rune available")
 371  	}
 372  	// Reverse bit flip of previously read rune to obtain valid >=0 state.
 373  	r.peekRune = ^r.peekRune
 374  	return nil
 375  }
 376  
// ssFree is the pool of reusable ss values. newScanState takes states
// from it and free returns them; New supplies a zero-valued ss when the
// pool has none to hand out.
var ssFree = sync.Pool{
	New: func() any { return &ss{} },
}
 380  
 381  // newScanState allocates a new ss struct or grab a cached one.
 382  func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
 383  	s = ssFree.Get().(*ss)
 384  	if rs, ok := r.(io.RuneScanner); ok {
 385  		s.rs = rs
 386  	} else {
 387  		s.rs = &readRune{reader: r, peekRune: -1}
 388  	}
 389  	s.nlIsSpace = nlIsSpace
 390  	s.nlIsEnd = nlIsEnd
 391  	s.atEOF = false
 392  	s.limit = hugeWid
 393  	s.argLimit = hugeWid
 394  	s.maxWid = hugeWid
 395  	s.validSave = true
 396  	s.count = 0
 397  	return
 398  }
 399  
 400  // free saves used ss structs in ssFree; avoid an allocation per invocation.
 401  func (s *ss) free(old ssave) {
 402  	// If it was used recursively, just restore the old state.
 403  	if old.validSave {
 404  		s.ssave = old
 405  		return
 406  	}
 407  	// Don't hold on to ss structs with large buffers.
 408  	if cap(s.buf) > 1024 {
 409  		return
 410  	}
 411  	s.buf = s.buf[:0]
 412  	s.rs = nil
 413  	ssFree.Put(s)
 414  }
 415  
 416  // SkipSpace provides Scan methods the ability to skip space and newline
 417  // characters in keeping with the current scanning mode set by format strings
 418  // and [Scan]/[Scanln].
 419  func (s *ss) SkipSpace() {
 420  	for {
 421  		r := s.getRune()
 422  		if r == eof {
 423  			return
 424  		}
 425  		if r == '\r' && s.peek("\n") {
 426  			continue
 427  		}
 428  		if r == '\n' {
 429  			if s.nlIsSpace {
 430  				continue
 431  			}
 432  			s.errorString("unexpected newline")
 433  			return
 434  		}
 435  		if !isSpace(r) {
 436  			s.UnreadRune()
 437  			break
 438  		}
 439  	}
 440  }
 441  
 442  // token returns the next space-delimited string from the input. It
 443  // skips white space. For Scanln, it stops at newlines. For Scan,
 444  // newlines are treated as spaces.
 445  func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
 446  	if skipSpace {
 447  		s.SkipSpace()
 448  	}
 449  	// read until white space or newline
 450  	for {
 451  		r := s.getRune()
 452  		if r == eof {
 453  			break
 454  		}
 455  		if !f(r) {
 456  			s.UnreadRune()
 457  			break
 458  		}
 459  		s.buf.writeRune(r)
 460  	}
 461  	return s.buf
 462  }
 463  
 464  var errComplex = errors.New("syntax error scanning complex number")
 465  var errBool = errors.New("syntax error scanning boolean")
 466  
 467  func indexRune(s []byte, r rune) int {
 468  	for i := 0; i < len(s); {
 469  		c, w := utf8.DecodeRune(s[i:])
 470  		if c == r {
 471  			return i
 472  		}
 473  		i += w
 474  	}
 475  	return -1
 476  }
 477  
 478  // consume reads the next rune in the input and reports whether it is in the ok string.
 479  // If accept is true, it puts the character into the input token.
 480  func (s *ss) consume(ok []byte, accept bool) bool {
 481  	r := s.getRune()
 482  	if r == eof {
 483  		return false
 484  	}
 485  	if indexRune(ok, r) >= 0 {
 486  		if accept {
 487  			s.buf.writeRune(r)
 488  		}
 489  		return true
 490  	}
 491  	if r != eof && accept {
 492  		s.UnreadRune()
 493  	}
 494  	return false
 495  }
 496  
 497  // peek reports whether the next character is in the ok string, without consuming it.
 498  func (s *ss) peek(ok []byte) bool {
 499  	r := s.getRune()
 500  	if r != eof {
 501  		s.UnreadRune()
 502  	}
 503  	return indexRune(ok, r) >= 0
 504  }
 505  
 506  func (s *ss) notEOF() {
 507  	// Guarantee there is data to be read.
 508  	if r := s.getRune(); r == eof {
 509  		panic(io.EOF)
 510  	}
 511  	s.UnreadRune()
 512  }
 513  
 514  // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
 515  // buffer and returns true. Otherwise it return false.
 516  func (s *ss) accept(ok []byte) bool {
 517  	return s.consume(ok, true)
 518  }
 519  
 520  // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 521  func (s *ss) okVerb(verb rune, okVerbs, typ []byte) bool {
 522  	for i := 0; i < len(okVerbs); {
 523  		v, w := utf8.DecodeRune(okVerbs[i:])
 524  		if v == verb {
 525  			return true
 526  		}
 527  		i += w
 528  	}
 529  	var vbuf [utf8.UTFMax]byte
 530  	vn := utf8.EncodeRune(vbuf[:], verb)
 531  	s.errorString("bad verb '%" + vbuf[:vn] + "' for " + typ)
 532  	return false
 533  }
 534  
 535  // scanBool returns the value of the boolean represented by the next token.
 536  func (s *ss) scanBool(verb rune) bool {
 537  	s.SkipSpace()
 538  	s.notEOF()
 539  	if !s.okVerb(verb, "tv", "boolean") {
 540  		return false
 541  	}
 542  	// Syntax-checking a boolean is annoying. We're not fastidious about case.
 543  	switch s.getRune() {
 544  	case '0':
 545  		return false
 546  	case '1':
 547  		return true
 548  	case 't', 'T':
 549  		if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 550  			s.error(errBool)
 551  		}
 552  		return true
 553  	case 'f', 'F':
 554  		if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 555  			s.error(errBool)
 556  		}
 557  		return false
 558  	}
 559  	return false
 560  }
 561  
// Numerical elements: the rune sets handed to accept/consume while
// scanning numbers.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEpP" // decimal (e) and binary (p) exponent markers
)
 572  
 573  // getBase returns the numeric base represented by the verb and its digit string.
 574  func (s *ss) getBase(verb rune) (base int, digits []byte) {
 575  	s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 576  	base = 10
 577  	digits = decimalDigits
 578  	switch verb {
 579  	case 'b':
 580  		base = 2
 581  		digits = binaryDigits
 582  	case 'o':
 583  		base = 8
 584  		digits = octalDigits
 585  	case 'x', 'X', 'U':
 586  		base = 16
 587  		digits = hexadecimalDigits
 588  	}
 589  	return
 590  }
 591  
 592  // scanNumber returns the numerical string with specified digits starting here.
 593  func (s *ss) scanNumber(digits []byte, haveDigits bool) []byte {
 594  	if !haveDigits {
 595  		s.notEOF()
 596  		if !s.accept(digits) {
 597  			s.errorString("expected integer")
 598  		}
 599  	}
 600  	for s.accept(digits) {
 601  	}
 602  	return []byte(s.buf)
 603  }
 604  
 605  // scanRune returns the next rune value in the input.
 606  func (s *ss) scanRune(bitSize int) int64 {
 607  	s.notEOF()
 608  	r := s.getRune()
 609  	n := uint(bitSize)
 610  	x := (int64(r) << (64 - n)) >> (64 - n)
 611  	if x != int64(r) {
 612  		var rbuf [utf8.UTFMax]byte
 613  	rn := utf8.EncodeRune(rbuf[:], rune(r))
 614  	s.errorString("overflow on character value " + rbuf[:rn])
 615  	}
 616  	return int64(r)
 617  }
 618  
 619  // scanBasePrefix reports whether the integer begins with a base prefix
 620  // and returns the base, digit string, and whether a zero was found.
 621  // It is called only if the verb is %v.
 622  func (s *ss) scanBasePrefix() (base int, digits []byte, zeroFound bool) {
 623  	if !s.peek("0") {
 624  		return 0, decimalDigits + "_", false
 625  	}
 626  	s.accept("0")
 627  	// Special cases for 0, 0b, 0o, 0x.
 628  	switch {
 629  	case s.peek("bB"):
 630  		s.consume("bB", true)
 631  		return 0, binaryDigits + "_", true
 632  	case s.peek("oO"):
 633  		s.consume("oO", true)
 634  		return 0, octalDigits + "_", true
 635  	case s.peek("xX"):
 636  		s.consume("xX", true)
 637  		return 0, hexadecimalDigits + "_", true
 638  	default:
 639  		return 0, octalDigits + "_", true
 640  	}
 641  }
 642  
 643  // scanInt returns the value of the integer represented by the next
 644  // token, checking for overflow. Any error is stored in s.err.
 645  func (s *ss) scanInt(verb rune, bitSize int) int64 {
 646  	if verb == 'c' {
 647  		return s.scanRune(bitSize)
 648  	}
 649  	s.SkipSpace()
 650  	s.notEOF()
 651  	base, digits := s.getBase(verb)
 652  	haveDigits := false
 653  	if verb == 'U' {
 654  		if !s.consume("U", false) || !s.consume("+", false) {
 655  			s.errorString("bad unicode format ")
 656  		}
 657  	} else {
 658  		s.accept(sign) // If there's a sign, it will be left in the token buffer.
 659  		if verb == 'v' {
 660  			base, digits, haveDigits = s.scanBasePrefix()
 661  		}
 662  	}
 663  	tok := s.scanNumber(digits, haveDigits)
 664  	i, err := strconv.ParseInt(tok, base, 64)
 665  	if err != nil {
 666  		s.error(err)
 667  	}
 668  	n := uint(bitSize)
 669  	x := (i << (64 - n)) >> (64 - n)
 670  	if x != i {
 671  		s.errorString("integer overflow on token " + tok)
 672  	}
 673  	return i
 674  }
 675  
 676  // scanUint returns the value of the unsigned integer represented
 677  // by the next token, checking for overflow. Any error is stored in s.err.
 678  func (s *ss) scanUint(verb rune, bitSize int) uint64 {
 679  	if verb == 'c' {
 680  		return uint64(s.scanRune(bitSize))
 681  	}
 682  	s.SkipSpace()
 683  	s.notEOF()
 684  	base, digits := s.getBase(verb)
 685  	haveDigits := false
 686  	if verb == 'U' {
 687  		if !s.consume("U", false) || !s.consume("+", false) {
 688  			s.errorString("bad unicode format ")
 689  		}
 690  	} else if verb == 'v' {
 691  		base, digits, haveDigits = s.scanBasePrefix()
 692  	}
 693  	tok := s.scanNumber(digits, haveDigits)
 694  	i, err := strconv.ParseUint(tok, base, 64)
 695  	if err != nil {
 696  		s.error(err)
 697  	}
 698  	n := uint(bitSize)
 699  	x := (i << (64 - n)) >> (64 - n)
 700  	if x != i {
 701  		s.errorString("unsigned integer overflow on token " + tok)
 702  	}
 703  	return i
 704  }
 705  
// floatToken returns the floating-point number starting here. The token
// buffer is cleared first and then filled with whatever prefix of the
// input looks like a float: "NaN", or an optional sign followed by "Inf"
// or by digits, an optional fraction and an optional exponent. A leading
// "0x" switches to hexadecimal digits with a p/P exponent.
// It's not rigorous about syntax because it doesn't check that
// we have at least some digits, but the later numeric conversion will do
// that. Runes accepted by a partially matched "NaN" or "Inf" stay in the
// buffer.
func (s *ss) floatToken() []byte {
	s.buf = s.buf[:0]
	// NaN?
	if s.accept("nN") && s.accept("aA") && s.accept("nN") {
		return []byte(s.buf)
	}
	// leading sign?
	s.accept(sign)
	// Inf?
	if s.accept("iI") && s.accept("nN") && s.accept("fF") {
		return []byte(s.buf)
	}
	digits := decimalDigits + "_"
	exp := exponent
	// A "0x" prefix selects hexadecimal digits and a binary-only exponent.
	if s.accept("0") && s.accept("xX") {
		digits = hexadecimalDigits + "_"
		exp = "pP"
	}
	// digits?
	for s.accept(digits) {
	}
	// decimal point?
	if s.accept(period) {
		// fraction?
		for s.accept(digits) {
		}
	}
	// exponent?
	if s.accept(exp) {
		// leading sign?
		s.accept(sign)
		// digits? (the exponent value is always written in decimal)
		for s.accept(decimalDigits + "_") {
		}
	}
	return []byte(s.buf)
}
 746  
// complexTokens returns the real and imaginary parts of the complex number starting here.
// The number might be parenthesized and has the format (N+Ni) where N is a floating-point
// number and there are no spaces within. The sign between the two parts
// is returned as the first byte of imag. Any departure from the format
// aborts the scan with errComplex.
//
// NOTE(review): real and imagSign are taken from s.buf, which is then
// reset and refilled. This is only correct if []byte(s.buf) yields a copy
// in this dialect — confirm before relying on this function.
func (s *ss) complexTokens() (real, imag []byte) {
	// TODO: accept N and Ni independently?
	parens := s.accept("(")
	real = s.floatToken()
	s.buf = s.buf[:0]
	// Must now have a sign.
	if !s.accept("+-") {
		s.error(errComplex)
	}
	// Sign is now in buffer
	imagSign := []byte(s.buf)
	imag = s.floatToken()
	// The imaginary part must end in a lowercase 'i'.
	if !s.accept("i") {
		s.error(errComplex)
	}
	// An opening parenthesis requires a matching closing one.
	if parens && !s.accept(")") {
		s.error(errComplex)
	}
	return real, imagSign + imag
}
 770  
 771  func hasX(s []byte) bool {
 772  	for i := 0; i < len(s); i++ {
 773  		if s[i] == 'x' || s[i] == 'X' {
 774  			return true
 775  		}
 776  	}
 777  	return false
 778  }
 779  
 780  // convertFloat converts the string to a float64value.
 781  func (s *ss) convertFloat(str []byte, n int) float64 {
 782  	// strconv.ParseFloat will handle "+0x1.fp+2",
 783  	// but we have to implement our non-standard
 784  	// decimal+binary exponent mix (1.2p4) ourselves.
 785  	if p := indexRune(str, 'p'); p >= 0 && !hasX(str) {
 786  		// Atof doesn't handle power-of-2 exponents,
 787  		// but they're easy to evaluate.
 788  		f, err := strconv.ParseFloat(str[:p], n)
 789  		if err != nil {
 790  			// Put full string into error.
 791  			if e, ok := err.(*strconv.NumError); ok {
 792  				e.Num = str
 793  			}
 794  			s.error(err)
 795  		}
 796  		m, err := strconv.Atoi(str[p+1:])
 797  		if err != nil {
 798  			// Put full string into error.
 799  			if e, ok := err.(*strconv.NumError); ok {
 800  				e.Num = str
 801  			}
 802  			s.error(err)
 803  		}
 804  		return math.Ldexp(f, m)
 805  	}
 806  	f, err := strconv.ParseFloat(str, n)
 807  	if err != nil {
 808  		s.error(err)
 809  	}
 810  	return f
 811  }
 812  
// scanComplex is a stub kept for complex-number scanning, which is not
// supported in this build: verb and n are ignored and every call panics.
// The panic value is a plain message, not a scanError, so errorHandler
// re-panics instead of turning it into an error result.
func (s *ss) scanComplex(verb rune, n int) (float64, float64) {
	panic("moxie: complex numbers not supported")
}
 820  
 821  // convertString returns the string represented by the next input characters.
 822  // The format of the input is determined by the verb.
 823  func (s *ss) convertString(verb rune) (str []byte) {
 824  	if !s.okVerb(verb, "svqxX", "string") {
 825  		return ""
 826  	}
 827  	s.SkipSpace()
 828  	s.notEOF()
 829  	switch verb {
 830  	case 'q':
 831  		str = s.quotedString()
 832  	case 'x', 'X':
 833  		str = s.hexString()
 834  	default:
 835  		str = []byte(s.token(true, notSpace)) // %s and %v just return the next word
 836  	}
 837  	return
 838  }
 839  
 840  // quotedString returns the double- or back-quoted string represented by the next input characters.
 841  func (s *ss) quotedString() []byte {
 842  	s.notEOF()
 843  	quote := s.getRune()
 844  	switch quote {
 845  	case '`':
 846  		// Back-quoted: Anything goes until EOF or back quote.
 847  		for {
 848  			r := s.mustReadRune()
 849  			if r == quote {
 850  				break
 851  			}
 852  			s.buf.writeRune(r)
 853  		}
 854  		return []byte(s.buf)
 855  	case '"':
 856  		// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 857  		s.buf.writeByte('"')
 858  		for {
 859  			r := s.mustReadRune()
 860  			s.buf.writeRune(r)
 861  			if r == '\\' {
 862  				// In a legal backslash escape, no matter how long, only the character
 863  				// immediately after the escape can itself be a backslash or quote.
 864  				// Thus we only need to protect the first character after the backslash.
 865  				s.buf.writeRune(s.mustReadRune())
 866  			} else if r == '"' {
 867  				break
 868  			}
 869  		}
 870  		result, err := strconv.Unquote([]byte(s.buf))
 871  		if err != nil {
 872  			s.error(err)
 873  		}
 874  		return result
 875  	default:
 876  		s.errorString("expected quoted string")
 877  	}
 878  	return ""
 879  }
 880  
 881  // hexDigit returns the value of the hexadecimal digit.
 882  func hexDigit(d rune) (int, bool) {
 883  	digit := int(d)
 884  	switch digit {
 885  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 886  		return digit - '0', true
 887  	case 'a', 'b', 'c', 'd', 'e', 'f':
 888  		return 10 + digit - 'a', true
 889  	case 'A', 'B', 'C', 'D', 'E', 'F':
 890  		return 10 + digit - 'A', true
 891  	}
 892  	return -1, false
 893  }
 894  
 895  // hexByte returns the next hex-encoded (two-character) byte from the input.
 896  // It returns ok==false if the next bytes in the input do not encode a hex byte.
 897  // If the first byte is hex and the second is not, processing stops.
 898  func (s *ss) hexByte() (b byte, ok bool) {
 899  	rune1 := s.getRune()
 900  	if rune1 == eof {
 901  		return
 902  	}
 903  	value1, ok := hexDigit(rune1)
 904  	if !ok {
 905  		s.UnreadRune()
 906  		return
 907  	}
 908  	value2, ok := hexDigit(s.mustReadRune())
 909  	if !ok {
 910  		s.errorString("illegal hex digit")
 911  		return
 912  	}
 913  	return byte(value1<<4 | value2), true
 914  }
 915  
 916  // hexString returns the space-delimited hexpair-encoded string.
 917  func (s *ss) hexString() []byte {
 918  	s.notEOF()
 919  	for {
 920  		b, ok := s.hexByte()
 921  		if !ok {
 922  			break
 923  		}
 924  		s.buf.writeByte(b)
 925  	}
 926  	if len(s.buf) == 0 {
 927  		s.errorString("no hex data for %x string")
 928  		return ""
 929  	}
 930  	return []byte(s.buf)
 931  }
 932  
const (
	// floatVerbs lists the verbs accepted when scanning into a float.
	floatVerbs = "beEfFgGv"

	// hugeWid is the sentinel stored in the width and limit fields of a
	// scan state to mean that no limit is set.
	hugeWid = 1 << 30

	// intBits and uintptrBits evaluate to 32, or to 64 when the
	// corresponding unsigned type has a bit 63.
	intBits     = 32 << (^uint(0) >> 63)
	uintptrBits = 32 << (^uintptr(0) >> 63)
)
 941  
 942  // scanPercent scans a literal percent character.
 943  func (s *ss) scanPercent() {
 944  	s.SkipSpace()
 945  	s.notEOF()
 946  	if !s.accept("%") {
 947  		s.errorString("missing literal %")
 948  	}
 949  }
 950  
 951  // scanOne scans a single value, deriving the scanner from the type of the argument.
 952  func (s *ss) scanOne(verb rune, arg any) {
 953  	s.buf = s.buf[:0]
 954  	var err error
 955  	// If the parameter has its own Scan method, use that.
 956  	if v, ok := arg.(Scanner); ok {
 957  		err = v.Scan(s, verb)
 958  		if err != nil {
 959  			if err == io.EOF {
 960  				err = io.ErrUnexpectedEOF
 961  			}
 962  			s.error(err)
 963  		}
 964  		return
 965  	}
 966  
 967  	switch v := arg.(type) {
 968  	case *bool:
 969  		*v = s.scanBool(verb)
 970  	// *complex64, *complex128 cases removed — not supported in moxie
 971  	case *int:
 972  		*v = int(s.scanInt(verb, 32))
 973  	case *int8:
 974  		*v = int8(s.scanInt(verb, 8))
 975  	case *int16:
 976  		*v = int16(s.scanInt(verb, 16))
 977  	case *int64:
 978  		*v = s.scanInt(verb, 64)
 979  	case *uint:
 980  		*v = uint(s.scanUint(verb, 32))
 981  	case *uint8:
 982  		*v = uint8(s.scanUint(verb, 8))
 983  	case *uint16:
 984  		*v = uint16(s.scanUint(verb, 16))
 985  	case *uint64:
 986  		*v = s.scanUint(verb, 64)
 987  	// Floats are tricky because you want to scan in the precision of the result, not
 988  	// scan in high precision and convert, in order to preserve the correct error condition.
 989  	case *float32:
 990  		if s.okVerb(verb, floatVerbs, "float32") {
 991  			s.SkipSpace()
 992  			s.notEOF()
 993  			*v = float32(s.convertFloat(s.floatToken(), 32))
 994  		}
 995  	case *float64:
 996  		if s.okVerb(verb, floatVerbs, "float64") {
 997  			s.SkipSpace()
 998  			s.notEOF()
 999  			*v = s.convertFloat(s.floatToken(), 64)
1000  		}
1001  	case *[]byte:
1002  		// We scan to string and convert so we get a copy of the data.
1003  		// If we scanned to bytes, the slice would point at the buffer.
1004  		*v = []byte(s.convertString(verb))
1005  	default:
1006  		s.errorString("can't scan type: unsupported")
1007  	}
1008  }
1009  
1010  // errorHandler turns local panics into error returns.
1011  func errorHandler(errp *error) {
1012  	if e := recover(); e != nil {
1013  		if se, ok := e.(scanError); ok { // catch local error
1014  			*errp = se.err
1015  		} else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1016  			*errp = eof
1017  		} else {
1018  			panic(e)
1019  		}
1020  	}
1021  }
1022  
1023  // doScan does the real work for scanning without a format string.
1024  func (s *ss) doScan(a []any) (numProcessed int, err error) {
1025  	defer errorHandler(&err)
1026  	for _, arg := range a {
1027  		s.scanOne('v', arg)
1028  		numProcessed++
1029  	}
1030  	// Check for newline (or EOF) if required (Scanln etc.).
1031  	if s.nlIsEnd {
1032  		for {
1033  			r := s.getRune()
1034  			if r == '\n' || r == eof {
1035  				break
1036  			}
1037  			if !isSpace(r) {
1038  				s.errorString("expected newline")
1039  				break
1040  			}
1041  		}
1042  	}
1043  	return
1044  }
1045  
1046  // advance determines whether the next characters in the input match
1047  // those of the format. It returns the number of bytes (sic) consumed
1048  // in the format. All runs of space characters in either input or
1049  // format behave as a single space. Newlines are special, though:
1050  // newlines in the format must match those in the input and vice versa.
1051  // This routine also handles the %% case. If the return value is zero,
1052  // either format starts with a % (with no following %) or the input
1053  // is empty. If it is negative, the input did not match the string.
1054  func (s *ss) advance(format []byte) (i int) {
1055  	for i < len(format) {
1056  		fmtc, w := utf8.DecodeRuneInString(format[i:])
1057  
1058  		// Space processing.
1059  		// In the rest of this comment "space" means spaces other than newline.
1060  		// Newline in the format matches input of zero or more spaces and then newline or end-of-input.
1061  		// Spaces in the format before the newline are collapsed into the newline.
1062  		// Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
1063  		// Other spaces in the format match input of one or more spaces or end-of-input.
1064  		if isSpace(fmtc) {
1065  			newlines := 0
1066  			trailingSpace := false
1067  			for isSpace(fmtc) && i < len(format) {
1068  				if fmtc == '\n' {
1069  					newlines++
1070  					trailingSpace = false
1071  				} else {
1072  					trailingSpace = true
1073  				}
1074  				i += w
1075  				fmtc, w = utf8.DecodeRuneInString(format[i:])
1076  			}
1077  			for j := 0; j < newlines; j++ {
1078  				inputc := s.getRune()
1079  				for isSpace(inputc) && inputc != '\n' {
1080  					inputc = s.getRune()
1081  				}
1082  				if inputc != '\n' && inputc != eof {
1083  					s.errorString("newline in format does not match input")
1084  				}
1085  			}
1086  			if trailingSpace {
1087  				inputc := s.getRune()
1088  				if newlines == 0 {
1089  					// If the trailing space stood alone (did not follow a newline),
1090  					// it must find at least one space to consume.
1091  					if !isSpace(inputc) && inputc != eof {
1092  						s.errorString("expected space in input to match format")
1093  					}
1094  					if inputc == '\n' {
1095  						s.errorString("newline in input does not match format")
1096  					}
1097  				}
1098  				for isSpace(inputc) && inputc != '\n' {
1099  					inputc = s.getRune()
1100  				}
1101  				if inputc != eof {
1102  					s.UnreadRune()
1103  				}
1104  			}
1105  			continue
1106  		}
1107  
1108  		// Verbs.
1109  		if fmtc == '%' {
1110  			// % at end of string is an error.
1111  			if i+w == len(format) {
1112  				s.errorString("missing verb: % at end of format string")
1113  			}
1114  			// %% acts like a real percent
1115  			nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
1116  			if nextc != '%' {
1117  				return
1118  			}
1119  			i += w // skip the first %
1120  		}
1121  
1122  		// Literals.
1123  		inputc := s.mustReadRune()
1124  		if fmtc != inputc {
1125  			s.UnreadRune()
1126  			return -1
1127  		}
1128  		i += w
1129  	}
1130  	return
1131  }
1132  
1133  // doScanf does the real work when scanning with a format string.
1134  // At the moment, it handles only pointers to basic types.
1135  func (s *ss) doScanf(format []byte, a []any) (numProcessed int, err error) {
1136  	defer errorHandler(&err)
1137  	end := len(format) - 1
1138  	// We process one item per non-trivial format
1139  	for i := 0; i <= end; {
1140  		w := s.advance(format[i:])
1141  		if w > 0 {
1142  			i += w
1143  			continue
1144  		}
1145  		// Either we failed to advance, we have a percent character, or we ran out of input.
1146  		if format[i] != '%' {
1147  			// Can't advance format. Why not?
1148  			if w < 0 {
1149  				s.errorString("input does not match format")
1150  			}
1151  			// Otherwise at EOF; "too many operands" error handled below
1152  			break
1153  		}
1154  		i++ // % is one byte
1155  
1156  		// do we have 20 (width)?
1157  		var widPresent bool
1158  		s.maxWid, widPresent, i = parsenum(format, i, end)
1159  		if !widPresent {
1160  			s.maxWid = hugeWid
1161  		}
1162  
1163  		c, w := utf8.DecodeRuneInString(format[i:])
1164  		i += w
1165  
1166  		if c != 'c' {
1167  			s.SkipSpace()
1168  		}
1169  		if c == '%' {
1170  			s.scanPercent()
1171  			continue // Do not consume an argument.
1172  		}
1173  		s.argLimit = s.limit
1174  		if f := s.count + s.maxWid; f < s.argLimit {
1175  			s.argLimit = f
1176  		}
1177  
1178  		if numProcessed >= len(a) { // out of operands
1179  			s.errorString("too few operands for format '%" + format[i-w:] + "'")
1180  			break
1181  		}
1182  		arg := a[numProcessed]
1183  
1184  		s.scanOne(c, arg)
1185  		numProcessed++
1186  		s.argLimit = s.limit
1187  	}
1188  	if numProcessed < len(a) {
1189  		s.errorString("too many operands")
1190  	}
1191  	return
1192  }
1193