parser.go raw

   1  package unstable
   2  
   3  import (
   4  	"bytes"
   5  	"fmt"
   6  	"unicode"
   7  
   8  	"github.com/pelletier/go-toml/v2/internal/characters"
   9  	"github.com/pelletier/go-toml/v2/internal/danger"
  10  )
  11  
// ParserError describes an error relative to the content of the document.
//
// Highlight is the part of the parser input the error points at, Message is
// the text returned by Error, and Key is optional.
//
// It cannot outlive the instance of Parser it refers to, and may cause panics
// if the parser is reset.
type ParserError struct {
	Highlight []byte
	Message   string
	Key       []string // optional
}
  21  
  22  // Error is the implementation of the error interface.
  23  func (e *ParserError) Error() string {
  24  	return e.Message
  25  }
  26  
  27  // NewParserError is a convenience function to create a ParserError
  28  //
  29  // Warning: Highlight needs to be a subslice of Parser.data, so only slices
  30  // returned by Parser.Raw are valid candidates.
  31  func NewParserError(highlight []byte, format string, args ...interface{}) error {
  32  	return &ParserError{
  33  		Highlight: highlight,
  34  		Message:   fmt.Errorf(format, args...).Error(),
  35  	}
  36  }
  37  
// Parser scans over a TOML-encoded document and generates an iterative AST.
//
// To prime the Parser, first reset it with the contents of a TOML document.
// Then, process all top-level expressions sequentially. See Example.
//
// Don't forget to check Error() after you're done parsing.
//
// Each top-level expression needs to be fully processed before calling
// NextExpression() again. Otherwise, calls to various Node methods may panic if
// the parser has moved on to the next expression.
//
// For performance reasons, go-toml doesn't make a copy of the input bytes to
// the parser. Make sure to copy all the bytes you need to outlive the slice
// given to the parser.
type Parser struct {
	data    []byte
	builder builder
	ref     reference
	left    []byte
	err     error
	first   bool

	// KeepComments makes the parser emit Comment nodes; when false, comments
	// are scanned but no node is created for them.
	KeepComments bool
}
  62  
  63  // Data returns the slice provided to the last call to Reset.
  64  func (p *Parser) Data() []byte {
  65  	return p.data
  66  }
  67  
  68  // Range returns a range description that corresponds to a given slice of the
  69  // input. If the argument is not a subslice of the parser input, this function
  70  // panics.
  71  func (p *Parser) Range(b []byte) Range {
  72  	return Range{
  73  		Offset: uint32(danger.SubsliceOffset(p.data, b)),
  74  		Length: uint32(len(b)),
  75  	}
  76  }
  77  
  78  // Raw returns the slice corresponding to the bytes in the given range.
  79  func (p *Parser) Raw(raw Range) []byte {
  80  	return p.data[raw.Offset : raw.Offset+raw.Length]
  81  }
  82  
  83  // Reset brings the parser to its initial state for a given input. It wipes an
  84  // reuses internal storage to reduce allocation.
  85  func (p *Parser) Reset(b []byte) {
  86  	p.builder.Reset()
  87  	p.ref = invalidReference
  88  	p.data = b
  89  	p.left = b
  90  	p.err = nil
  91  	p.first = true
  92  }
  93  
  94  // NextExpression parses the next top-level expression. If an expression was
  95  // successfully parsed, it returns true. If the parser is at the end of the
  96  // document or an error occurred, it returns false.
  97  //
  98  // Retrieve the parsed expression with Expression().
  99  func (p *Parser) NextExpression() bool {
 100  	if len(p.left) == 0 || p.err != nil {
 101  		return false
 102  	}
 103  
 104  	p.builder.Reset()
 105  	p.ref = invalidReference
 106  
 107  	for {
 108  		if len(p.left) == 0 || p.err != nil {
 109  			return false
 110  		}
 111  
 112  		if !p.first {
 113  			p.left, p.err = p.parseNewline(p.left)
 114  		}
 115  
 116  		if len(p.left) == 0 || p.err != nil {
 117  			return false
 118  		}
 119  
 120  		p.ref, p.left, p.err = p.parseExpression(p.left)
 121  
 122  		if p.err != nil {
 123  			return false
 124  		}
 125  
 126  		p.first = false
 127  
 128  		if p.ref.Valid() {
 129  			return true
 130  		}
 131  	}
 132  }
 133  
 134  // Expression returns a pointer to the node representing the last successfully
 135  // parsed expression.
 136  func (p *Parser) Expression() *Node {
 137  	return p.builder.NodeAt(p.ref)
 138  }
 139  
 140  // Error returns any error that has occurred during parsing.
 141  func (p *Parser) Error() error {
 142  	return p.err
 143  }
 144  
// Position describes a position in the input.
type Position struct {
	// Number of bytes from the beginning of the input.
	Offset int
	// Line number, starting at 1.
	Line int
	// Column number, starting at 1. Counted in bytes from the start of the
	// line.
	Column int
}
 154  
// Shape describes the position of a range in the input. Start is the position
// of the beginning of the range; End is the position right after its last
// byte.
type Shape struct {
	Start Position
	End   Position
}
 160  
 161  func (p *Parser) position(b []byte) Position {
 162  	offset := danger.SubsliceOffset(p.data, b)
 163  
 164  	lead := p.data[:offset]
 165  
 166  	return Position{
 167  		Offset: offset,
 168  		Line:   bytes.Count(lead, []byte{'\n'}) + 1,
 169  		Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
 170  	}
 171  }
 172  
 173  // Shape returns the shape of the given range in the input.  Will
 174  // panic if the range is not a subslice of the input.
 175  func (p *Parser) Shape(r Range) Shape {
 176  	raw := p.Raw(r)
 177  	return Shape{
 178  		Start: p.position(raw),
 179  		End:   p.position(raw[r.Length:]),
 180  	}
 181  }
 182  
 183  func (p *Parser) parseNewline(b []byte) ([]byte, error) {
 184  	if b[0] == '\n' {
 185  		return b[1:], nil
 186  	}
 187  
 188  	if b[0] == '\r' {
 189  		_, rest, err := scanWindowsNewline(b)
 190  		return rest, err
 191  	}
 192  
 193  	return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
 194  }
 195  
 196  func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
 197  	ref := invalidReference
 198  	data, rest, err := scanComment(b)
 199  	if p.KeepComments && err == nil {
 200  		ref = p.builder.Push(Node{
 201  			Kind: Comment,
 202  			Raw:  p.Range(data),
 203  			Data: data,
 204  		})
 205  	}
 206  	return ref, rest, err
 207  }
 208  
 209  func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
 210  	// expression =  ws [ comment ]
 211  	// expression =/ ws keyval ws [ comment ]
 212  	// expression =/ ws table ws [ comment ]
 213  	ref := invalidReference
 214  
 215  	b = p.parseWhitespace(b)
 216  
 217  	if len(b) == 0 {
 218  		return ref, b, nil
 219  	}
 220  
 221  	if b[0] == '#' {
 222  		ref, rest, err := p.parseComment(b)
 223  		return ref, rest, err
 224  	}
 225  
 226  	if b[0] == '\n' || b[0] == '\r' {
 227  		return ref, b, nil
 228  	}
 229  
 230  	var err error
 231  	if b[0] == '[' {
 232  		ref, b, err = p.parseTable(b)
 233  	} else {
 234  		ref, b, err = p.parseKeyval(b)
 235  	}
 236  
 237  	if err != nil {
 238  		return ref, nil, err
 239  	}
 240  
 241  	b = p.parseWhitespace(b)
 242  
 243  	if len(b) > 0 && b[0] == '#' {
 244  		cref, rest, err := p.parseComment(b)
 245  		if cref != invalidReference {
 246  			p.builder.Chain(ref, cref)
 247  		}
 248  		return ref, rest, err
 249  	}
 250  
 251  	return ref, b, nil
 252  }
 253  
 254  func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
 255  	// table = std-table / array-table
 256  	if len(b) > 1 && b[1] == '[' {
 257  		return p.parseArrayTable(b)
 258  	}
 259  
 260  	return p.parseStdTable(b)
 261  }
 262  
 263  func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
 264  	// array-table = array-table-open key array-table-close
 265  	// array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
 266  	// array-table-close = ws %x5D.5D  ; ]] Double right square bracket
 267  	ref := p.builder.Push(Node{
 268  		Kind: ArrayTable,
 269  	})
 270  
 271  	b = b[2:]
 272  	b = p.parseWhitespace(b)
 273  
 274  	k, b, err := p.parseKey(b)
 275  	if err != nil {
 276  		return ref, nil, err
 277  	}
 278  
 279  	p.builder.AttachChild(ref, k)
 280  	b = p.parseWhitespace(b)
 281  
 282  	b, err = expect(']', b)
 283  	if err != nil {
 284  		return ref, nil, err
 285  	}
 286  
 287  	b, err = expect(']', b)
 288  
 289  	return ref, b, err
 290  }
 291  
 292  func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
 293  	// std-table = std-table-open key std-table-close
 294  	// std-table-open  = %x5B ws     ; [ Left square bracket
 295  	// std-table-close = ws %x5D     ; ] Right square bracket
 296  	ref := p.builder.Push(Node{
 297  		Kind: Table,
 298  	})
 299  
 300  	b = b[1:]
 301  	b = p.parseWhitespace(b)
 302  
 303  	key, b, err := p.parseKey(b)
 304  	if err != nil {
 305  		return ref, nil, err
 306  	}
 307  
 308  	p.builder.AttachChild(ref, key)
 309  
 310  	b = p.parseWhitespace(b)
 311  
 312  	b, err = expect(']', b)
 313  
 314  	return ref, b, err
 315  }
 316  
 317  func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
 318  	// keyval = key keyval-sep val
 319  	ref := p.builder.Push(Node{
 320  		Kind: KeyValue,
 321  	})
 322  
 323  	key, b, err := p.parseKey(b)
 324  	if err != nil {
 325  		return invalidReference, nil, err
 326  	}
 327  
 328  	// keyval-sep = ws %x3D ws ; =
 329  
 330  	b = p.parseWhitespace(b)
 331  
 332  	if len(b) == 0 {
 333  		return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
 334  	}
 335  
 336  	b, err = expect('=', b)
 337  	if err != nil {
 338  		return invalidReference, nil, err
 339  	}
 340  
 341  	b = p.parseWhitespace(b)
 342  
 343  	valRef, b, err := p.parseVal(b)
 344  	if err != nil {
 345  		return ref, b, err
 346  	}
 347  
 348  	p.builder.Chain(valRef, key)
 349  	p.builder.AttachChild(ref, valRef)
 350  
 351  	return ref, b, err
 352  }
 353  
 354  //nolint:cyclop,funlen
 355  func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
 356  	// val = string / boolean / array / inline-table / date-time / float / integer
 357  	ref := invalidReference
 358  
 359  	if len(b) == 0 {
 360  		return ref, nil, NewParserError(b, "expected value, not eof")
 361  	}
 362  
 363  	var err error
 364  	c := b[0]
 365  
 366  	switch c {
 367  	case '"':
 368  		var raw []byte
 369  		var v []byte
 370  		if scanFollowsMultilineBasicStringDelimiter(b) {
 371  			raw, v, b, err = p.parseMultilineBasicString(b)
 372  		} else {
 373  			raw, v, b, err = p.parseBasicString(b)
 374  		}
 375  
 376  		if err == nil {
 377  			ref = p.builder.Push(Node{
 378  				Kind: String,
 379  				Raw:  p.Range(raw),
 380  				Data: v,
 381  			})
 382  		}
 383  
 384  		return ref, b, err
 385  	case '\'':
 386  		var raw []byte
 387  		var v []byte
 388  		if scanFollowsMultilineLiteralStringDelimiter(b) {
 389  			raw, v, b, err = p.parseMultilineLiteralString(b)
 390  		} else {
 391  			raw, v, b, err = p.parseLiteralString(b)
 392  		}
 393  
 394  		if err == nil {
 395  			ref = p.builder.Push(Node{
 396  				Kind: String,
 397  				Raw:  p.Range(raw),
 398  				Data: v,
 399  			})
 400  		}
 401  
 402  		return ref, b, err
 403  	case 't':
 404  		if !scanFollowsTrue(b) {
 405  			return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
 406  		}
 407  
 408  		ref = p.builder.Push(Node{
 409  			Kind: Bool,
 410  			Data: b[:4],
 411  		})
 412  
 413  		return ref, b[4:], nil
 414  	case 'f':
 415  		if !scanFollowsFalse(b) {
 416  			return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
 417  		}
 418  
 419  		ref = p.builder.Push(Node{
 420  			Kind: Bool,
 421  			Data: b[:5],
 422  		})
 423  
 424  		return ref, b[5:], nil
 425  	case '[':
 426  		return p.parseValArray(b)
 427  	case '{':
 428  		return p.parseInlineTable(b)
 429  	default:
 430  		return p.parseIntOrFloatOrDateTime(b)
 431  	}
 432  }
 433  
 434  func atmost(b []byte, n int) []byte {
 435  	if n >= len(b) {
 436  		return b
 437  	}
 438  
 439  	return b[:n]
 440  }
 441  
 442  func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
 443  	v, rest, err := scanLiteralString(b)
 444  	if err != nil {
 445  		return nil, nil, nil, err
 446  	}
 447  
 448  	return v, v[1 : len(v)-1], rest, nil
 449  }
 450  
 451  func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
 452  	// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
 453  	// inline-table-open  = %x7B ws     ; {
 454  	// inline-table-close = ws %x7D     ; }
 455  	// inline-table-sep   = ws %x2C ws  ; , Comma
 456  	// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
 457  	parent := p.builder.Push(Node{
 458  		Kind: InlineTable,
 459  		Raw:  p.Range(b[:1]),
 460  	})
 461  
 462  	first := true
 463  
 464  	var child reference
 465  
 466  	b = b[1:]
 467  
 468  	var err error
 469  
 470  	for len(b) > 0 {
 471  		previousB := b
 472  		b = p.parseWhitespace(b)
 473  
 474  		if len(b) == 0 {
 475  			return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
 476  		}
 477  
 478  		if b[0] == '}' {
 479  			break
 480  		}
 481  
 482  		if !first {
 483  			b, err = expect(',', b)
 484  			if err != nil {
 485  				return parent, nil, err
 486  			}
 487  			b = p.parseWhitespace(b)
 488  		}
 489  
 490  		var kv reference
 491  
 492  		kv, b, err = p.parseKeyval(b)
 493  		if err != nil {
 494  			return parent, nil, err
 495  		}
 496  
 497  		if first {
 498  			p.builder.AttachChild(parent, kv)
 499  		} else {
 500  			p.builder.Chain(child, kv)
 501  		}
 502  		child = kv
 503  
 504  		first = false
 505  	}
 506  
 507  	rest, err := expect('}', b)
 508  
 509  	return parent, rest, err
 510  }
 511  
// parseValArray parses an array value. b must start with "[". An Array node is
// pushed and every parsed value is added to it, along with any comment
// reference returned by parseOptionalWhitespaceCommentNewline. It returns the
// Array reference and the input following the closing bracket; on error the
// Array reference is returned with a nil remainder.
//
//nolint:funlen,cyclop
func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
	// array = array-open [ array-values ] ws-comment-newline array-close
	// array-open =  %x5B ; [
	// array-close = %x5D ; ]
	// array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
	// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
	// array-sep = %x2C  ; , Comma
	// ws-comment-newline = *( wschar / [ comment ] newline )

	// Kept to highlight the opening bracket if the array turns out to be
	// incomplete.
	arrayStart := b
	b = b[1:]

	parent := p.builder.Push(Node{
		Kind: Array,
	})

	// First indicates whether the parser is looking for the first element
	// (non-comment) of the array.
	first := true

	lastChild := invalidReference

	// addChild appends a node to the array: the first one is attached as the
	// child of the Array node, later ones are chained after the previous one.
	addChild := func(valueRef reference) {
		if lastChild == invalidReference {
			p.builder.AttachChild(parent, valueRef)
		} else {
			p.builder.Chain(lastChild, valueRef)
		}
		lastChild = valueRef
	}

	var err error
	for len(b) > 0 {
		cref := invalidReference
		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
		if err != nil {
			return parent, nil, err
		}

		if cref != invalidReference {
			addChild(cref)
		}

		if len(b) == 0 {
			return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
		}

		if b[0] == ']' {
			break
		}

		if b[0] == ',' {
			if first {
				return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
			}
			b = b[1:]

			cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
			if err != nil {
				return parent, nil, err
			}
			if cref != invalidReference {
				addChild(cref)
			}
		} else if !first {
			// A second value directly after the previous one, with no comma
			// in between.
			return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
		}

		// TOML allows trailing commas in arrays.
		if len(b) > 0 && b[0] == ']' {
			break
		}

		var valueRef reference
		valueRef, b, err = p.parseVal(b)
		if err != nil {
			return parent, nil, err
		}

		addChild(valueRef)

		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
		if err != nil {
			return parent, nil, err
		}
		if cref != invalidReference {
			addChild(cref)
		}

		first = false
	}

	// Also reports the missing bracket when the input ran out.
	rest, err := expect(']', b)

	return parent, rest, err
}
 608  
 609  func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
 610  	rootCommentRef := invalidReference
 611  	latestCommentRef := invalidReference
 612  
 613  	addComment := func(ref reference) {
 614  		if rootCommentRef == invalidReference {
 615  			rootCommentRef = ref
 616  		} else if latestCommentRef == invalidReference {
 617  			p.builder.AttachChild(rootCommentRef, ref)
 618  			latestCommentRef = ref
 619  		} else {
 620  			p.builder.Chain(latestCommentRef, ref)
 621  			latestCommentRef = ref
 622  		}
 623  	}
 624  
 625  	for len(b) > 0 {
 626  		var err error
 627  		b = p.parseWhitespace(b)
 628  
 629  		if len(b) > 0 && b[0] == '#' {
 630  			var ref reference
 631  			ref, b, err = p.parseComment(b)
 632  			if err != nil {
 633  				return invalidReference, nil, err
 634  			}
 635  			if ref != invalidReference {
 636  				addComment(ref)
 637  			}
 638  		}
 639  
 640  		if len(b) == 0 {
 641  			break
 642  		}
 643  
 644  		if b[0] == '\n' || b[0] == '\r' {
 645  			b, err = p.parseNewline(b)
 646  			if err != nil {
 647  				return invalidReference, nil, err
 648  			}
 649  		} else {
 650  			break
 651  		}
 652  	}
 653  
 654  	return rootCommentRef, b, nil
 655  }
 656  
 657  func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
 658  	token, rest, err := scanMultilineLiteralString(b)
 659  	if err != nil {
 660  		return nil, nil, nil, err
 661  	}
 662  
 663  	i := 3
 664  
 665  	// skip the immediate new line
 666  	if token[i] == '\n' {
 667  		i++
 668  	} else if token[i] == '\r' && token[i+1] == '\n' {
 669  		i += 2
 670  	}
 671  
 672  	return token, token[i : len(token)-3], rest, err
 673  }
 674  
// parseMultilineBasicString scans a multi-line basic string at the start of b.
//
// It returns the scanned token (delimiters included), the string value, and
// the remaining input. The value drops a line ending that immediately follows
// the opening delimiter. When the scanner reports no escape, the value is a
// subslice of the token; otherwise it is a freshly built buffer with escape
// sequences and line-ending backslashes resolved.
//
//nolint:funlen,gocognit,cyclop
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
	//                   ml-basic-string-delim
	// ml-basic-string-delim = 3quotation-mark
	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
	//
	// mlb-content = mlb-char / newline / mlb-escaped-nl
	// mlb-char = mlb-unescaped / escaped
	// mlb-quotes = 1*2quotation-mark
	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// mlb-escaped-nl = escape ws newline *( wschar / newline )
	token, escaped, rest, err := scanMultilineBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Start right after the opening delimiter.
	i := 3

	// skip the immediate new line
	if token[i] == '\n' {
		i++
	} else if token[i] == '\r' && token[i+1] == '\n' {
		i += 2
	}

	// fast path
	startIdx := i
	endIdx := len(token) - len(`"""`)

	if !escaped {
		// No escape sequence: validate and return a direct reference into the
		// token.
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-3 {
		c := token[i]

		//nolint:nestif
		if c == '\\' {
			// When the last non-whitespace character on a line is an unescaped \,
			// it will be trimmed along with all whitespace (including newlines) up
			// to the next non-whitespace character or closing delimiter.

			isLastNonWhitespaceOnLine := false
			// j is the distance from the backslash to the byte being looked at.
			j := 1
		findEOLLoop:
			for ; j < len(token)-3-i; j++ {
				switch token[i+j] {
				case ' ', '\t':
					continue
				case '\r':
					if token[i+j+1] == '\n' {
						continue
					}
				case '\n':
					isLastNonWhitespaceOnLine = true
				}
				break findEOLLoop
			}
			if isLastNonWhitespaceOnLine {
				// Jump to the newline that was found, then skip every
				// whitespace and line-ending byte that follows.
				i += j
				for ; i < len(token)-3; i++ {
					c := token[i]
					if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
						// Compensated by the i++ below.
						i--
						break
					}
				}
				i++
				continue
			}

			// handle escaping
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				x, err := hexToRune(atmost(token[i+1:], 4), 4)
				if err != nil {
					return nil, nil, nil, err
				}
				builder.WriteRune(x)
				// Skip the four hex digits.
				i += 4
			case 'U':
				x, err := hexToRune(atmost(token[i+1:], 8), 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				// Skip the eight hex digits.
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Copy one character verbatim; a size of 0 is reported as an
			// invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
 806  
 807  func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
 808  	// key = simple-key / dotted-key
 809  	// simple-key = quoted-key / unquoted-key
 810  	//
 811  	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
 812  	// quoted-key = basic-string / literal-string
 813  	// dotted-key = simple-key 1*( dot-sep simple-key )
 814  	//
 815  	// dot-sep   = ws %x2E ws  ; . Period
 816  	raw, key, b, err := p.parseSimpleKey(b)
 817  	if err != nil {
 818  		return invalidReference, nil, err
 819  	}
 820  
 821  	ref := p.builder.Push(Node{
 822  		Kind: Key,
 823  		Raw:  p.Range(raw),
 824  		Data: key,
 825  	})
 826  
 827  	for {
 828  		b = p.parseWhitespace(b)
 829  		if len(b) > 0 && b[0] == '.' {
 830  			b = p.parseWhitespace(b[1:])
 831  
 832  			raw, key, b, err = p.parseSimpleKey(b)
 833  			if err != nil {
 834  				return ref, nil, err
 835  			}
 836  
 837  			p.builder.PushAndChain(Node{
 838  				Kind: Key,
 839  				Raw:  p.Range(raw),
 840  				Data: key,
 841  			})
 842  		} else {
 843  			break
 844  		}
 845  	}
 846  
 847  	return ref, b, nil
 848  }
 849  
 850  func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
 851  	if len(b) == 0 {
 852  		return nil, nil, nil, NewParserError(b, "expected key but found none")
 853  	}
 854  
 855  	// simple-key = quoted-key / unquoted-key
 856  	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
 857  	// quoted-key = basic-string / literal-string
 858  	switch {
 859  	case b[0] == '\'':
 860  		return p.parseLiteralString(b)
 861  	case b[0] == '"':
 862  		return p.parseBasicString(b)
 863  	case isUnquotedKeyChar(b[0]):
 864  		key, rest = scanUnquotedKey(b)
 865  		return key, key, rest, nil
 866  	default:
 867  		return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
 868  	}
 869  }
 870  
// parseBasicString scans a single-line basic string at the start of b.
//
// It returns the scanned token (quotes included), the string value, and the
// remaining input. When the scanner reports no escape, the value is a subslice
// of the token; otherwise it is a freshly built buffer with the escape
// sequences resolved.
//
//nolint:funlen,cyclop
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// basic-string = quotation-mark *basic-char quotation-mark
	// quotation-mark = %x22            ; "
	// basic-char = basic-unescaped / escaped
	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// escaped = escape escape-seq-char
	// escape-seq-char =  %x22         ; "    quotation mark  U+0022
	// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
	// escape-seq-char =/ %x62         ; b    backspace       U+0008
	// escape-seq-char =/ %x66         ; f    form feed       U+000C
	// escape-seq-char =/ %x6E         ; n    line feed       U+000A
	// escape-seq-char =/ %x72         ; r    carriage return U+000D
	// escape-seq-char =/ %x74         ; t    tab             U+0009
	// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
	// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
	//
	// The decoder below additionally maps \e to the byte 0x1B.
	token, escaped, rest, err := scanBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Bounds of the content between the quotes.
	startIdx := len(`"`)
	endIdx := len(token) - len(`"`)

	// Fast path. If there is no escape sequence, the string should just be
	// an UTF-8 encoded string, which is the same as Go. In that case,
	// validate the string and return a direct reference to the buffer.
	if !escaped {
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	i := startIdx

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-1 {
		c := token[i]
		if c == '\\' {
			// Move to the byte that selects the escape.
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				// The closing quote is excluded from the candidate digits.
				x, err := hexToRune(token[i+1:len(token)-1], 4)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				// Skip the four hex digits.
				i += 4
			case 'U':
				x, err := hexToRune(token[i+1:len(token)-1], 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				// Skip the eight hex digits.
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Copy one character verbatim; a size of 0 is reported as an
			// invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
 966  
 967  func hexToRune(b []byte, length int) (rune, error) {
 968  	if len(b) < length {
 969  		return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
 970  	}
 971  	b = b[:length]
 972  
 973  	var r uint32
 974  	for i, c := range b {
 975  		d := uint32(0)
 976  		switch {
 977  		case '0' <= c && c <= '9':
 978  			d = uint32(c - '0')
 979  		case 'a' <= c && c <= 'f':
 980  			d = uint32(c - 'a' + 10)
 981  		case 'A' <= c && c <= 'F':
 982  			d = uint32(c - 'A' + 10)
 983  		default:
 984  			return -1, NewParserError(b[i:i+1], "non-hex character")
 985  		}
 986  		r = r*16 + d
 987  	}
 988  
 989  	if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
 990  		return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
 991  	}
 992  
 993  	return rune(r), nil
 994  }
 995  
 996  func (p *Parser) parseWhitespace(b []byte) []byte {
 997  	// ws = *wschar
 998  	// wschar =  %x20  ; Space
 999  	// wschar =/ %x09  ; Horizontal tab
1000  	_, rest := scanWhitespace(b)
1001  
1002  	return rest
1003  }
1004  
1005  //nolint:cyclop
1006  func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
1007  	switch b[0] {
1008  	case 'i':
1009  		if !scanFollowsInf(b) {
1010  			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
1011  		}
1012  
1013  		return p.builder.Push(Node{
1014  			Kind: Float,
1015  			Data: b[:3],
1016  			Raw:  p.Range(b[:3]),
1017  		}), b[3:], nil
1018  	case 'n':
1019  		if !scanFollowsNan(b) {
1020  			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
1021  		}
1022  
1023  		return p.builder.Push(Node{
1024  			Kind: Float,
1025  			Data: b[:3],
1026  			Raw:  p.Range(b[:3]),
1027  		}), b[3:], nil
1028  	case '+', '-':
1029  		return p.scanIntOrFloat(b)
1030  	}
1031  
1032  	if len(b) < 3 {
1033  		return p.scanIntOrFloat(b)
1034  	}
1035  
1036  	s := 5
1037  	if len(b) < s {
1038  		s = len(b)
1039  	}
1040  
1041  	for idx, c := range b[:s] {
1042  		if isDigit(c) {
1043  			continue
1044  		}
1045  
1046  		if idx == 2 && c == ':' || (idx == 4 && c == '-') {
1047  			return p.scanDateTime(b)
1048  		}
1049  
1050  		break
1051  	}
1052  
1053  	return p.scanIntOrFloat(b)
1054  }
1055  
// scanDateTime consumes the longest date/time-looking prefix of b and pushes a
// node for it. The node kind is derived only from which separators were seen:
// DateTime, LocalDateTime, LocalTime or LocalDate. The pushed node carries
// Kind and Data only, and this function never returns an error.
func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
	// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
	// followed by a digit.
	hasDate := false
	hasTime := false
	hasTz := false
	seenSpace := false

	i := 0
byteLoop:
	for ; i < len(b); i++ {
		c := b[i]

		switch {
		case isDigit(c):
		case c == '-':
			hasDate = true
			// A dash at or past this offset is also counted as the sign of a
			// timezone offset.
			const minOffsetOfTz = 8
			if i >= minOffsetOfTz {
				hasTz = true
			}
		case c == 'T' || c == 't' || c == ':' || c == '.':
			hasTime = true
		// The c == '-' alternative below never matches: '-' is already
		// handled by the case above.
		case c == '+' || c == '-' || c == 'Z' || c == 'z':
			hasTz = true
		case c == ' ':
			// Only the first space, and only when a digit follows it, is
			// treated as the separator between the date and the time.
			if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
				// Together with the loop's own i++, this resumes scanning
				// three bytes after the space.
				i += 2
				// Avoid reaching past the end of the document in case the time
				// is malformed. See TestIssue585.
				if i >= len(b) {
					i--
				}
				seenSpace = true
				hasTime = true
			} else {
				break byteLoop
			}
		default:
			break byteLoop
		}
	}

	var kind Kind

	if hasTime {
		if hasDate {
			if hasTz {
				kind = DateTime
			} else {
				kind = LocalDateTime
			}
		} else {
			kind = LocalTime
		}
	} else {
		kind = LocalDate
	}

	return p.builder.Push(Node{
		Kind: kind,
		Data: b[:i],
	}), b[i:], nil
}
1120  
// scanIntOrFloat consumes a number-looking prefix of b and pushes an Integer or
// Float node whose Data and Raw cover the consumed bytes.
//
// Input longer than two bytes that starts with '0' not followed by '.', 'e' or
// 'E' is always pushed as an Integer: with an 'x', 'o' or 'b' prefix the
// digits accepted for that base (and '_') are consumed, otherwise only the
// leading '0' is. Any other input is scanned over digits, signs, '_', '.',
// 'e' and 'E', and is a Float as soon as one of '.', 'e', 'E' was seen. An
// "inf" or "nan" met during the scan ends the number as a Float.
//
//nolint:funlen,gocognit,cyclop
func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
	i := 0

	if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
		var isValidRune validRuneFn

		switch b[1] {
		case 'x':
			isValidRune = isValidHexRune
		case 'o':
			isValidRune = isValidOctalRune
		case 'b':
			isValidRune = isValidBinaryRune
		default:
			// No base prefix: only the leading zero is consumed.
			i++
		}

		if isValidRune != nil {
			// Skip the two-byte prefix, then consume every byte accepted for
			// the selected base.
			i += 2
			for ; i < len(b); i++ {
				if !isValidRune(b[i]) {
					break
				}
			}
		}

		return p.builder.Push(Node{
			Kind: Integer,
			Data: b[:i],
			Raw:  p.Range(b[:i]),
		}), b[i:], nil
	}

	isFloat := false

	for ; i < len(b); i++ {
		c := b[i]

		if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
			continue
		}

		if c == '.' || c == 'e' || c == 'E' {
			isFloat = true

			continue
		}

		if c == 'i' {
			if scanFollowsInf(b[i:]) {
				// Everything scanned so far plus the three bytes of "inf".
				return p.builder.Push(Node{
					Kind: Float,
					Data: b[:i+3],
					Raw:  p.Range(b[:i+3]),
				}), b[i+3:], nil
			}

			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
		}

		if c == 'n' {
			if scanFollowsNan(b[i:]) {
				// Everything scanned so far plus the three bytes of "nan".
				return p.builder.Push(Node{
					Kind: Float,
					Data: b[:i+3],
					Raw:  p.Range(b[:i+3]),
				}), b[i+3:], nil
			}

			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
		}

		break
	}

	// Nothing was consumed: the first byte cannot start a number.
	if i == 0 {
		return invalidReference, b, NewParserError(b, "incomplete number")
	}

	kind := Integer

	if isFloat {
		kind = Float
	}

	return p.builder.Push(Node{
		Kind: kind,
		Data: b[:i],
		Raw:  p.Range(b[:i]),
	}), b[i:], nil
}
1213  
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	// Byte subtraction wraps around, so anything below '0' ends up above 9.
	return r-'0' < 10
}
1217  
// validRuneFn reports whether a byte may appear in the digits of a prefixed
// integer; scanIntOrFloat selects one from the 0x / 0o / 0b prefix.
type validRuneFn func(r byte) bool
1219  
// isValidHexRune reports whether r is a hexadecimal digit (either case) or an
// underscore.
func isValidHexRune(r byte) bool {
	switch {
	case r == '_':
		return true
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}

	return false
}
1226  
// isValidOctalRune reports whether r is an octal digit or an underscore.
func isValidOctalRune(r byte) bool {
	if r == '_' {
		return true
	}

	return '0' <= r && r <= '7'
}
1230  
// isValidBinaryRune reports whether r is a binary digit or an underscore.
func isValidBinaryRune(r byte) bool {
	switch r {
	case '0', '1', '_':
		return true
	}

	return false
}
1234  
1235  func expect(x byte, b []byte) ([]byte, error) {
1236  	if len(b) == 0 {
1237  		return nil, NewParserError(b, "expected character %c but the document ended here", x)
1238  	}
1239  
1240  	if b[0] != x {
1241  		return nil, NewParserError(b[0:1], "expected character %c", x)
1242  	}
1243  
1244  	return b[1:], nil
1245  }
1246