read.mx raw

   1  // Copyright 2012 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package build
   6  
import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"go/ast"
	"go/parser"
	"go/scanner"
	"go/token"
	"io"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
	_ "unsafe" // for linkname
)
  22  
  23  type importReader struct {
  24  	b    *bufio.Reader
  25  	buf  []byte
  26  	peek byte
  27  	err  error
  28  	eof  bool
  29  	nerr int
  30  	pos  token.Position
  31  }
  32  
  33  var bom = []byte{0xef, 0xbb, 0xbf}
  34  
  35  func newImportReader(name []byte, r io.Reader) *importReader {
  36  	b := bufio.NewReader(r)
  37  	// Remove leading UTF-8 BOM.
  38  	// Per https://golang.org/ref/spec#Source_code_representation:
  39  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
  40  	// if it is the first Unicode code point in the source text.
  41  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
  42  		b.Discard(3)
  43  	}
  44  	return &importReader{
  45  		b: b,
  46  		pos: token.Position{
  47  			Filename: name,
  48  			Line:     1,
  49  			Column:   1,
  50  		},
  51  	}
  52  }
  53  
  54  func isIdent(c byte) bool {
  55  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
  56  }
  57  
  58  var (
  59  	errSyntax = errors.New("syntax error")
  60  	errNUL    = errors.New("unexpected NUL in input")
  61  )
  62  
  63  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
  64  func (r *importReader) syntaxError() {
  65  	if r.err == nil {
  66  		r.err = errSyntax
  67  	}
  68  }
  69  
  70  // readByte reads the next byte from the input, saves it in buf, and returns it.
  71  // If an error occurs, readByte records the error in r.err and returns 0.
  72  func (r *importReader) readByte() byte {
  73  	c, err := r.b.ReadByte()
  74  	if err == nil {
  75  		r.buf = append(r.buf, c)
  76  		if c == 0 {
  77  			err = errNUL
  78  		}
  79  	}
  80  	if err != nil {
  81  		if err == io.EOF {
  82  			r.eof = true
  83  		} else if r.err == nil {
  84  			r.err = err
  85  		}
  86  		c = 0
  87  	}
  88  	return c
  89  }
  90  
  91  // readByteNoBuf is like readByte but doesn't buffer the byte.
  92  // It exhausts r.buf before reading from r.b.
  93  func (r *importReader) readByteNoBuf() byte {
  94  	var c byte
  95  	var err error
  96  	if len(r.buf) > 0 {
  97  		c = r.buf[0]
  98  		r.buf = r.buf[1:]
  99  	} else {
 100  		c, err = r.b.ReadByte()
 101  		if err == nil && c == 0 {
 102  			err = errNUL
 103  		}
 104  	}
 105  
 106  	if err != nil {
 107  		if err == io.EOF {
 108  			r.eof = true
 109  		} else if r.err == nil {
 110  			r.err = err
 111  		}
 112  		return 0
 113  	}
 114  	r.pos.Offset++
 115  	if c == '\n' {
 116  		r.pos.Line++
 117  		r.pos.Column = 1
 118  	} else {
 119  		r.pos.Column++
 120  	}
 121  	return c
 122  }
 123  
 124  // peekByte returns the next byte from the input reader but does not advance beyond it.
 125  // If skipSpace is set, peekByte skips leading spaces and comments.
 126  func (r *importReader) peekByte(skipSpace bool) byte {
 127  	if r.err != nil {
 128  		if r.nerr++; r.nerr > 10000 {
 129  			panic("go/build: import reader looping")
 130  		}
 131  		return 0
 132  	}
 133  
 134  	// Use r.peek as first input byte.
 135  	// Don't just return r.peek here: it might have been left by peekByte(false)
 136  	// and this might be peekByte(true).
 137  	c := r.peek
 138  	if c == 0 {
 139  		c = r.readByte()
 140  	}
 141  	for r.err == nil && !r.eof {
 142  		if skipSpace {
 143  			// For the purposes of this reader, semicolons are never necessary to
 144  			// understand the input and are treated as spaces.
 145  			switch c {
 146  			case ' ', '\f', '\t', '\r', '\n', ';':
 147  				c = r.readByte()
 148  				continue
 149  
 150  			case '/':
 151  				c = r.readByte()
 152  				if c == '/' {
 153  					for c != '\n' && r.err == nil && !r.eof {
 154  						c = r.readByte()
 155  					}
 156  				} else if c == '*' {
 157  					var c1 byte
 158  					for (c != '*' || c1 != '/') && r.err == nil {
 159  						if r.eof {
 160  							r.syntaxError()
 161  						}
 162  						c, c1 = c1, r.readByte()
 163  					}
 164  				} else {
 165  					r.syntaxError()
 166  				}
 167  				c = r.readByte()
 168  				continue
 169  			}
 170  		}
 171  		break
 172  	}
 173  	r.peek = c
 174  	return r.peek
 175  }
 176  
 177  // nextByte is like peekByte but advances beyond the returned byte.
 178  func (r *importReader) nextByte(skipSpace bool) byte {
 179  	c := r.peekByte(skipSpace)
 180  	r.peek = 0
 181  	return c
 182  }
 183  
 184  var goEmbed = []byte("go:embed")
 185  
 186  // findEmbed advances the input reader to the next //go:embed comment.
 187  // It reports whether it found a comment.
 188  // (Otherwise it found an error or EOF.)
 189  func (r *importReader) findEmbed(first bool) bool {
 190  	// The import block scan stopped after a non-space character,
 191  	// so the reader is not at the start of a line on the first call.
 192  	// After that, each //go:embed extraction leaves the reader
 193  	// at the end of a line.
 194  	startLine := !first
 195  	var c byte
 196  	for r.err == nil && !r.eof {
 197  		c = r.readByteNoBuf()
 198  	Reswitch:
 199  		switch c {
 200  		default:
 201  			startLine = false
 202  
 203  		case '\n':
 204  			startLine = true
 205  
 206  		case ' ', '\t':
 207  			// leave startLine alone
 208  
 209  		case '"':
 210  			startLine = false
 211  			for r.err == nil {
 212  				if r.eof {
 213  					r.syntaxError()
 214  				}
 215  				c = r.readByteNoBuf()
 216  				if c == '\\' {
 217  					r.readByteNoBuf()
 218  					if r.err != nil {
 219  						r.syntaxError()
 220  						return false
 221  					}
 222  					continue
 223  				}
 224  				if c == '"' {
 225  					c = r.readByteNoBuf()
 226  					goto Reswitch
 227  				}
 228  			}
 229  			goto Reswitch
 230  
 231  		case '`':
 232  			startLine = false
 233  			for r.err == nil {
 234  				if r.eof {
 235  					r.syntaxError()
 236  				}
 237  				c = r.readByteNoBuf()
 238  				if c == '`' {
 239  					c = r.readByteNoBuf()
 240  					goto Reswitch
 241  				}
 242  			}
 243  
 244  		case '\'':
 245  			startLine = false
 246  			for r.err == nil {
 247  				if r.eof {
 248  					r.syntaxError()
 249  				}
 250  				c = r.readByteNoBuf()
 251  				if c == '\\' {
 252  					r.readByteNoBuf()
 253  					if r.err != nil {
 254  						r.syntaxError()
 255  						return false
 256  					}
 257  					continue
 258  				}
 259  				if c == '\'' {
 260  					c = r.readByteNoBuf()
 261  					goto Reswitch
 262  				}
 263  			}
 264  
 265  		case '/':
 266  			c = r.readByteNoBuf()
 267  			switch c {
 268  			default:
 269  				startLine = false
 270  				goto Reswitch
 271  
 272  			case '*':
 273  				var c1 byte
 274  				for (c != '*' || c1 != '/') && r.err == nil {
 275  					if r.eof {
 276  						r.syntaxError()
 277  					}
 278  					c, c1 = c1, r.readByteNoBuf()
 279  				}
 280  				startLine = false
 281  
 282  			case '/':
 283  				if startLine {
 284  					// Try to read this as a //go:embed comment.
 285  					for i := range goEmbed {
 286  						c = r.readByteNoBuf()
 287  						if c != goEmbed[i] {
 288  							goto SkipSlashSlash
 289  						}
 290  					}
 291  					c = r.readByteNoBuf()
 292  					if c == ' ' || c == '\t' {
 293  						// Found one!
 294  						return true
 295  					}
 296  				}
 297  			SkipSlashSlash:
 298  				for c != '\n' && r.err == nil && !r.eof {
 299  					c = r.readByteNoBuf()
 300  				}
 301  				startLine = true
 302  			}
 303  		}
 304  	}
 305  	return false
 306  }
 307  
 308  // readKeyword reads the given keyword from the input.
 309  // If the keyword is not present, readKeyword records a syntax error.
 310  func (r *importReader) readKeyword(kw []byte) {
 311  	r.peekByte(true)
 312  	for i := 0; i < len(kw); i++ {
 313  		if r.nextByte(false) != kw[i] {
 314  			r.syntaxError()
 315  			return
 316  		}
 317  	}
 318  	if isIdent(r.peekByte(false)) {
 319  		r.syntaxError()
 320  	}
 321  }
 322  
 323  // readIdent reads an identifier from the input.
 324  // If an identifier is not present, readIdent records a syntax error.
 325  func (r *importReader) readIdent() {
 326  	c := r.peekByte(true)
 327  	if !isIdent(c) {
 328  		r.syntaxError()
 329  		return
 330  	}
 331  	for isIdent(r.peekByte(false)) {
 332  		r.peek = 0
 333  	}
 334  }
 335  
 336  // readString reads a quoted string literal from the input.
 337  // If an identifier is not present, readString records a syntax error.
 338  func (r *importReader) readString() {
 339  	switch r.nextByte(true) {
 340  	case '`':
 341  		for r.err == nil {
 342  			if r.nextByte(false) == '`' {
 343  				break
 344  			}
 345  			if r.eof {
 346  				r.syntaxError()
 347  			}
 348  		}
 349  	case '"':
 350  		for r.err == nil {
 351  			c := r.nextByte(false)
 352  			if c == '"' {
 353  				break
 354  			}
 355  			if r.eof || c == '\n' {
 356  				r.syntaxError()
 357  			}
 358  			if c == '\\' {
 359  				r.nextByte(false)
 360  			}
 361  		}
 362  	default:
 363  		r.syntaxError()
 364  	}
 365  }
 366  
 367  // readImport reads an import clause - optional identifier followed by quoted string -
 368  // from the input.
 369  func (r *importReader) readImport() {
 370  	c := r.peekByte(true)
 371  	if c == '.' {
 372  		r.peek = 0
 373  	} else if isIdent(c) {
 374  		r.readIdent()
 375  	}
 376  	r.readString()
 377  }
 378  
 379  // readComments is like io.ReadAll, except that it only reads the leading
 380  // block of comments in the file.
 381  //
 382  // readComments should be an internal detail,
 383  // but widely used packages access it using linkname.
 384  // Notable members of the hall of shame include:
 385  //   - github.com/bazelbuild/bazel-gazelle
 386  //
 387  // Do not remove or change the type signature.
 388  // See go.dev/issue/67401.
 389  //
 390  //go:linkname readComments
 391  func readComments(f io.Reader) ([]byte, error) {
 392  	r := newImportReader("", f)
 393  	r.peekByte(true)
 394  	if r.err == nil && !r.eof {
 395  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
 396  		r.buf = r.buf[:len(r.buf)-1]
 397  	}
 398  	return r.buf, r.err
 399  }
 400  
 401  // readGoInfo expects a Go file as input and reads the file up to and including the import section.
 402  // It records what it learned in *info.
 403  // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
 404  // info.imports and info.embeds.
 405  //
 406  // It only returns an error if there are problems reading the file,
 407  // not for syntax errors in the file itself.
 408  func readGoInfo(f io.Reader, info *fileInfo) error {
 409  	r := newImportReader(info.name, f)
 410  
 411  	r.readKeyword("package")
 412  	r.readIdent()
 413  	for r.peekByte(true) == 'i' {
 414  		r.readKeyword("import")
 415  		if r.peekByte(true) == '(' {
 416  			r.nextByte(false)
 417  			for r.peekByte(true) != ')' && r.err == nil {
 418  				r.readImport()
 419  			}
 420  			r.nextByte(false)
 421  		} else {
 422  			r.readImport()
 423  		}
 424  	}
 425  
 426  	info.header = r.buf
 427  
 428  	// If we stopped successfully before EOF, we read a byte that told us we were done.
 429  	// Return all but that last byte, which would cause a syntax error if we let it through.
 430  	if r.err == nil && !r.eof {
 431  		info.header = r.buf[:len(r.buf)-1]
 432  	}
 433  
 434  	// If we stopped for a syntax error, consume the whole file so that
 435  	// we are sure we don't change the errors that go/parser returns.
 436  	if r.err == errSyntax {
 437  		r.err = nil
 438  		for r.err == nil && !r.eof {
 439  			r.readByte()
 440  		}
 441  		info.header = r.buf
 442  	}
 443  	if r.err != nil {
 444  		return r.err
 445  	}
 446  
 447  	if info.fset == nil {
 448  		return nil
 449  	}
 450  
 451  	// Parse file header & record imports.
 452  	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
 453  	if info.parseErr != nil {
 454  		return nil
 455  	}
 456  
 457  	hasEmbed := false
 458  	for _, decl := range info.parsed.Decls {
 459  		d, ok := decl.(*ast.GenDecl)
 460  		if !ok {
 461  			continue
 462  		}
 463  		for _, dspec := range d.Specs {
 464  			spec, ok := dspec.(*ast.ImportSpec)
 465  			if !ok {
 466  				continue
 467  			}
 468  			quoted := spec.Path.Value
 469  			path, err := strconv.Unquote(quoted)
 470  			if err != nil {
 471  				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
 472  			}
 473  			if !isValidImport(path) {
 474  				// The parser used to return a parse error for invalid import paths, but
 475  				// no longer does, so check for and create the error here instead.
 476  				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
 477  				info.imports = nil
 478  				return nil
 479  			}
 480  			if path == "embed" {
 481  				hasEmbed = true
 482  			}
 483  
 484  			doc := spec.Doc
 485  			if doc == nil && len(d.Specs) == 1 {
 486  				doc = d.Doc
 487  			}
 488  			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
 489  		}
 490  	}
 491  
 492  	// Extract directives.
 493  	for _, group := range info.parsed.Comments {
 494  		if group.Pos() >= info.parsed.Package {
 495  			break
 496  		}
 497  		for _, c := range group.List {
 498  			if bytes.HasPrefix(c.Text, "//go:") {
 499  				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
 500  			}
 501  		}
 502  	}
 503  
 504  	// If the file imports "embed",
 505  	// we have to look for //go:embed comments
 506  	// in the remainder of the file.
 507  	// The compiler will enforce the mapping of comments to
 508  	// declared variables. We just need to know the patterns.
 509  	// If there were //go:embed comments earlier in the file
 510  	// (near the package statement or imports), the compiler
 511  	// will reject them. They can be (and have already been) ignored.
 512  	if hasEmbed {
 513  		var line []byte
 514  		for first := true; r.findEmbed(first); first = false {
 515  			line = line[:0]
 516  			pos := r.pos
 517  			for {
 518  				c := r.readByteNoBuf()
 519  				if c == '\n' || r.err != nil || r.eof {
 520  					break
 521  				}
 522  				line = append(line, c)
 523  			}
 524  			// Add args if line is well-formed.
 525  			// Ignore badly-formed lines - the compiler will report them when it finds them,
 526  			// and we can pretend they are not there to help go list succeed with what it knows.
 527  			embs, err := parseGoEmbed([]byte(line), pos)
 528  			if err == nil {
 529  				info.embeds = append(info.embeds, embs...)
 530  			}
 531  		}
 532  	}
 533  
 534  	return nil
 535  }
 536  
 537  // isValidImport checks if the import is a valid import using the more strict
 538  // checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations.
 539  // It was ported from the function of the same name that was removed from the
 540  // parser in CL 424855, when the parser stopped doing these checks.
 541  func isValidImport(s []byte) bool {
 542  	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
 543  	for _, r := range s {
 544  		if !unicode.IsGraphic(r) || unicode.IsSpace(r) || bytes.ContainsRune(illegalChars, r) {
 545  			return false
 546  		}
 547  	}
 548  	return s != ""
 549  }
 550  
 551  // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
 552  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go bytes.
 553  // This is based on a similar function in cmd/compile/internal/gc/noder.go;
 554  // this version calculates position information as well.
 555  func parseGoEmbed(args []byte, pos token.Position) ([]fileEmbed, error) {
 556  	trimBytes := func(n int) {
 557  		pos.Offset += n
 558  		pos.Column += utf8.RuneCountInString(args[:n])
 559  		args = args[n:]
 560  	}
 561  	trimSpace := func() {
 562  		trim := bytes.TrimLeftFunc(args, unicode.IsSpace)
 563  		trimBytes(len(args) - len(trim))
 564  	}
 565  
 566  	var list []fileEmbed
 567  	for trimSpace(); args != ""; trimSpace() {
 568  		var path []byte
 569  		pathPos := pos
 570  	Switch:
 571  		switch args[0] {
 572  		default:
 573  			i := len(args)
 574  			for j, c := range args {
 575  				if unicode.IsSpace(c) {
 576  					i = j
 577  					break
 578  				}
 579  			}
 580  			path = args[:i]
 581  			trimBytes(i)
 582  
 583  		case '`':
 584  			var ok bool
 585  			path, _, ok = bytes.Cut(args[1:], "`")
 586  			if !ok {
 587  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
 588  			}
 589  			trimBytes(1 + len(path) + 1)
 590  
 591  		case '"':
 592  			i := 1
 593  			for ; i < len(args); i++ {
 594  				if args[i] == '\\' {
 595  					i++
 596  					continue
 597  				}
 598  				if args[i] == '"' {
 599  					q, err := strconv.Unquote(args[:i+1])
 600  					if err != nil {
 601  						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
 602  					}
 603  					path = q
 604  					trimBytes(i + 1)
 605  					break Switch
 606  				}
 607  			}
 608  			if i >= len(args) {
 609  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
 610  			}
 611  		}
 612  
 613  		if args != "" {
 614  			r, _ := utf8.DecodeRuneInString(args)
 615  			if !unicode.IsSpace(r) {
 616  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
 617  			}
 618  		}
 619  		list = append(list, fileEmbed{path, pathPos})
 620  	}
 621  	return list, nil
 622  }
 623