position.mx raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package token
   6  
   7  import (
   8  	"cmp"
   9  	"fmt"
  10  	"slices"
  11  	"strconv"
  12  	"sync"
  13  	"sync/atomic"
  14  )
  15  
// debug selects how File.fixOffset treats out-of-range offsets.
// When it is false, such offsets are silently clamped into [0, size];
// when it is set, invalid offset and position values cause a panic
// instead (go.dev/issue/57490).
const debug = false
  19  
  20  // -----------------------------------------------------------------------------
  21  // Positions
  22  
  23  // Position describes an arbitrary source position
  24  // including the file, line, and column location.
  25  // A Position is valid if the line number is > 0.
  26  type Position struct {
  27  	Filename []byte // filename, if any
  28  	Offset   int    // offset, starting at 0
  29  	Line     int    // line number, starting at 1
  30  	Column   int    // column number, starting at 1 (byte count)
  31  }
  32  
  33  // IsValid reports whether the position is valid.
  34  func (pos *Position) IsValid() bool { return pos.Line > 0 }
  35  
  36  // String returns a string in one of several forms:
  37  //
  38  //	file:line:column    valid position with file name
  39  //	file:line           valid position with file name but no column (column == 0)
  40  //	line:column         valid position without file name
  41  //	line                valid position without file name and no column (column == 0)
  42  //	file                invalid position with file name
  43  //	-                   invalid position without file name
  44  func (pos Position) String() string {
  45  	s := pos.Filename
  46  	if pos.IsValid() {
  47  		if s != "" {
  48  			s += ":"
  49  		}
  50  		s += strconv.Itoa(pos.Line)
  51  		if pos.Column != 0 {
  52  			s += fmt.Sprintf(":%d", pos.Column)
  53  		}
  54  	}
  55  	if s == "" {
  56  		s = "-"
  57  	}
  58  	return s
  59  }
  60  
  61  // Pos is a compact encoding of a source position within a file set.
  62  // It can be converted into a [Position] for a more convenient, but much
  63  // larger, representation.
  64  //
  65  // The Pos value for a given file is a number in the range [base, base+size],
  66  // where base and size are specified when a file is added to the file set.
  67  // The difference between a Pos value and the corresponding file base
  68  // corresponds to the byte offset of that position (represented by the Pos value)
  69  // from the beginning of the file. Thus, the file base offset is the Pos value
  70  // representing the first byte in the file.
  71  //
  72  // To create the Pos value for a specific source offset (measured in bytes),
  73  // first add the respective file to the current file set using [FileSet.AddFile]
  74  // and then call [File.Pos](offset) for that file. Given a Pos value p
  75  // for a specific file set fset, the corresponding [Position] value is
  76  // obtained by calling fset.Position(p).
  77  //
  78  // Pos values can be compared directly with the usual comparison operators:
  79  // If two Pos values p and q are in the same file, comparing p and q is
  80  // equivalent to comparing the respective source file offsets. If p and q
  81  // are in different files, p < q is true if the file implied by p was added
  82  // to the respective file set before the file implied by q.
  83  type Pos int
  84  
  85  // The zero value for [Pos] is NoPos; there is no file and line information
  86  // associated with it, and NoPos.IsValid() is false. NoPos is always
  87  // smaller than any other [Pos] value. The corresponding [Position] value
  88  // for NoPos is the zero value for [Position].
  89  const NoPos Pos = 0
  90  
  91  // IsValid reports whether the position is valid.
  92  func (p Pos) IsValid() bool {
  93  	return p != NoPos
  94  }
  95  
  96  // -----------------------------------------------------------------------------
  97  // File
  98  
// A File is a handle for a file belonging to a [FileSet].
// A File has a name, size, and line offset table.
//
// Use [FileSet.AddFile] to create a File.
// A File may belong to more than one FileSet; see [FileSet.AddExistingFiles].
type File struct {
	name []byte // file name as provided to AddFile
	base int    // Pos value range for this file is [base...base+size]
	size int    // file size as provided to AddFile

	// lines and infos are protected by mutex
	mutex sync.Mutex
	lines []int // lines contains the offset of the first character for each line (the first entry is always 0)
	// infos holds the alternative position records added through
	// AddLineColumnInfo; entries are appended in strictly increasing
	// Offset order.
	infos []lineInfo
}
 114  
// Name returns the file name of file f as registered with AddFile.
// The result is the File's own name slice; no copy is made.
func (f *File) Name() []byte {
	return f.name
}
 119  
// Base returns the base offset of file f as registered with AddFile.
// It is the smallest Pos value (as an int) in the file's range
// [base, base+size].
func (f *File) Base() int {
	return f.base
}
 124  
// Size returns the size of file f as registered with AddFile.
// Valid file offsets lie in the range [0, Size()].
func (f *File) Size() int {
	return f.size
}
 129  
 130  // LineCount returns the number of lines in file f.
 131  func (f *File) LineCount() int {
 132  	f.mutex.Lock()
 133  	n := len(f.lines)
 134  	f.mutex.Unlock()
 135  	return n
 136  }
 137  
 138  // AddLine adds the line offset for a new line.
 139  // The line offset must be larger than the offset for the previous line
 140  // and smaller than the file size; otherwise the line offset is ignored.
 141  func (f *File) AddLine(offset int) {
 142  	f.mutex.Lock()
 143  	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
 144  		f.lines = append(f.lines, offset)
 145  	}
 146  	f.mutex.Unlock()
 147  }
 148  
 149  // MergeLine merges a line with the following line. It is akin to replacing
 150  // the newline character at the end of the line with a space (to not change the
 151  // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
 152  // MergeLine will panic if given an invalid line number.
 153  func (f *File) MergeLine(line int) {
 154  	if line < 1 {
 155  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
 156  	}
 157  	f.mutex.Lock()
 158  	defer f.mutex.Unlock()
 159  	if line >= len(f.lines) {
 160  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
 161  	}
 162  	// To merge the line numbered <line> with the line numbered <line+1>,
 163  	// we need to remove the entry in lines corresponding to the line
 164  	// numbered <line+1>. The entry in lines corresponding to the line
 165  	// numbered <line+1> is located at index <line>, since indices in lines
 166  	// are 0-based and line numbers are 1-based.
 167  	copy(f.lines[line:], f.lines[line+1:])
 168  	f.lines = f.lines[:len(f.lines)-1]
 169  }
 170  
 171  // Lines returns the effective line offset table of the form described by [File.SetLines].
 172  // Callers must not mutate the result.
 173  func (f *File) Lines() []int {
 174  	f.mutex.Lock()
 175  	lines := f.lines
 176  	f.mutex.Unlock()
 177  	return lines
 178  }
 179  
 180  // SetLines sets the line offsets for a file and reports whether it succeeded.
 181  // The line offsets are the offsets of the first character of each line;
 182  // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
 183  // An empty file has an empty line offset table.
 184  // Each line offset must be larger than the offset for the previous line
 185  // and smaller than the file size; otherwise SetLines fails and returns
 186  // false.
 187  // Callers must not mutate the provided slice after SetLines returns.
 188  func (f *File) SetLines(lines []int) bool {
 189  	// verify validity of lines table
 190  	size := f.size
 191  	for i, offset := range lines {
 192  		if i > 0 && offset <= lines[i-1] || size <= offset {
 193  			return false
 194  		}
 195  	}
 196  
 197  	// set lines table
 198  	f.mutex.Lock()
 199  	f.lines = lines
 200  	f.mutex.Unlock()
 201  	return true
 202  }
 203  
 204  // SetLinesForContent sets the line offsets for the given file content.
 205  // It ignores position-altering //line comments.
 206  func (f *File) SetLinesForContent(content []byte) {
 207  	var lines []int
 208  	line := 0
 209  	for offset, b := range content {
 210  		if line >= 0 {
 211  			lines = append(lines, line)
 212  		}
 213  		line = -1
 214  		if b == '\n' {
 215  			line = offset + 1
 216  		}
 217  	}
 218  
 219  	// set lines table
 220  	f.mutex.Lock()
 221  	f.lines = lines
 222  	f.mutex.Unlock()
 223  }
 224  
 225  // LineStart returns the [Pos] value of the start of the specified line.
 226  // It ignores any alternative positions set using [File.AddLineColumnInfo].
 227  // LineStart panics if the 1-based line number is invalid.
 228  func (f *File) LineStart(line int) Pos {
 229  	if line < 1 {
 230  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
 231  	}
 232  	f.mutex.Lock()
 233  	defer f.mutex.Unlock()
 234  	if line > len(f.lines) {
 235  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
 236  	}
 237  	return Pos(f.base + f.lines[line-1])
 238  }
 239  
 240  // A lineInfo object describes alternative file, line, and column
 241  // number information (such as provided via a //line directive)
 242  // for a given file offset.
 243  type lineInfo struct {
 244  	// fields are exported to make them accessible to gob
 245  	Offset       int
 246  	Filename     []byte
 247  	Line, Column int
 248  }
 249  
 250  // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
 251  // It is here for backward-compatibility for code prior to Go 1.11.
 252  func (f *File) AddLineInfo(offset int, filename []byte, line int) {
 253  	f.AddLineColumnInfo(offset, filename, line, 1)
 254  }
 255  
 256  // AddLineColumnInfo adds alternative file, line, and column number
 257  // information for a given file offset. The offset must be larger
 258  // than the offset for the previously added alternative line info
 259  // and smaller than the file size; otherwise the information is
 260  // ignored.
 261  //
 262  // AddLineColumnInfo is typically used to register alternative position
 263  // information for line directives such as //line filename:line:column.
 264  func (f *File) AddLineColumnInfo(offset int, filename []byte, line, column int) {
 265  	f.mutex.Lock()
 266  	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
 267  		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
 268  	}
 269  	f.mutex.Unlock()
 270  }
 271  
 272  // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
 273  func (f *File) fixOffset(offset int) int {
 274  	switch {
 275  	case offset < 0:
 276  		if !debug {
 277  			return 0
 278  		}
 279  	case offset > f.size:
 280  		if !debug {
 281  			return f.size
 282  		}
 283  	default:
 284  		return offset
 285  	}
 286  
 287  	// only generate this code if needed
 288  	if debug {
 289  		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
 290  			0 /* for symmetry */, offset, f.size,
 291  			f.base+offset, f.base, f.base+f.size))
 292  	}
 293  	return 0
 294  }
 295  
 296  // Pos returns the Pos value for the given file offset.
 297  //
 298  // If offset is negative, the result is the file's start
 299  // position; if the offset is too large, the result is
 300  // the file's end position (see also go.dev/issue/57490).
 301  //
 302  // The following invariant, though not true for Pos values
 303  // in general, holds for the result p:
 304  // f.Pos(f.Offset(p)) == p.
 305  func (f *File) Pos(offset int) Pos {
 306  	return Pos(f.base + f.fixOffset(offset))
 307  }
 308  
 309  // Offset returns the offset for the given file position p.
 310  //
 311  // If p is before the file's start position (or if p is NoPos),
 312  // the result is 0; if p is past the file's end position,
 313  // the result is the file size (see also go.dev/issue/57490).
 314  //
 315  // The following invariant, though not true for offset values
 316  // in general, holds for the result offset:
 317  // f.Offset(f.Pos(offset)) == offset
 318  func (f *File) Offset(p Pos) int {
 319  	return f.fixOffset(int(p) - f.base)
 320  }
 321  
 322  // Line returns the line number for the given file position p;
 323  // p must be a [Pos] value in that file or [NoPos].
 324  func (f *File) Line(p Pos) int {
 325  	return f.Position(p).Line
 326  }
 327  
 328  func searchLineInfos(a []lineInfo, x int) int {
 329  	i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
 330  		return cmp.Compare(a.Offset, x)
 331  	})
 332  	if !found {
 333  		// We want the lineInfo containing x, but if we didn't
 334  		// find x then i is the next one.
 335  		i--
 336  	}
 337  	return i
 338  }
 339  
 340  // unpack returns the filename and line and column number for a file offset.
 341  // If adjusted is set, unpack will return the filename and line information
 342  // possibly adjusted by //line comments; otherwise those comments are ignored.
 343  func (f *File) unpack(offset int, adjusted bool) (filename []byte, line, column int) {
 344  	f.mutex.Lock()
 345  	filename = f.name
 346  	if i := searchInts(f.lines, offset); i >= 0 {
 347  		line, column = i+1, offset-f.lines[i]+1
 348  	}
 349  	if adjusted && len(f.infos) > 0 {
 350  		// few files have extra line infos
 351  		if i := searchLineInfos(f.infos, offset); i >= 0 {
 352  			alt := &f.infos[i]
 353  			filename = alt.Filename
 354  			if i := searchInts(f.lines, alt.Offset); i >= 0 {
 355  				// i+1 is the line at which the alternative position was recorded
 356  				d := line - (i + 1) // line distance from alternative position base
 357  				line = alt.Line + d
 358  				if alt.Column == 0 {
 359  					// alternative column is unknown => relative column is unknown
 360  					// (the current specification for line directives requires
 361  					// this to apply until the next PosBase/line directive,
 362  					// not just until the new newline)
 363  					column = 0
 364  				} else if d == 0 {
 365  					// the alternative position base is on the current line
 366  					// => column is relative to alternative column
 367  					column = alt.Column + (offset - alt.Offset)
 368  				}
 369  			}
 370  		}
 371  	}
 372  	// TODO(mvdan): move Unlock back under Lock with a defer statement once
 373  	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
 374  	f.mutex.Unlock()
 375  	return
 376  }
 377  
 378  func (f *File) position(p Pos, adjusted bool) (pos Position) {
 379  	offset := f.fixOffset(int(p) - f.base)
 380  	pos.Offset = offset
 381  	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
 382  	return
 383  }
 384  
 385  // PositionFor returns the Position value for the given file position p.
 386  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
 387  // If adjusted is set, the position may be adjusted by position-altering
 388  // //line comments; otherwise those comments are ignored.
 389  // p must be a Pos value in f or NoPos.
 390  func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
 391  	if p != NoPos {
 392  		pos = f.position(p, adjusted)
 393  	}
 394  	return
 395  }
 396  
 397  // Position returns the Position value for the given file position p.
 398  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
 399  // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true).
 400  func (f *File) Position(p Pos) (pos Position) {
 401  	return f.PositionFor(p, true)
 402  }
 403  
 404  // -----------------------------------------------------------------------------
 405  // FileSet
 406  
// A FileSet represents a set of source files.
// Methods of file sets are synchronized; multiple goroutines
// may invoke them concurrently.
//
// The byte offsets for each file in a file set are mapped into
// distinct (integer) intervals, one interval [base, base+size]
// per file. Here base is the [Pos] value of the first byte in the
// file, and size is the corresponding file size. A [Pos] value is a
// value in such an interval. By determining the interval a [Pos] value
// belongs to, the file, its file base, and thus the byte offset
// (position) the [Pos] value is representing can be computed.
//
// When adding a new file, a file base must be provided. That can
// be any integer value that is past the end of any interval of any
// file already in the file set. For convenience, [FileSet.Base] provides
// such a value, which is simply the end of the Pos interval of the most
// recently added file, plus one. Unless there is a need to extend an
// interval later, the value of [FileSet.Base] should be used as the base
// argument for [FileSet.AddFile].
//
// A [File] may be removed from a FileSet when it is no longer needed.
// This may reduce memory usage in a long-running application.
type FileSet struct {
	mutex sync.RWMutex         // protects the file set
	base  int                  // base offset for the next file
	tree  tree                 // tree of files in ascending base order
	last  atomic.Pointer[File] // cache of last file looked up
}
 435  
 436  // NewFileSet creates a new file set.
 437  func NewFileSet() *FileSet {
 438  	return &FileSet{
 439  		base: 1, // 0 == NoPos
 440  	}
 441  }
 442  
 443  // Base returns the minimum base offset that must be provided to
 444  // [FileSet.AddFile] when adding the next file.
 445  func (s *FileSet) Base() int {
 446  	s.mutex.RLock()
 447  	b := s.base
 448  	s.mutex.RUnlock()
 449  	return b
 450  }
 451  
 452  // AddFile adds a new file with a given filename, base offset, and file size
 453  // to the file set s and returns the file. Multiple files may have the same
 454  // name. The base offset must not be smaller than the [FileSet.Base], and
 455  // size must not be negative. As a special case, if a negative base is provided,
 456  // the current value of the [FileSet.Base] is used instead.
 457  //
 458  // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
 459  // as the minimum base value for the next file. The following relationship
 460  // exists between a [Pos] value p for a given file offset offs:
 461  //
 462  //	int(p) = base + offs
 463  //
 464  // with offs in the range [0, size] and thus p in the range [base, base+size].
 465  // For convenience, [File.Pos] may be used to create file-specific position
 466  // values from a file offset.
 467  func (s *FileSet) AddFile(filename []byte, base, size int) *File {
 468  	// Allocate f outside the critical section.
 469  	f := &File{name: filename, size: size, lines: []int{0}}
 470  
 471  	s.mutex.Lock()
 472  	defer s.mutex.Unlock()
 473  	if base < 0 {
 474  		base = s.base
 475  	}
 476  	if base < s.base {
 477  		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
 478  	}
 479  	f.base = base
 480  	if size < 0 {
 481  		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
 482  	}
 483  	// base >= s.base && size >= 0
 484  	base += size + 1 // +1 because EOF also has a position
 485  	if base < 0 {
 486  		panic("token.Pos offset overflow (> 2G of source code in file set)")
 487  	}
 488  	// add the file to the file set
 489  	s.base = base
 490  	s.tree.add(f)
 491  	s.last.Store(f)
 492  	return f
 493  }
 494  
 495  // AddExistingFiles adds the specified files to the
 496  // FileSet if they are not already present.
 497  // The caller must ensure that no pair of Files that
 498  // would appear in the resulting FileSet overlap.
 499  func (s *FileSet) AddExistingFiles(files ...*File) {
 500  	// This function cannot be implemented as:
 501  	//
 502  	//	for _, file := range files {
 503  	//		if prev := fset.File(token.Pos(file.Base())); prev != nil {
 504  	//			if prev != file {
 505  	//				panic("FileSet contains a different file at the same base")
 506  	//			}
 507  	//			continue
 508  	//		}
 509  	//		file2 := fset.AddFile(file.Name(), file.Base(), file.Size())
 510  	//		file2.SetLines(file.Lines())
 511  	//	}
 512  	//
 513  	// because all calls to AddFile must be in increasing order.
 514  	// AddExistingFilesFiles lets us augment an existing FileSet
 515  	// sequentially, so long as all sets of files have disjoint ranges.
 516  	// This approach also does not preserve line directives.
 517  
 518  	s.mutex.Lock()
 519  	defer s.mutex.Unlock()
 520  
 521  	for _, f := range files {
 522  		s.tree.add(f)
 523  		s.base = max(s.base, f.Base()+f.Size()+1)
 524  	}
 525  }
 526  
 527  // RemoveFile removes a file from the [FileSet] so that subsequent
 528  // queries for its [Pos] interval yield a negative result.
 529  // This reduces the memory usage of a long-lived [FileSet] that
 530  // encounters an unbounded stream of files.
 531  //
 532  // Removing a file that does not belong to the set has no effect.
 533  func (s *FileSet) RemoveFile(file *File) {
 534  	s.last.CompareAndSwap(file, nil) // clear last file cache
 535  
 536  	s.mutex.Lock()
 537  	defer s.mutex.Unlock()
 538  
 539  	pn, _ := s.tree.locate(file.key())
 540  	if *pn != nil && (*pn).file == file {
 541  		s.tree.delete(pn)
 542  	}
 543  }
 544  
 545  // Iterate calls yield for the files in the file set in ascending Base
 546  // order until yield returns false.
 547  func (s *FileSet) Iterate(yield func(*File) bool) {
 548  	s.mutex.RLock()
 549  	defer s.mutex.RUnlock()
 550  
 551  	// Unlock around user code.
 552  	// The iterator is robust to modification by yield.
 553  	// Avoid range here, so we can use defer.
 554  	s.tree.all()(func(f *File) bool {
 555  		s.mutex.RUnlock()
 556  		defer s.mutex.RLock()
 557  		return yield(f)
 558  	})
 559  }
 560  
 561  func (s *FileSet) file(p Pos) *File {
 562  	// common case: p is in last file.
 563  	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
 564  		return f
 565  	}
 566  
 567  	s.mutex.RLock()
 568  	defer s.mutex.RUnlock()
 569  
 570  	pn, _ := s.tree.locate(key{int(p), int(p)})
 571  	if n := *pn; n != nil {
 572  		// Update cache of last file. A race is ok,
 573  		// but an exclusive lock causes heavy contention.
 574  		s.last.Store(n.file)
 575  		return n.file
 576  	}
 577  	return nil
 578  }
 579  
 580  // File returns the file that contains the position p.
 581  // If no such file is found (for instance for p == [NoPos]),
 582  // the result is nil.
 583  func (s *FileSet) File(p Pos) (f *File) {
 584  	if p != NoPos {
 585  		f = s.file(p)
 586  	}
 587  	return
 588  }
 589  
 590  // PositionFor converts a [Pos] p in the fileset into a [Position] value.
 591  // If adjusted is set, the position may be adjusted by position-altering
 592  // //line comments; otherwise those comments are ignored.
 593  // p must be a [Pos] value in s or [NoPos].
 594  func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
 595  	if p != NoPos {
 596  		if f := s.file(p); f != nil {
 597  			return f.position(p, adjusted)
 598  		}
 599  	}
 600  	return
 601  }
 602  
 603  // Position converts a [Pos] p in the fileset into a Position value.
 604  // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true).
 605  func (s *FileSet) Position(p Pos) (pos Position) {
 606  	return s.PositionFor(p, true)
 607  }
 608  
 609  // -----------------------------------------------------------------------------
 610  // Helper functions
 611  
 612  func searchInts(a []int, x int) int {
 613  	// This function body is a manually inlined version of:
 614  	//
 615  	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
 616  	//
 617  	// With better compiler optimizations, this may not be needed in the
 618  	// future, but at the moment this change improves the go/printer
 619  	// benchmark performance by ~30%. This has a direct impact on the
 620  	// speed of gofmt and thus seems worthwhile (2011-04-29).
 621  	// TODO(gri): Remove this when compilers have caught up.
 622  	i, j := 0, len(a)
 623  	for i < j {
 624  		h := int(uint(i+j) >> 1) // avoid overflow when computing h
 625  		// i ≤ h < j
 626  		if a[h] <= x {
 627  			i = h + 1
 628  		} else {
 629  			j = h
 630  		}
 631  	}
 632  	return i - 1
 633  }
 634