1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package token
6 7 import (
8 "cmp"
9 "fmt"
10 "slices"
11 "strconv"
12 "sync"
13 "sync/atomic"
14 )
// debug enables strict checking of offsets and positions.
// If debug is set, invalid offset and position values cause a panic
// instead of being silently clamped into range (go.dev/issue/57490).
const debug = false
19 20 // -----------------------------------------------------------------------------
21 // Positions

// Position describes an arbitrary source position
// including the file, line, and column location.
// A Position is valid if the line number is > 0.
type Position struct {
	Filename []byte // filename, if any
	Offset   int    // offset, starting at 0
	Line     int    // line number, starting at 1
	Column   int    // column number, starting at 1 (byte count)
}

// IsValid reports whether the position is valid.
func (pos *Position) IsValid() bool { return pos.Line > 0 }

// String returns a string in one of several forms:
//
//	file:line:column    valid position with file name
//	file:line           valid position with file name but no column (column == 0)
//	line:column         valid position without file name
//	line                valid position without file name and no column (column == 0)
//	file                invalid position with file name
//	-                   invalid position without file name
func (pos Position) String() string {
	// Filename is a byte slice; convert it once so the result can be
	// built and compared as a string. A nil or empty Filename yields "".
	s := string(pos.Filename)
	if pos.IsValid() {
		if s != "" {
			s += ":"
		}
		s += strconv.Itoa(pos.Line)
		if pos.Column != 0 {
			s += ":" + strconv.Itoa(pos.Column)
		}
	}
	if s == "" {
		s = "-"
	}
	return s
}
// Pos is a compact encoding of a source position within a file set.
// A Pos can be expanded into a [Position], which is more convenient to
// work with but considerably larger.
//
// Every file in a file set owns the range of Pos values [base, base+size],
// where base and size are fixed when the file is added to the set.
// Subtracting the file's base from a Pos in that range gives the byte
// offset of the position from the start of the file, so the base itself
// is the Pos of the file's first byte.
//
// To obtain the Pos for a given byte offset, add the file to the file set
// with [FileSet.AddFile] and then call [File.Pos](offset) on the result.
// Conversely, for a Pos p belonging to a file set fset, fset.Position(p)
// yields the corresponding [Position].
//
// Pos values may be compared with the ordinary comparison operators.
// For p and q in the same file, the comparison orders them by source
// offset. For p and q in different files, p < q holds if p's file was
// added to the file set before q's file.
type Pos int

// NoPos is the zero value for [Pos]. It carries no file or line
// information, NoPos.IsValid() is false, and it compares smaller than
// every other [Pos] value. Its corresponding [Position] is the zero
// value for [Position].
const NoPos Pos = 0

// IsValid reports whether p is a valid position, that is, whether it
// differs from NoPos.
func (p Pos) IsValid() bool { return p != NoPos }
95 96 // -----------------------------------------------------------------------------
97 // File

// A File is a handle for a file belonging to a [FileSet].
// A File has a name, size, and line offset table.
//
// Use [FileSet.AddFile] to create a File.
// A File may belong to more than one FileSet; see [FileSet.AddExistingFiles].
type File struct {
	name []byte // file name as provided to AddFile
	base int    // Pos value range for this file is [base...base+size]
	size int    // file size as provided to AddFile

	// lines and infos are protected by mutex
	mutex sync.Mutex
	lines []int      // lines contains the offset of the first character for each line (the first entry is always 0)
	infos []lineInfo // alternative position information, appended by AddLineColumnInfo in increasing Offset order
}
114 115 // Name returns the file name of file f as registered with AddFile.
116 func (f *File) Name() []byte {
117 return f.name
118 }
119 120 // Base returns the base offset of file f as registered with AddFile.
121 func (f *File) Base() int {
122 return f.base
123 }
124 125 // Size returns the size of file f as registered with AddFile.
126 func (f *File) Size() int {
127 return f.size
128 }
129 130 // LineCount returns the number of lines in file f.
131 func (f *File) LineCount() int {
132 f.mutex.Lock()
133 n := len(f.lines)
134 f.mutex.Unlock()
135 return n
136 }
137 138 // AddLine adds the line offset for a new line.
139 // The line offset must be larger than the offset for the previous line
140 // and smaller than the file size; otherwise the line offset is ignored.
141 func (f *File) AddLine(offset int) {
142 f.mutex.Lock()
143 if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
144 f.lines = append(f.lines, offset)
145 }
146 f.mutex.Unlock()
147 }
148 149 // MergeLine merges a line with the following line. It is akin to replacing
150 // the newline character at the end of the line with a space (to not change the
151 // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
152 // MergeLine will panic if given an invalid line number.
153 func (f *File) MergeLine(line int) {
154 if line < 1 {
155 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
156 }
157 f.mutex.Lock()
158 defer f.mutex.Unlock()
159 if line >= len(f.lines) {
160 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
161 }
162 // To merge the line numbered <line> with the line numbered <line+1>,
163 // we need to remove the entry in lines corresponding to the line
164 // numbered <line+1>. The entry in lines corresponding to the line
165 // numbered <line+1> is located at index <line>, since indices in lines
166 // are 0-based and line numbers are 1-based.
167 copy(f.lines[line:], f.lines[line+1:])
168 f.lines = f.lines[:len(f.lines)-1]
169 }
170 171 // Lines returns the effective line offset table of the form described by [File.SetLines].
172 // Callers must not mutate the result.
173 func (f *File) Lines() []int {
174 f.mutex.Lock()
175 lines := f.lines
176 f.mutex.Unlock()
177 return lines
178 }
179 180 // SetLines sets the line offsets for a file and reports whether it succeeded.
181 // The line offsets are the offsets of the first character of each line;
182 // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
183 // An empty file has an empty line offset table.
184 // Each line offset must be larger than the offset for the previous line
185 // and smaller than the file size; otherwise SetLines fails and returns
186 // false.
187 // Callers must not mutate the provided slice after SetLines returns.
188 func (f *File) SetLines(lines []int) bool {
189 // verify validity of lines table
190 size := f.size
191 for i, offset := range lines {
192 if i > 0 && offset <= lines[i-1] || size <= offset {
193 return false
194 }
195 }
196 197 // set lines table
198 f.mutex.Lock()
199 f.lines = lines
200 f.mutex.Unlock()
201 return true
202 }
203 204 // SetLinesForContent sets the line offsets for the given file content.
205 // It ignores position-altering //line comments.
206 func (f *File) SetLinesForContent(content []byte) {
207 var lines []int
208 line := 0
209 for offset, b := range content {
210 if line >= 0 {
211 lines = append(lines, line)
212 }
213 line = -1
214 if b == '\n' {
215 line = offset + 1
216 }
217 }
218 219 // set lines table
220 f.mutex.Lock()
221 f.lines = lines
222 f.mutex.Unlock()
223 }
224 225 // LineStart returns the [Pos] value of the start of the specified line.
226 // It ignores any alternative positions set using [File.AddLineColumnInfo].
227 // LineStart panics if the 1-based line number is invalid.
228 func (f *File) LineStart(line int) Pos {
229 if line < 1 {
230 panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
231 }
232 f.mutex.Lock()
233 defer f.mutex.Unlock()
234 if line > len(f.lines) {
235 panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
236 }
237 return Pos(f.base + f.lines[line-1])
238 }
// A lineInfo object describes alternative file, line, and column
// number information (such as provided via a //line directive)
// for a given file offset.
type lineInfo struct {
	// fields are exported to make them accessible to gob
	Offset       int    // file offset at which the alternative information was registered
	Filename     []byte // alternative file name reported for positions covered by this entry
	Line, Column int    // alternative line and column at Offset; a Column of 0 means the column is unknown
}
249 250 // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
251 // It is here for backward-compatibility for code prior to Go 1.11.
252 func (f *File) AddLineInfo(offset int, filename []byte, line int) {
253 f.AddLineColumnInfo(offset, filename, line, 1)
254 }
255 256 // AddLineColumnInfo adds alternative file, line, and column number
257 // information for a given file offset. The offset must be larger
258 // than the offset for the previously added alternative line info
259 // and smaller than the file size; otherwise the information is
260 // ignored.
261 //
262 // AddLineColumnInfo is typically used to register alternative position
263 // information for line directives such as //line filename:line:column.
264 func (f *File) AddLineColumnInfo(offset int, filename []byte, line, column int) {
265 f.mutex.Lock()
266 if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
267 f.infos = append(f.infos, lineInfo{offset, filename, line, column})
268 }
269 f.mutex.Unlock()
270 }
271 272 // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
273 func (f *File) fixOffset(offset int) int {
274 switch {
275 case offset < 0:
276 if !debug {
277 return 0
278 }
279 case offset > f.size:
280 if !debug {
281 return f.size
282 }
283 default:
284 return offset
285 }
286 287 // only generate this code if needed
288 if debug {
289 panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
290 0 /* for symmetry */, offset, f.size,
291 f.base+offset, f.base, f.base+f.size))
292 }
293 return 0
294 }
295 296 // Pos returns the Pos value for the given file offset.
297 //
298 // If offset is negative, the result is the file's start
299 // position; if the offset is too large, the result is
300 // the file's end position (see also go.dev/issue/57490).
301 //
302 // The following invariant, though not true for Pos values
303 // in general, holds for the result p:
304 // f.Pos(f.Offset(p)) == p.
305 func (f *File) Pos(offset int) Pos {
306 return Pos(f.base + f.fixOffset(offset))
307 }
308 309 // Offset returns the offset for the given file position p.
310 //
311 // If p is before the file's start position (or if p is NoPos),
312 // the result is 0; if p is past the file's end position,
313 // the result is the file size (see also go.dev/issue/57490).
314 //
315 // The following invariant, though not true for offset values
316 // in general, holds for the result offset:
317 // f.Offset(f.Pos(offset)) == offset
318 func (f *File) Offset(p Pos) int {
319 return f.fixOffset(int(p) - f.base)
320 }
321 322 // Line returns the line number for the given file position p;
323 // p must be a [Pos] value in that file or [NoPos].
324 func (f *File) Line(p Pos) int {
325 return f.Position(p).Line
326 }
327 328 func searchLineInfos(a []lineInfo, x int) int {
329 i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
330 return cmp.Compare(a.Offset, x)
331 })
332 if !found {
333 // We want the lineInfo containing x, but if we didn't
334 // find x then i is the next one.
335 i--
336 }
337 return i
338 }
339 340 // unpack returns the filename and line and column number for a file offset.
341 // If adjusted is set, unpack will return the filename and line information
342 // possibly adjusted by //line comments; otherwise those comments are ignored.
343 func (f *File) unpack(offset int, adjusted bool) (filename []byte, line, column int) {
344 f.mutex.Lock()
345 filename = f.name
346 if i := searchInts(f.lines, offset); i >= 0 {
347 line, column = i+1, offset-f.lines[i]+1
348 }
349 if adjusted && len(f.infos) > 0 {
350 // few files have extra line infos
351 if i := searchLineInfos(f.infos, offset); i >= 0 {
352 alt := &f.infos[i]
353 filename = alt.Filename
354 if i := searchInts(f.lines, alt.Offset); i >= 0 {
355 // i+1 is the line at which the alternative position was recorded
356 d := line - (i + 1) // line distance from alternative position base
357 line = alt.Line + d
358 if alt.Column == 0 {
359 // alternative column is unknown => relative column is unknown
360 // (the current specification for line directives requires
361 // this to apply until the next PosBase/line directive,
362 // not just until the new newline)
363 column = 0
364 } else if d == 0 {
365 // the alternative position base is on the current line
366 // => column is relative to alternative column
367 column = alt.Column + (offset - alt.Offset)
368 }
369 }
370 }
371 }
372 // TODO(mvdan): move Unlock back under Lock with a defer statement once
373 // https://go.dev/issue/38471 is fixed to remove the performance penalty.
374 f.mutex.Unlock()
375 return
376 }
377 378 func (f *File) position(p Pos, adjusted bool) (pos Position) {
379 offset := f.fixOffset(int(p) - f.base)
380 pos.Offset = offset
381 pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
382 return
383 }
384 385 // PositionFor returns the Position value for the given file position p.
386 // If p is out of bounds, it is adjusted to match the File.Offset behavior.
387 // If adjusted is set, the position may be adjusted by position-altering
388 // //line comments; otherwise those comments are ignored.
389 // p must be a Pos value in f or NoPos.
390 func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
391 if p != NoPos {
392 pos = f.position(p, adjusted)
393 }
394 return
395 }
396 397 // Position returns the Position value for the given file position p.
398 // If p is out of bounds, it is adjusted to match the File.Offset behavior.
399 // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true).
400 func (f *File) Position(p Pos) (pos Position) {
401 return f.PositionFor(p, true)
402 }
403 404 // -----------------------------------------------------------------------------
405 // FileSet

// A FileSet represents a set of source files.
// Methods of file sets are synchronized; multiple goroutines
// may invoke them concurrently.
//
// The byte offsets for each file in a file set are mapped into
// distinct (integer) intervals, one interval [base, base+size]
// per file. The base represents the first byte in the file, and size
// is the corresponding file size. A [Pos] value is a value in such
// an interval. By determining the interval a [Pos] value belongs
// to, the file, its file base, and thus the byte offset (position)
// the [Pos] value is representing can be computed.
//
// When adding a new file, a file base must be provided. That can
// be any integer value that is past the end of any interval of any
// file already in the file set. For convenience, [FileSet.Base] provides
// such a value, which is simply the end of the Pos interval of the most
// recently added file, plus one. Unless there is a need to extend an
// interval later, the value of [FileSet.Base] should be used as the
// base argument for [FileSet.AddFile].
//
// A [File] may be removed from a FileSet when it is no longer needed.
// This may reduce memory usage in a long-running application.
type FileSet struct {
	mutex sync.RWMutex         // protects the file set
	base  int                  // base offset for the next file
	tree  tree                 // tree of files in ascending base order
	last  atomic.Pointer[File] // cache of last file looked up
}
435 436 // NewFileSet creates a new file set.
437 func NewFileSet() *FileSet {
438 return &FileSet{
439 base: 1, // 0 == NoPos
440 }
441 }
442 443 // Base returns the minimum base offset that must be provided to
444 // [FileSet.AddFile] when adding the next file.
445 func (s *FileSet) Base() int {
446 s.mutex.RLock()
447 b := s.base
448 s.mutex.RUnlock()
449 return b
450 }
451 452 // AddFile adds a new file with a given filename, base offset, and file size
453 // to the file set s and returns the file. Multiple files may have the same
454 // name. The base offset must not be smaller than the [FileSet.Base], and
455 // size must not be negative. As a special case, if a negative base is provided,
456 // the current value of the [FileSet.Base] is used instead.
457 //
458 // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
459 // as the minimum base value for the next file. The following relationship
460 // exists between a [Pos] value p for a given file offset offs:
461 //
462 // int(p) = base + offs
463 //
464 // with offs in the range [0, size] and thus p in the range [base, base+size].
465 // For convenience, [File.Pos] may be used to create file-specific position
466 // values from a file offset.
467 func (s *FileSet) AddFile(filename []byte, base, size int) *File {
468 // Allocate f outside the critical section.
469 f := &File{name: filename, size: size, lines: []int{0}}
470 471 s.mutex.Lock()
472 defer s.mutex.Unlock()
473 if base < 0 {
474 base = s.base
475 }
476 if base < s.base {
477 panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
478 }
479 f.base = base
480 if size < 0 {
481 panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
482 }
483 // base >= s.base && size >= 0
484 base += size + 1 // +1 because EOF also has a position
485 if base < 0 {
486 panic("token.Pos offset overflow (> 2G of source code in file set)")
487 }
488 // add the file to the file set
489 s.base = base
490 s.tree.add(f)
491 s.last.Store(f)
492 return f
493 }
494 495 // AddExistingFiles adds the specified files to the
496 // FileSet if they are not already present.
497 // The caller must ensure that no pair of Files that
498 // would appear in the resulting FileSet overlap.
499 func (s *FileSet) AddExistingFiles(files ...*File) {
500 // This function cannot be implemented as:
501 //
502 // for _, file := range files {
503 // if prev := fset.File(token.Pos(file.Base())); prev != nil {
504 // if prev != file {
505 // panic("FileSet contains a different file at the same base")
506 // }
507 // continue
508 // }
509 // file2 := fset.AddFile(file.Name(), file.Base(), file.Size())
510 // file2.SetLines(file.Lines())
511 // }
512 //
513 // because all calls to AddFile must be in increasing order.
514 // AddExistingFilesFiles lets us augment an existing FileSet
515 // sequentially, so long as all sets of files have disjoint ranges.
516 // This approach also does not preserve line directives.
517 518 s.mutex.Lock()
519 defer s.mutex.Unlock()
520 521 for _, f := range files {
522 s.tree.add(f)
523 s.base = max(s.base, f.Base()+f.Size()+1)
524 }
525 }
526 527 // RemoveFile removes a file from the [FileSet] so that subsequent
528 // queries for its [Pos] interval yield a negative result.
529 // This reduces the memory usage of a long-lived [FileSet] that
530 // encounters an unbounded stream of files.
531 //
532 // Removing a file that does not belong to the set has no effect.
533 func (s *FileSet) RemoveFile(file *File) {
534 s.last.CompareAndSwap(file, nil) // clear last file cache
535 536 s.mutex.Lock()
537 defer s.mutex.Unlock()
538 539 pn, _ := s.tree.locate(file.key())
540 if *pn != nil && (*pn).file == file {
541 s.tree.delete(pn)
542 }
543 }
544 545 // Iterate calls yield for the files in the file set in ascending Base
546 // order until yield returns false.
547 func (s *FileSet) Iterate(yield func(*File) bool) {
548 s.mutex.RLock()
549 defer s.mutex.RUnlock()
550 551 // Unlock around user code.
552 // The iterator is robust to modification by yield.
553 // Avoid range here, so we can use defer.
554 s.tree.all()(func(f *File) bool {
555 s.mutex.RUnlock()
556 defer s.mutex.RLock()
557 return yield(f)
558 })
559 }
560 561 func (s *FileSet) file(p Pos) *File {
562 // common case: p is in last file.
563 if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
564 return f
565 }
566 567 s.mutex.RLock()
568 defer s.mutex.RUnlock()
569 570 pn, _ := s.tree.locate(key{int(p), int(p)})
571 if n := *pn; n != nil {
572 // Update cache of last file. A race is ok,
573 // but an exclusive lock causes heavy contention.
574 s.last.Store(n.file)
575 return n.file
576 }
577 return nil
578 }
579 580 // File returns the file that contains the position p.
581 // If no such file is found (for instance for p == [NoPos]),
582 // the result is nil.
583 func (s *FileSet) File(p Pos) (f *File) {
584 if p != NoPos {
585 f = s.file(p)
586 }
587 return
588 }
589 590 // PositionFor converts a [Pos] p in the fileset into a [Position] value.
591 // If adjusted is set, the position may be adjusted by position-altering
592 // //line comments; otherwise those comments are ignored.
593 // p must be a [Pos] value in s or [NoPos].
594 func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
595 if p != NoPos {
596 if f := s.file(p); f != nil {
597 return f.position(p, adjusted)
598 }
599 }
600 return
601 }
602 603 // Position converts a [Pos] p in the fileset into a Position value.
604 // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true).
605 func (s *FileSet) Position(p Pos) (pos Position) {
606 return s.PositionFor(p, true)
607 }
608 609 // -----------------------------------------------------------------------------
610 // Helper functions

// searchInts returns the index of the last element of the sorted slice a
// that is <= x, or -1 if there is no such element (including when a is
// empty).
func searchInts(a []int, x int) int {
	// This is a hand-inlined binary search equivalent to:
	//
	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
	//
	// With better compiler optimizations, this may not be needed in the
	// future, but at the moment this change improves the go/printer
	// benchmark performance by ~30%. This has a direct impact on the
	// speed of gofmt and thus seems worthwhile (2011-04-29).
	// TODO(gri): Remove this when compilers have caught up.
	lo, hi := 0, len(a)
	for lo < hi {
		// The unsigned shift avoids overflow when halving lo+hi.
		mid := int(uint(lo+hi) >> 1)
		// Invariant: lo ≤ mid < hi.
		if a[mid] > x {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	// lo is now the index of the first element > x.
	return lo - 1
}
634