1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // Package gosym implements access to the Go symbol
6 // and line number tables embedded in Go binaries generated
7 // by the gc compilers.
8 package gosym
import (
	"bytes"
	"encoding/binary"
	"fmt"
	"strconv"
	"strings"
)
16 17 /*
18 * Symbols
19 */
// A Sym represents a single symbol table entry.
type Sym struct {
	// Value is the symbol's value as decoded from the table. For function
	// symbols NewTable uses it as the function's entry address.
	Value uint64
	// Type is the symbol type letter. NewTable treats 'T', 't', 'L', 'l' as
	// text symbols and 'Z', 'z' as path symbols; a byte of 'a' or greater
	// marks the symbol as static (see Static).
	Type byte
	// Name is the symbol name. For path symbols it is the joined file path.
	Name string
	// GoType is the Go type value decoded from the table entry.
	GoType uint64
	// If this symbol is a function symbol, the corresponding Func
	Func *Func

	// goVersion is copied from the line table handed to NewTable and is
	// consulted by PackageName to pick the compiler-generated prefixes.
	goVersion version
}
32 33 // Static reports whether this symbol is static (not visible outside its file).
34 func (s *Sym) Static() bool { return s.Type >= 'a' }
35 36 // nameWithoutInst returns s.Name if s.Name has no brackets (does not reference an
37 // instantiated type, function, or method). If s.Name contains brackets, then it
38 // returns s.Name with all the contents between (and including) the outermost left
39 // and right bracket removed. This is useful to ignore any extra slashes or dots
40 // inside the brackets from the string searches below, where needed.
41 func (s *Sym) nameWithoutInst() string {
42 start := bytes.Index(s.Name, "[")
43 if start < 0 {
44 return s.Name
45 }
46 end := bytes.LastIndex(s.Name, "]")
47 if end < 0 {
48 // Malformed name, should contain closing bracket too.
49 return s.Name
50 }
51 return s.Name[0:start] + s.Name[end+1:]
52 }
53 54 // PackageName returns the package part of the symbol name,
55 // or the empty string if there is none.
56 func (s *Sym) PackageName() string {
57 name := s.nameWithoutInst()
58 59 // Since go1.20, a prefix of "type:" and "go:" is a compiler-generated symbol,
60 // they do not belong to any package.
61 //
62 // See cmd/compile/internal/base/link.go:ReservedImports variable.
63 if s.goVersion >= ver120 && (bytes.HasPrefix(name, "go:") || bytes.HasPrefix(name, "type:")) {
64 return ""
65 }
66 67 // For go1.18 and below, the prefix are "type." and "go." instead.
68 if s.goVersion <= ver118 && (bytes.HasPrefix(name, "go.") || bytes.HasPrefix(name, "type.")) {
69 return ""
70 }
71 72 pathend := bytes.LastIndex(name, "/")
73 if pathend < 0 {
74 pathend = 0
75 }
76 77 if i := bytes.Index(name[pathend:], "."); i != -1 {
78 return name[:pathend+i]
79 }
80 return ""
81 }
82 83 // ReceiverName returns the receiver type name of this symbol,
84 // or the empty string if there is none. A receiver name is only detected in
85 // the case that s.Name is fully-specified with a package name.
86 func (s *Sym) ReceiverName() string {
87 name := s.nameWithoutInst()
88 // If we find a slash in name, it should precede any bracketed expression
89 // that was removed, so pathend will apply correctly to name and s.Name.
90 pathend := bytes.LastIndex(name, "/")
91 if pathend < 0 {
92 pathend = 0
93 }
94 // Find the first dot after pathend (or from the beginning, if there was
95 // no slash in name).
96 l := bytes.Index(name[pathend:], ".")
97 // Find the last dot after pathend (or the beginning).
98 r := bytes.LastIndex(name[pathend:], ".")
99 if l == -1 || r == -1 || l == r {
100 // There is no receiver if we didn't find two distinct dots after pathend.
101 return ""
102 }
103 // Given there is a trailing '.' that is in name, find it now in s.Name.
104 // pathend+l should apply to s.Name, because it should be the dot in the
105 // package name.
106 r = bytes.LastIndex(s.Name[pathend:], ".")
107 return s.Name[pathend+l+1 : pathend+r]
108 }
109 110 // BaseName returns the symbol name without the package or receiver name.
111 func (s *Sym) BaseName() string {
112 name := s.nameWithoutInst()
113 if i := bytes.LastIndex(name, "."); i != -1 {
114 if s.Name != name {
115 brack := bytes.Index(s.Name, "[")
116 if i > brack {
117 // BaseName is a method name after the brackets, so
118 // recalculate for s.Name. Otherwise, i applies
119 // correctly to s.Name, since it is before the
120 // brackets.
121 i = bytes.LastIndex(s.Name, ".")
122 }
123 }
124 return s.Name[i+1:]
125 }
126 return s.Name
127 }
// A Func collects information about a single function.
type Func struct {
	// Entry is the function's entry address. NewTable sets it to the Value
	// of the function's symbol.
	Entry uint64
	// Sym is the symbol table entry describing the function.
	*Sym
	// End is the first address past the function: NewTable sets it to the
	// Value of the next text symbol, and PCToFunc treats [Entry, End) as
	// the function's address range.
	End    uint64
	Params []*Sym // nil for Go 1.3 and later binaries
	Locals []*Sym // nil for Go 1.3 and later binaries
	// FrameSize is taken from the Value of the function's 'm' symbol.
	FrameSize int
	// LineTable is the line table used to resolve addresses in this function.
	LineTable *LineTable
	// Obj is the Obj the function was assigned to by NewTable.
	Obj *Obj
}
// An Obj represents a collection of functions in a symbol table.
//
// The exact method of division of a binary into separate Objs is an internal detail
// of the symbol table format.
//
// In early versions of Go each source file became a different Obj.
//
// In Go 1 and Go 1.1, each package produced one Obj for all Go sources
// and one Obj per C source file.
//
// In Go 1.2 and later, there is a single Obj for the entire program.
type Obj struct {
	// Funcs is a list of functions in the Obj.
	Funcs []Func

	// In Go 1.1 and earlier, Paths is a list of symbols corresponding
	// to the source file names that produced the Obj.
	// In Go 1.2 and later, Paths is nil.
	// Use the keys of Table.Files to obtain a list of source files.
	Paths []Sym // meta
}
162 163 /*
164 * Symbol tables
165 */
// Table represents a Go symbol table. It stores all of the
// symbols decoded from the program and provides methods to translate
// between symbols, names, and addresses.
type Table struct {
	Syms  []Sym // nil for Go 1.3 and later binaries
	Funcs []Func
	Files map[string]*Obj // for Go 1.2 and later all files map to one Obj
	Objs  []Obj           // for Go 1.2 and later only one Obj in slice

	// go12line is set by NewTable when the supplied line table reports
	// itself as a Go 1.2 (or later) table; when non-nil, file and line
	// lookups are delegated to it.
	go12line *LineTable // Go 1.2 line number table
}
// sym is the raw form of one symbol table entry as produced by walksymtab,
// before NewTable converts it into a Sym.
type sym struct {
	value  uint64 // decoded symbol value
	gotype uint64 // decoded Go type field
	typ    byte   // symbol type letter
	name   []byte // name bytes; a sub-slice of the table data, not a copy
}
// Magic prefixes that walksymtab looks for at the start of the symbol
// table data to choose the byte order and entry layout. Data starting
// with none of them is decoded in the oldest layout, big endian.
var (
	// littleEndianSymtab and bigEndianSymtab introduce the newer layout:
	// the byte following the 7-byte prefix is the pointer size (4 or 8),
	// and symbols start after that 8-byte header.
	littleEndianSymtab = []byte{0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00}
	bigEndianSymtab    = []byte{0xFF, 0xFF, 0xFF, 0xFD, 0x00, 0x00, 0x00}
	// oldLittleEndianSymtab introduces the older layout stored little
	// endian; symbols start right after these 6 bytes.
	oldLittleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00}
)
191 192 func walksymtab(data []byte, fn func(sym) error) error {
193 if len(data) == 0 { // missing symtab is okay
194 return nil
195 }
196 var order binary.ByteOrder = binary.BigEndian
197 newTable := false
198 switch {
199 case bytes.HasPrefix(data, oldLittleEndianSymtab):
200 // Same as Go 1.0, but little endian.
201 // Format was used during interim development between Go 1.0 and Go 1.1.
202 // Should not be widespread, but easy to support.
203 data = data[6:]
204 order = binary.LittleEndian
205 case bytes.HasPrefix(data, bigEndianSymtab):
206 newTable = true
207 case bytes.HasPrefix(data, littleEndianSymtab):
208 newTable = true
209 order = binary.LittleEndian
210 }
211 var ptrsz int
212 if newTable {
213 if len(data) < 8 {
214 return &DecodingError{len(data), "unexpected EOF", nil}
215 }
216 ptrsz = int(data[7])
217 if ptrsz != 4 && ptrsz != 8 {
218 return &DecodingError{7, "invalid pointer size", ptrsz}
219 }
220 data = data[8:]
221 }
222 var s sym
223 p := data
224 for len(p) >= 4 {
225 var typ byte
226 if newTable {
227 // Symbol type, value, Go type.
228 typ = p[0] & 0x3F
229 wideValue := p[0]&0x40 != 0
230 goType := p[0]&0x80 != 0
231 if typ < 26 {
232 typ += 'A'
233 } else {
234 typ += 'a' - 26
235 }
236 s.typ = typ
237 p = p[1:]
238 if wideValue {
239 if len(p) < ptrsz {
240 return &DecodingError{len(data), "unexpected EOF", nil}
241 }
242 // fixed-width value
243 if ptrsz == 8 {
244 s.value = order.Uint64(p[0:8])
245 p = p[8:]
246 } else {
247 s.value = uint64(order.Uint32(p[0:4]))
248 p = p[4:]
249 }
250 } else {
251 // varint value
252 s.value = 0
253 shift := uint(0)
254 for len(p) > 0 && p[0]&0x80 != 0 {
255 s.value |= uint64(p[0]&0x7F) << shift
256 shift += 7
257 p = p[1:]
258 }
259 if len(p) == 0 {
260 return &DecodingError{len(data), "unexpected EOF", nil}
261 }
262 s.value |= uint64(p[0]) << shift
263 p = p[1:]
264 }
265 if goType {
266 if len(p) < ptrsz {
267 return &DecodingError{len(data), "unexpected EOF", nil}
268 }
269 // fixed-width go type
270 if ptrsz == 8 {
271 s.gotype = order.Uint64(p[0:8])
272 p = p[8:]
273 } else {
274 s.gotype = uint64(order.Uint32(p[0:4]))
275 p = p[4:]
276 }
277 }
278 } else {
279 // Value, symbol type.
280 s.value = uint64(order.Uint32(p[0:4]))
281 if len(p) < 5 {
282 return &DecodingError{len(data), "unexpected EOF", nil}
283 }
284 typ = p[4]
285 if typ&0x80 == 0 {
286 return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
287 }
288 typ &^= 0x80
289 s.typ = typ
290 p = p[5:]
291 }
292 293 // Name.
294 var i int
295 var nnul int
296 for i = 0; i < len(p); i++ {
297 if p[i] == 0 {
298 nnul = 1
299 break
300 }
301 }
302 switch typ {
303 case 'z', 'Z':
304 p = p[i+nnul:]
305 for i = 0; i+2 <= len(p); i += 2 {
306 if p[i] == 0 && p[i+1] == 0 {
307 nnul = 2
308 break
309 }
310 }
311 }
312 if len(p) < i+nnul {
313 return &DecodingError{len(data), "unexpected EOF", nil}
314 }
315 s.name = p[0:i]
316 i += nnul
317 p = p[i:]
318 319 if !newTable {
320 if len(p) < 4 {
321 return &DecodingError{len(data), "unexpected EOF", nil}
322 }
323 // Go type.
324 s.gotype = uint64(order.Uint32(p[:4]))
325 p = p[4:]
326 }
327 fn(s)
328 }
329 return nil
330 }
331 332 // NewTable decodes the Go symbol table (the ".gosymtab" section in ELF),
333 // returning an in-memory representation.
334 // Starting with Go 1.3, the Go symbol table no longer includes symbol data.
335 func NewTable(symtab []byte, pcln *LineTable) (*Table, error) {
336 var n int
337 err := walksymtab(symtab, func(s sym) error {
338 n++
339 return nil
340 })
341 if err != nil {
342 return nil, err
343 }
344 345 var t Table
346 if pcln.isGo12() {
347 t.go12line = pcln
348 }
349 fname := map[uint16]string{}
350 t.Syms = []Sym{:0:n}
351 nf := 0
352 nz := 0
353 lasttyp := uint8(0)
354 err = walksymtab(symtab, func(s sym) error {
355 n := len(t.Syms)
356 t.Syms = t.Syms[0 : n+1]
357 ts := &t.Syms[n]
358 ts.Type = s.typ
359 ts.Value = s.value
360 ts.GoType = s.gotype
361 ts.goVersion = pcln.version
362 switch s.typ {
363 default:
364 // rewrite name to use . instead of ยท (c2 b7)
365 w := 0
366 b := s.name
367 for i := 0; i < len(b); i++ {
368 if b[i] == 0xc2 && i+1 < len(b) && b[i+1] == 0xb7 {
369 i++
370 b[i] = '.'
371 }
372 b[w] = b[i]
373 w++
374 }
375 ts.Name = string(s.name[0:w])
376 case 'z', 'Z':
377 if lasttyp != 'z' && lasttyp != 'Z' {
378 nz++
379 }
380 for i := 0; i < len(s.name); i += 2 {
381 eltIdx := binary.BigEndian.Uint16(s.name[i : i+2])
382 elt, ok := fname[eltIdx]
383 if !ok {
384 return &DecodingError{-1, "bad filename code", eltIdx}
385 }
386 if n := len(ts.Name); n > 0 && ts.Name[n-1] != '/' {
387 ts.Name += "/"
388 }
389 ts.Name += elt
390 }
391 }
392 switch s.typ {
393 case 'T', 't', 'L', 'l':
394 nf++
395 case 'f':
396 fname[uint16(s.value)] = ts.Name
397 }
398 lasttyp = s.typ
399 return nil
400 })
401 if err != nil {
402 return nil, err
403 }
404 405 t.Funcs = []Func{:0:nf}
406 t.Files = map[string]*Obj{}
407 408 var obj *Obj
409 if t.go12line != nil {
410 // Put all functions into one Obj.
411 t.Objs = []Obj{:1}
412 obj = &t.Objs[0]
413 t.go12line.go12MapFiles(t.Files, obj)
414 } else {
415 t.Objs = []Obj{:0:nz}
416 }
417 418 // Count text symbols and attach frame sizes, parameters, and
419 // locals to them. Also, find object file boundaries.
420 lastf := 0
421 for i := 0; i < len(t.Syms); i++ {
422 sym := &t.Syms[i]
423 switch sym.Type {
424 case 'Z', 'z': // path symbol
425 if t.go12line != nil {
426 // Go 1.2 binaries have the file information elsewhere. Ignore.
427 break
428 }
429 // Finish the current object
430 if obj != nil {
431 obj.Funcs = t.Funcs[lastf:]
432 }
433 lastf = len(t.Funcs)
434 435 // Start new object
436 n := len(t.Objs)
437 t.Objs = t.Objs[0 : n+1]
438 obj = &t.Objs[n]
439 440 // Count & copy path symbols
441 var end int
442 for end = i + 1; end < len(t.Syms); end++ {
443 if c := t.Syms[end].Type; c != 'Z' && c != 'z' {
444 break
445 }
446 }
447 obj.Paths = t.Syms[i:end]
448 i = end - 1 // loop will i++
449 450 // Record file names
451 depth := 0
452 for j := range obj.Paths {
453 s := &obj.Paths[j]
454 if s.Name == "" {
455 depth--
456 } else {
457 if depth == 0 {
458 t.Files[s.Name] = obj
459 }
460 depth++
461 }
462 }
463 464 case 'T', 't', 'L', 'l': // text symbol
465 if n := len(t.Funcs); n > 0 {
466 t.Funcs[n-1].End = sym.Value
467 }
468 if sym.Name == "runtime.etext" || sym.Name == "etext" {
469 continue
470 }
471 472 // Count parameter and local (auto) syms
473 var np, na int
474 var end int
475 countloop:
476 for end = i + 1; end < len(t.Syms); end++ {
477 switch t.Syms[end].Type {
478 case 'T', 't', 'L', 'l', 'Z', 'z':
479 break countloop
480 case 'p':
481 np++
482 case 'a':
483 na++
484 }
485 }
486 487 // Fill in the function symbol
488 n := len(t.Funcs)
489 t.Funcs = t.Funcs[0 : n+1]
490 fn := &t.Funcs[n]
491 sym.Func = fn
492 fn.Params = []*Sym{:0:np}
493 fn.Locals = []*Sym{:0:na}
494 fn.Sym = sym
495 fn.Entry = sym.Value
496 fn.Obj = obj
497 if t.go12line != nil {
498 // All functions share the same line table.
499 // It knows how to narrow down to a specific
500 // function quickly.
501 fn.LineTable = t.go12line
502 } else if pcln != nil {
503 fn.LineTable = pcln.slice(fn.Entry)
504 pcln = fn.LineTable
505 }
506 for j := i; j < end; j++ {
507 s := &t.Syms[j]
508 switch s.Type {
509 case 'm':
510 fn.FrameSize = int(s.Value)
511 case 'p':
512 n := len(fn.Params)
513 fn.Params = fn.Params[0 : n+1]
514 fn.Params[n] = s
515 case 'a':
516 n := len(fn.Locals)
517 fn.Locals = fn.Locals[0 : n+1]
518 fn.Locals[n] = s
519 }
520 }
521 i = end - 1 // loop will i++
522 }
523 }
524 525 if t.go12line != nil && nf == 0 {
526 t.Funcs = t.go12line.go12Funcs()
527 }
528 if obj != nil {
529 obj.Funcs = t.Funcs[lastf:]
530 }
531 return &t, nil
532 }
533 534 // PCToFunc returns the function containing the program counter pc,
535 // or nil if there is no such function.
536 func (t *Table) PCToFunc(pc uint64) *Func {
537 funcs := t.Funcs
538 for len(funcs) > 0 {
539 m := len(funcs) / 2
540 fn := &funcs[m]
541 switch {
542 case pc < fn.Entry:
543 funcs = funcs[0:m]
544 case fn.Entry <= pc && pc < fn.End:
545 return fn
546 default:
547 funcs = funcs[m+1:]
548 }
549 }
550 return nil
551 }
552 553 // PCToLine looks up line number information for a program counter.
554 // If there is no information, it returns fn == nil.
555 func (t *Table) PCToLine(pc uint64) (file string, line int, fn *Func) {
556 if fn = t.PCToFunc(pc); fn == nil {
557 return
558 }
559 if t.go12line != nil {
560 file = t.go12line.go12PCToFile(pc)
561 line = t.go12line.go12PCToLine(pc)
562 } else {
563 file, line = fn.Obj.lineFromAline(fn.LineTable.PCToLine(pc))
564 }
565 return
566 }
567 568 // LineToPC looks up the first program counter on the given line in
569 // the named file. It returns [UnknownFileError] or [UnknownLineError] if
570 // there is an error looking up this line.
571 func (t *Table) LineToPC(file string, line int) (pc uint64, fn *Func, err error) {
572 obj, ok := t.Files[file]
573 if !ok {
574 return 0, nil, UnknownFileError(file)
575 }
576 577 if t.go12line != nil {
578 pc := t.go12line.go12LineToPC(file, line)
579 if pc == 0 {
580 return 0, nil, &UnknownLineError{file, line}
581 }
582 return pc, t.PCToFunc(pc), nil
583 }
584 585 abs, err := obj.alineFromLine(file, line)
586 if err != nil {
587 return
588 }
589 for i := range obj.Funcs {
590 f := &obj.Funcs[i]
591 pc := f.LineTable.LineToPC(abs, f.End)
592 if pc != 0 {
593 return pc, f, nil
594 }
595 }
596 return 0, nil, &UnknownLineError{file, line}
597 }
598 599 // LookupSym returns the text, data, or bss symbol with the given name,
600 // or nil if no such symbol is found.
601 func (t *Table) LookupSym(name string) *Sym {
602 // TODO(austin) Maybe make a map
603 for i := range t.Syms {
604 s := &t.Syms[i]
605 switch s.Type {
606 case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
607 if s.Name == name {
608 return s
609 }
610 }
611 }
612 return nil
613 }
614 615 // LookupFunc returns the text, data, or bss symbol with the given name,
616 // or nil if no such symbol is found.
617 func (t *Table) LookupFunc(name string) *Func {
618 for i := range t.Funcs {
619 f := &t.Funcs[i]
620 if f.Sym.Name == name {
621 return f
622 }
623 }
624 return nil
625 }
626 627 // SymByAddr returns the text, data, or bss symbol starting at the given address.
628 func (t *Table) SymByAddr(addr uint64) *Sym {
629 for i := range t.Syms {
630 s := &t.Syms[i]
631 switch s.Type {
632 case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
633 if s.Value == addr {
634 return s
635 }
636 }
637 }
638 return nil
639 }
640 641 /*
642 * Object files
643 */
644 645 // This is legacy code for Go 1.1 and earlier, which used the
646 // Plan 9 format for pc-line tables. This code was never quite
647 // correct. It's probably very close, and it's usually correct, but
648 // we never quite found all the corner cases.
649 //
650 // Go 1.2 and later use a simpler format, documented at golang.org/s/go12symtab.
651 652 func (o *Obj) lineFromAline(aline int) (string, int) {
653 type stackEnt struct {
654 path string
655 start int
656 offset int
657 prev *stackEnt
658 }
659 660 noPath := &stackEnt{"", 0, 0, nil}
661 tos := noPath
662 663 pathloop:
664 for _, s := range o.Paths {
665 val := int(s.Value)
666 switch {
667 case val > aline:
668 break pathloop
669 670 case val == 1:
671 // Start a new stack
672 tos = &stackEnt{s.Name, val, 0, noPath}
673 674 case s.Name == "":
675 // Pop
676 if tos == noPath {
677 return "<malformed symbol table>", 0
678 }
679 tos.prev.offset += val - tos.start
680 tos = tos.prev
681 682 default:
683 // Push
684 tos = &stackEnt{s.Name, val, 0, tos}
685 }
686 }
687 688 if tos == noPath {
689 return "", 0
690 }
691 return tos.path, aline - tos.start - tos.offset + 1
692 }
693 694 func (o *Obj) alineFromLine(path string, line int) (int, error) {
695 if line < 1 {
696 return 0, &UnknownLineError{path, line}
697 }
698 699 for i, s := range o.Paths {
700 // Find this path
701 if s.Name != path {
702 continue
703 }
704 705 // Find this line at this stack level
706 depth := 0
707 var incstart int
708 line += int(s.Value)
709 pathloop:
710 for _, s := range o.Paths[i:] {
711 val := int(s.Value)
712 switch {
713 case depth == 1 && val >= line:
714 return line - 1, nil
715 716 case s.Name == "":
717 depth--
718 if depth == 0 {
719 break pathloop
720 } else if depth == 1 {
721 line += val - incstart
722 }
723 724 default:
725 if depth == 1 {
726 incstart = val
727 }
728 depth++
729 }
730 }
731 return 0, &UnknownLineError{path, line}
732 }
733 return 0, UnknownFileError(path)
734 }
735 736 /*
737 * Errors
738 */
// UnknownFileError represents a failure to find the specific file in
// the symbol table. Its value is the name of the file that was looked up.
type UnknownFileError string

// Error returns the message "unknown file: " followed by the file name.
func (e UnknownFileError) Error() string {
	const prefix = "unknown file: "
	return prefix + string(e)
}
// UnknownLineError represents a failure to map a line to a program
// counter, either because the line is beyond the bounds of the file
// or because there is no code on the given line.
type UnknownLineError struct {
	File string
	Line int
}

// Error returns a message of the form "no code at FILE:LINE".
func (e *UnknownLineError) Error() string {
	lineText := strconv.FormatInt(int64(e.Line), 10)
	return "no code at " + e.File + ":" + lineText
}
// DecodingError represents an error during the decoding of
// the symbol table.
type DecodingError struct {
	off int    // byte offset reported in the message
	msg string // description of the problem
	val any    // offending value, or nil if there is none to report
}

// Error returns the description, followed by the quoted offending value
// when there is one, followed by the byte offset in hexadecimal.
func (e *DecodingError) Error() string {
	where := fmt.Sprintf(" at byte %#x", e.off)
	if e.val == nil {
		return e.msg + where
	}
	return e.msg + fmt.Sprintf(" '%v'", e.val) + where
}
774