1 package builder
2 3 import (
4 "bytes"
5 "debug/dwarf"
6 "debug/elf"
7 "debug/macho"
8 "debug/pe"
9 "encoding/binary"
10 "fmt"
11 "io"
12 "os"
13 "path/filepath"
14 "regexp"
15 "runtime"
16 "sort"
17 "strings"
18 19 "github.com/aykevl/go-wasm"
20 "moxie/goenv"
21 )
// sizesDebug controls extra diagnostic output of the size calculation: when
// true, the section/gap walk and DWARF loading problems are printed.
// Set to true to print extra debug logs.
const sizesDebug = false
// programSize contains size statistics per package of a compiled program.
type programSize struct {
	// Packages maps a package path (or pseudo package such as "(padding)")
	// to the sizes attributed to it.
	Packages map[string]*packageSize
	Code     uint64
	ROData   uint64
	Data     uint64
	BSS      uint64
}

// sortedPackageNames returns the list of package names (the keys of
// programSize.Packages) sorted alphabetically.
func (ps *programSize) sortedPackageNames() []string {
	names := make([]string, 0, len(ps.Packages))
	for name := range ps.Packages {
		names = append(names, name)
	}
	sort.Strings(names)
	return names
}

// Flash usage in regular microcontrollers: code, read-only data and the
// initial values of writable data.
func (ps *programSize) Flash() uint64 {
	return ps.Code + ps.ROData + ps.Data
}

// Static RAM usage in regular microcontrollers: initialized plus
// zero-initialized data.
func (ps *programSize) RAM() uint64 {
	return ps.Data + ps.BSS
}

// Return the package size information for a given package path, creating it if
// it doesn't exist yet.
func (ps *programSize) getPackage(path string) *packageSize {
	if field, ok := ps.Packages[path]; ok {
		return field
	}
	if ps.Packages == nil {
		// Allow use on a zero-value programSize: writing to a nil map would
		// panic.
		ps.Packages = map[string]*packageSize{}
	}
	field := &packageSize{
		Program: ps,
		Sub:     map[string]*packageSize{},
	}
	ps.Packages[path] = field
	return field
}

// packageSize contains the size of a package, calculated from the linked object
// file.
type packageSize struct {
	Program *programSize // program this package belongs to
	Code    uint64
	ROData  uint64
	Data    uint64
	BSS     uint64
	// Sub holds the size per file inside the package, keyed by filename.
	Sub map[string]*packageSize
}

// Flash usage in regular microcontrollers.
func (ps *packageSize) Flash() uint64 {
	return ps.Code + ps.ROData + ps.Data
}

// Static RAM usage in regular microcontrollers.
func (ps *packageSize) RAM() uint64 {
	return ps.Data + ps.BSS
}

// Flash usage in regular microcontrollers, as a percentage of the total flash
// usage of the program. It returns 0 (rather than NaN) when the program has no
// flash usage at all.
func (ps *packageSize) FlashPercent() float64 {
	total := ps.Program.Flash()
	if total == 0 {
		return 0
	}
	return float64(ps.Flash()) / float64(total) * 100
}

// Add a single size data point to this package.
// This must only be called while calculating package size, not afterwards.
//
// getField selects which counter (code, rodata, data, bss) of a packageSize
// must be incremented; it is applied both to the package itself and to the
// per-file entry for filename. A size of zero is ignored.
func (ps *packageSize) addSize(getField func(*packageSize, bool) *uint64, filename string, size uint64, isVariable bool) {
	if size == 0 {
		return
	}

	// Add size for the package.
	*getField(ps, isVariable) += size

	// Add size for file inside package.
	sub, ok := ps.Sub[filename]
	if !ok {
		if ps.Sub == nil {
			// Per-file entries are created without a Sub map; create it on
			// demand so adding to them doesn't panic.
			ps.Sub = map[string]*packageSize{}
		}
		sub = &packageSize{Program: ps.Program}
		ps.Sub[filename] = sub
	}
	*getField(sub, isVariable) += size
}
// A mapping of a single chunk of code or data to a file path.
// Chunks come from the DWARF line table (code), from DWARF variable entries,
// or from symbol table entries; in the last case File holds the symbol name
// instead of a path.
type addressLine struct {
	Address    uint64
	Length     uint64 // length of this chunk
	Align      uint64 // (maximum) alignment of this line
	File       string // file path as stored in DWARF
	IsVariable bool   // true if this is a variable (or constant), false if it is code
}
// Sections defined in the input file. This struct defines them in a
// filetype-agnostic way but roughly follow the ELF types (.text, .data, .bss,
// etc).
type memorySection struct {
	Type    memoryType // kind of memory this section occupies
	Address uint64     // start address of the section
	Size    uint64     // size of the section in bytes
	Align   uint64     // alignment of the section; 0 when not known
}
// memoryType is the kind of memory a section occupies. The zero value means
// "no type".
type memoryType int

const (
	memoryCode memoryType = iota + 1
	memoryData
	memoryROData
	memoryBSS
	memoryStack
)

// String returns a short human-readable name for the memory type. Values
// outside the declared constants are rendered as "memoryType(N)" instead of
// panicking.
func (t memoryType) String() string {
	switch t {
	case 0:
		return "-"
	case memoryCode:
		return "code"
	case memoryData:
		return "data"
	case memoryROData:
		return "rodata"
	case memoryBSS:
		return "bss"
	case memoryStack:
		return "stack"
	default:
		return fmt.Sprintf("memoryType(%d)", int(t))
	}
}
// Regular expressions to match particular symbol names. These are not stored as
// DWARF variables because they have no mapping to source code global variables.
var (
	// Various globals that aren't a variable but nonetheless need to be stored
	// somewhere:
	//   alloc:  heap allocations during init interpretation
	//   pack:   data created when storing a constant in an interface for example
	//   string: buffer behind strings
	// The pattern matches names ending in "$alloc", "$pack" or "$string",
	// optionally followed by a numeric suffix such as ".12".
	packageSymbolRegexp = regexp.MustCompile(`\$(alloc|pack|string)(\.[0-9]+)?$`)
)
166 167 // readProgramSizeFromDWARF reads the source location for each line of code and
168 // each variable in the program, as far as this is stored in the DWARF debug
169 // information.
170 func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset, codeAlignment uint64, skipTombstone bool) ([]addressLine, error) {
171 r := data.Reader()
172 var lines []*dwarf.LineFile
173 var addresses []addressLine
174 for {
175 e, err := r.Next()
176 if err != nil {
177 return nil, err
178 }
179 if e == nil {
180 break
181 }
182 switch e.Tag {
183 case dwarf.TagCompileUnit:
184 // Found a compile unit.
185 // We can read the .debug_line section using it, which contains a
186 // mapping for most instructions to their file/line/column - even
187 // for inlined functions!
188 lr, err := data.LineReader(e)
189 if err != nil {
190 return nil, err
191 }
192 lines = lr.Files()
193 var lineEntry = dwarf.LineEntry{
194 EndSequence: true,
195 }
196 197 // Line tables are organized as sequences of line entries until an
198 // end sequence. A single line table can contain multiple such
199 // sequences. The last line entry is an EndSequence to indicate the
200 // end.
201 for {
202 // Read the next .debug_line entry.
203 prevLineEntry := lineEntry
204 err := lr.Next(&lineEntry)
205 if err != nil {
206 if err == io.EOF {
207 break
208 }
209 return nil, err
210 }
211 212 if prevLineEntry.EndSequence && lineEntry.Address == 0 && skipTombstone {
213 // Tombstone value. This symbol has been removed, for
214 // example by the --gc-sections linker flag. It is still
215 // here in the debug information because the linker can't
216 // just remove this reference.
217 // Read until the next EndSequence so that this sequence is
218 // skipped.
219 // For more details, see (among others):
220 // https://reviews.llvm.org/D84825
221 // The value 0 can however really occur in object files,
222 // that typically start at address 0. So don't skip
223 // tombstone values in object files (like when parsing MachO
224 // files).
225 for {
226 err := lr.Next(&lineEntry)
227 if err != nil {
228 return nil, err
229 }
230 if lineEntry.EndSequence {
231 break
232 }
233 }
234 }
235 236 if !prevLineEntry.EndSequence {
237 // The chunk describes the code from prevLineEntry to
238 // lineEntry.
239 path := prevLineEntry.File.Name
240 if runtime.GOOS == "windows" {
241 // Work around a Clang bug on Windows:
242 // https://github.com/llvm/llvm-project/issues/117317
243 path = strings.ReplaceAll(path, "\\\\", "\\")
244 245 // wasi-libc likes to use forward slashes, but we
246 // canonicalize everything to use backwards slashes as
247 // is common on Windows.
248 path = strings.ReplaceAll(path, "/", "\\")
249 }
250 line := addressLine{
251 Address: prevLineEntry.Address + codeOffset,
252 Length: lineEntry.Address - prevLineEntry.Address,
253 Align: codeAlignment,
254 File: path,
255 }
256 if line.Length != 0 {
257 addresses = append(addresses, line)
258 }
259 }
260 }
261 case dwarf.TagVariable:
262 // Global variable (or constant). Most of these are not actually
263 // stored in the binary, because they have been optimized out. Only
264 // the ones with a location are still present.
265 r.SkipChildren()
266 267 file := e.AttrField(dwarf.AttrDeclFile)
268 location := e.AttrField(dwarf.AttrLocation)
269 globalType := e.AttrField(dwarf.AttrType)
270 if file == nil || location == nil || globalType == nil {
271 // Doesn't contain the requested information.
272 continue
273 }
274 275 // Try to parse the location. While this could in theory be a very
276 // complex expression, usually it's just a DW_OP_addr opcode
277 // followed by an address.
278 addr, err := readDWARFConstant(r.AddressSize(), location.Val.([]uint8))
279 if err != nil {
280 continue // ignore the error, we don't know what to do with it
281 }
282 283 // Parse the type of the global variable, which (importantly)
284 // contains the variable size. We're not interested in the type,
285 // only in the size.
286 typ, err := data.Type(globalType.Val.(dwarf.Offset))
287 if err != nil {
288 return nil, err
289 }
290 291 // Read alignment, if it's stored as part of the debug information.
292 var alignment uint64
293 if attr := e.AttrField(dwarf.AttrAlignment); attr != nil {
294 alignment = uint64(attr.Val.(int64))
295 }
296 297 addresses = append(addresses, addressLine{
298 Address: addr,
299 Length: uint64(typ.Size()),
300 Align: alignment,
301 File: lines[file.Val.(int64)].Name,
302 IsVariable: true,
303 })
304 default:
305 r.SkipChildren()
306 }
307 }
308 return addresses, nil
309 }
// Parse a DWARF constant. For addresses, this is usually a very simple
// expression.
//
// Only DW_OP_addr and DW_OP_plus_uconst are understood; any other opcode, or
// an operand that is cut short, results in an error. Address operands are
// decoded as little-endian values of addressSize bytes. addressSize must be 2,
// 4 or 8; any other value is treated as a programming error and panics.
func readDWARFConstant(addressSize int, bytecode []byte) (uint64, error) {
	var addr uint64
	for len(bytecode) != 0 {
		op := bytecode[0]
		bytecode = bytecode[1:]
		switch op {
		case 0x03: // DW_OP_addr
			if addressSize != 2 && addressSize != 4 && addressSize != 8 {
				panic("unexpected address size")
			}
			if len(bytecode) < addressSize {
				return 0, fmt.Errorf("truncated DW_OP_addr operand: need %d bytes, have %d", addressSize, len(bytecode))
			}
			switch addressSize {
			case 2:
				addr = uint64(binary.LittleEndian.Uint16(bytecode))
			case 4:
				addr = uint64(binary.LittleEndian.Uint32(bytecode))
			default: // 8
				addr = binary.LittleEndian.Uint64(bytecode)
			}
			bytecode = bytecode[addressSize:]
		case 0x23: // DW_OP_plus_uconst
			offset, n := readULEB128(bytecode)
			if n == 0 {
				return 0, fmt.Errorf("truncated DW_OP_plus_uconst operand (%d bytes)", len(bytecode))
			}
			addr += offset
			bytecode = bytecode[n:]
		default:
			return 0, fmt.Errorf("unknown DWARF opcode: 0x%x", op)
		}
	}
	return addr, nil
}

// readULEB128 decodes an unsigned LEB128 value from the start of buf and
// returns the value together with the number of bytes consumed. If buf ends
// before the final byte of the encoding (including when buf is empty), it
// returns (0, 0). Bits beyond the 64th are discarded.
//
// Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
func readULEB128(buf []byte) (result uint64, n int) {
	// Use a wide shift counter: shifting a uint64 by 64 or more yields 0 in
	// Go, so over-long encodings can't wrap around into the low bits.
	var shift uint
	for _, b := range buf {
		n++
		result |= uint64(b&0x7f) << shift
		if b&0x80 == 0 {
			return result, n
		}
		shift += 7
	}
	return 0, 0
}
356 357 // Read a MachO object file and return a line table.
358 // Also return an index from symbol name to start address in the line table.
359 func readMachOSymbolAddresses(path string) (map[string]int, []addressLine, error) {
360 // Some constants from mach-o/nlist.h
361 // See: https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html
362 const (
363 N_STAB = 0xe0
364 N_TYPE = 0x0e // bitmask for N_TYPE field
365 N_SECT = 0xe // one of the possible type in the N_TYPE field
366 )
367 368 // Read DWARF from the given object file.
369 file, err := macho.Open(path)
370 if err != nil {
371 return nil, nil, err
372 }
373 defer file.Close()
374 dwarf, err := file.DWARF()
375 if err != nil {
376 return nil, nil, err
377 }
378 lines, err := readProgramSizeFromDWARF(dwarf, 0, 0, false)
379 if err != nil {
380 return nil, nil, err
381 }
382 383 // Make a map from start addresses to indices in the line table (because the
384 // line table is a slice, not a map).
385 addressToLine := make(map[uint64]int, len(lines))
386 for i, line := range lines {
387 if _, ok := addressToLine[line.Address]; ok {
388 addressToLine[line.Address] = -1
389 continue
390 }
391 addressToLine[line.Address] = i
392 }
393 394 // Make a map that for each symbol gives the start index in the line table.
395 addresses := make(map[string]int, len(addressToLine))
396 for _, symbol := range file.Symtab.Syms {
397 if symbol.Type&N_STAB != 0 {
398 continue // STABS entry, ignore
399 }
400 if symbol.Type&0x0e != N_SECT {
401 continue // undefined symbol
402 }
403 if index, ok := addressToLine[symbol.Value]; ok && index >= 0 {
404 if _, ok := addresses[symbol.Name]; ok {
405 // There is a duplicate. Mark it as unavailable.
406 addresses[symbol.Name] = -1
407 continue
408 }
409 addresses[symbol.Name] = index
410 }
411 }
412 413 return addresses, lines, nil
414 }
415 416 // loadProgramSize calculate a program/data size breakdown of each package for a
417 // given ELF file.
418 // If the file doesn't contain DWARF debug information, the returned program
419 // size will still have valid summaries but won't have complete size information
420 // per package.
421 func loadProgramSize(path string, packagePathMap map[string]string) (*programSize, error) {
422 // Open the binary file.
423 f, err := os.Open(path)
424 if err != nil {
425 return nil, err
426 }
427 defer f.Close()
428 429 // This stores all chunks of addresses found in the binary.
430 var addresses []addressLine
431 432 // Load the binary file, which could be in a number of file formats.
433 var sections []memorySection
434 if file, err := elf.NewFile(f); err == nil {
435 var codeAlignment uint64
436 switch file.Machine {
437 case elf.EM_ARM:
438 codeAlignment = 4 // usually 2, but can be 4
439 }
440 // Read DWARF information. The error is intentionally ignored.
441 data, _ := file.DWARF()
442 if data != nil {
443 addresses, err = readProgramSizeFromDWARF(data, 0, codeAlignment, true)
444 if err != nil {
445 // However, _do_ report an error here. Something must have gone
446 // wrong while trying to parse DWARF data.
447 return nil, err
448 }
449 }
450 451 // Read the ELF symbols for some more chunks of location information.
452 // Some globals (such as strings) aren't stored in the DWARF debug
453 // information and therefore need to be obtained in a different way.
454 allSymbols, err := file.Symbols()
455 if err != nil {
456 return nil, err
457 }
458 for _, symbol := range allSymbols {
459 symType := elf.ST_TYPE(symbol.Info)
460 if symbol.Size == 0 {
461 continue
462 }
463 if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE {
464 continue
465 }
466 if symbol.Section >= elf.SHN_LORESERVE {
467 // Not a regular section, so skip it.
468 // One example is elf.SHN_ABS, which is used for symbols
469 // declared with an absolute value such as the memset function
470 // on the ESP32 which is defined in the mask ROM.
471 continue
472 }
473 section := file.Sections[symbol.Section]
474 if section.Flags&elf.SHF_ALLOC == 0 {
475 continue
476 }
477 if packageSymbolRegexp.MatchString(symbol.Name) || symbol.Name == "__isr_vector" {
478 addresses = append(addresses, addressLine{
479 Address: symbol.Value,
480 Length: symbol.Size,
481 File: symbol.Name,
482 IsVariable: true,
483 })
484 }
485 }
486 487 // Load allocated sections.
488 for _, section := range file.Sections {
489 if section.Flags&elf.SHF_ALLOC == 0 {
490 continue
491 }
492 if section.Type == elf.SHT_NOBITS {
493 if strings.HasPrefix(section.Name, ".stack") {
494 // Moxie emits stack sections on microcontroller using the
495 // ".stack" name.
496 // This is a bit ugly, but I don't think there is a way to
497 // mark the stack section in a linker script.
498 sections = append(sections, memorySection{
499 Address: section.Addr,
500 Size: section.Size,
501 Align: section.Addralign,
502 Type: memoryStack,
503 })
504 } else {
505 // Regular .bss section.
506 sections = append(sections, memorySection{
507 Address: section.Addr,
508 Size: section.Size,
509 Align: section.Addralign,
510 Type: memoryBSS,
511 })
512 }
513 } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_EXECINSTR != 0 {
514 // .text
515 sections = append(sections, memorySection{
516 Address: section.Addr,
517 Size: section.Size,
518 Align: section.Addralign,
519 Type: memoryCode,
520 })
521 } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_WRITE != 0 {
522 // .data
523 sections = append(sections, memorySection{
524 Address: section.Addr,
525 Size: section.Size,
526 Align: section.Addralign,
527 Type: memoryData,
528 })
529 } else if section.Type == elf.SHT_PROGBITS {
530 // .rodata
531 sections = append(sections, memorySection{
532 Address: section.Addr,
533 Size: section.Size,
534 Align: section.Addralign,
535 Type: memoryROData,
536 })
537 }
538 }
539 } else if file, err := macho.NewFile(f); err == nil {
540 // Read segments, for use while reading through sections.
541 segments := map[string]*macho.Segment{}
542 for _, load := range file.Loads {
543 switch load := load.(type) {
544 case *macho.Segment:
545 segments[load.Name] = load
546 }
547 }
548 549 // Read MachO sections.
550 for _, section := range file.Sections {
551 sectionType := section.Flags & 0xff
552 sectionFlags := section.Flags >> 8
553 segment := segments[section.Seg]
554 // For the constants used here, see:
555 // https://github.com/llvm/llvm-project/blob/release/14.x/llvm/include/llvm/BinaryFormat/MachO.h
556 if sectionFlags&0x800000 != 0 { // S_ATTR_PURE_INSTRUCTIONS
557 // Section containing only instructions.
558 sections = append(sections, memorySection{
559 Address: section.Addr,
560 Size: uint64(section.Size),
561 Align: uint64(section.Align),
562 Type: memoryCode,
563 })
564 } else if sectionType == 1 { // S_ZEROFILL
565 // Section filled with zeroes on demand.
566 sections = append(sections, memorySection{
567 Address: section.Addr,
568 Size: uint64(section.Size),
569 Align: uint64(section.Align),
570 Type: memoryBSS,
571 })
572 } else if segment.Maxprot&0b011 == 0b001 { // --r (read-only data)
573 // Protection doesn't allow writes, so mark this section read-only.
574 sections = append(sections, memorySection{
575 Address: section.Addr,
576 Size: uint64(section.Size),
577 Align: uint64(section.Align),
578 Type: memoryROData,
579 })
580 } else {
581 // The rest is assumed to be regular data.
582 sections = append(sections, memorySection{
583 Address: section.Addr,
584 Size: uint64(section.Size),
585 Align: uint64(section.Align),
586 Type: memoryData,
587 })
588 }
589 }
590 591 // Read DWARF information.
592 // The data isn't stored directly in the binary as in most executable
593 // formats. Instead, it is left in the object files that were used as a
594 // basis for linking. The executable does however contain STABS debug
595 // information that points to the source object file and is used by
596 // debuggers.
597 // For more information:
598 // http://wiki.dwarfstd.org/index.php?title=Apple%27s_%22Lazy%22_DWARF_Scheme
599 var objSymbolNames map[string]int
600 var objAddresses []addressLine
601 var previousSymbol macho.Symbol
602 for _, symbol := range file.Symtab.Syms {
603 // STABS constants, from mach-o/stab.h:
604 // https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/stab.h.auto.html
605 const (
606 N_GSYM = 0x20
607 N_FUN = 0x24
608 N_STSYM = 0x26
609 N_SO = 0x64
610 N_OSO = 0x66
611 )
612 if symbol.Type == N_OSO {
613 // Found an object file. Now try to parse it.
614 objSymbolNames, objAddresses, err = readMachOSymbolAddresses(symbol.Name)
615 if err != nil && sizesDebug {
616 // Errors are normally ignored. If there is an error, it's
617 // simply treated as that the DWARF is not available.
618 fmt.Fprintf(os.Stderr, "could not read DWARF from file %s: %s\n", symbol.Name, err)
619 }
620 } else if symbol.Type == N_FUN {
621 // Found a function.
622 // The way this is encoded is a bit weird. MachO symbols don't
623 // have a length. What I've found is that the length is encoded
624 // by first having a N_FUN symbol as usual, and then having a
625 // symbol with a zero-length name that has the value not set to
626 // the address of the symbol but to the length. So in order to
627 // get both the address and the length, we look for a symbol
628 // with a name followed by a symbol without a name.
629 if symbol.Name == "" && previousSymbol.Type == N_FUN && previousSymbol.Name != "" {
630 // Functions are encoded as many small chunks in the line
631 // table (one or a few instructions per source line). But
632 // the symbol length covers the whole symbols, over many
633 // lines and possibly including inlined functions. So we
634 // continue to iterate through the objAddresses slice until
635 // we've found all the source lines that are part of this
636 // symbol.
637 address := previousSymbol.Value
638 length := symbol.Value
639 if index, ok := objSymbolNames[previousSymbol.Name]; ok && index >= 0 {
640 for length > 0 {
641 line := objAddresses[index]
642 line.Address = address
643 if line.Length > length {
644 // Line extends beyond the end of te symbol?
645 // Weird, shouldn't happen.
646 break
647 }
648 addresses = append(addresses, line)
649 index++
650 length -= line.Length
651 address += line.Length
652 }
653 }
654 }
655 } else if symbol.Type == N_GSYM || symbol.Type == N_STSYM {
656 // Global variables.
657 if index, ok := objSymbolNames[symbol.Name]; ok {
658 address := objAddresses[index]
659 address.Address = symbol.Value
660 addresses = append(addresses, address)
661 }
662 }
663 previousSymbol = symbol
664 }
665 } else if file, err := pe.NewFile(f); err == nil {
666 // Read DWARF information. The error is intentionally ignored.
667 data, _ := file.DWARF()
668 if data != nil {
669 addresses, err = readProgramSizeFromDWARF(data, 0, 0, true)
670 if err != nil {
671 // However, _do_ report an error here. Something must have gone
672 // wrong while trying to parse DWARF data.
673 return nil, err
674 }
675 }
676 677 // Read COFF sections.
678 optionalHeader := file.OptionalHeader.(*pe.OptionalHeader64)
679 for _, section := range file.Sections {
680 // For more information:
681 // https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header
682 const (
683 IMAGE_SCN_CNT_CODE = 0x00000020
684 IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040
685 IMAGE_SCN_MEM_DISCARDABLE = 0x02000000
686 IMAGE_SCN_MEM_READ = 0x40000000
687 IMAGE_SCN_MEM_WRITE = 0x80000000
688 )
689 if section.Characteristics&IMAGE_SCN_MEM_DISCARDABLE != 0 {
690 // Debug sections, etc.
691 continue
692 }
693 address := uint64(section.VirtualAddress) + optionalHeader.ImageBase
694 if section.Characteristics&IMAGE_SCN_CNT_CODE != 0 {
695 // .text
696 sections = append(sections, memorySection{
697 Address: address,
698 Size: uint64(section.VirtualSize),
699 Type: memoryCode,
700 })
701 } else if section.Characteristics&IMAGE_SCN_CNT_INITIALIZED_DATA != 0 {
702 if section.Characteristics&IMAGE_SCN_MEM_WRITE != 0 {
703 // .data
704 sections = append(sections, memorySection{
705 Address: address,
706 Size: uint64(section.Size),
707 Type: memoryData,
708 })
709 if section.Size < section.VirtualSize {
710 // Equivalent of a .bss section.
711 // Note: because of how the PE/COFF format is
712 // structured, not all zero-initialized data is marked
713 // as such. A portion may be at the end of the .data
714 // section and is thus marked as initialized data.
715 sections = append(sections, memorySection{
716 Address: address + uint64(section.Size),
717 Size: uint64(section.VirtualSize) - uint64(section.Size),
718 Type: memoryBSS,
719 })
720 }
721 } else if section.Characteristics&IMAGE_SCN_MEM_READ != 0 {
722 // .rdata, .buildid, .pdata
723 sections = append(sections, memorySection{
724 Address: address,
725 Size: uint64(section.VirtualSize),
726 Type: memoryROData,
727 })
728 }
729 }
730 }
731 } else if file, err := wasm.Parse(f); err == nil {
732 // File is in WebAssembly format.
733 734 // Put code at a very high address, so that it won't conflict with the
735 // data in the memory section.
736 const codeOffset = 0x8000_0000_0000_0000
737 738 // Read DWARF information. The error is intentionally ignored.
739 data, _ := file.DWARF()
740 if data != nil {
741 addresses, err = readProgramSizeFromDWARF(data, codeOffset, 0, true)
742 if err != nil {
743 // However, _do_ report an error here. Something must have gone
744 // wrong while trying to parse DWARF data.
745 return nil, err
746 }
747 }
748 749 var linearMemorySize uint64
750 for _, section := range file.Sections {
751 switch section := section.(type) {
752 case *wasm.SectionCode:
753 sections = append(sections, memorySection{
754 Address: codeOffset,
755 Size: uint64(section.Size()),
756 Type: memoryCode,
757 })
758 case *wasm.SectionMemory:
759 // This value is used when processing *wasm.SectionData (which
760 // always comes after *wasm.SectionMemory).
761 linearMemorySize = uint64(section.Entries[0].Limits.Initial) * 64 * 1024
762 case *wasm.SectionData:
763 // Data sections contain initial values for linear memory.
764 // First load the list of data sections, and sort them by
765 // address for easier processing.
766 var dataSections []memorySection
767 for _, entry := range section.Entries {
768 address, err := wasm.Eval(bytes.NewBuffer(entry.Offset))
769 if err != nil {
770 return nil, fmt.Errorf("could not parse data section address: %w", err)
771 }
772 dataSections = append(dataSections, memorySection{
773 Address: uint64(address[0].(int32)),
774 Size: uint64(len(entry.Data)),
775 Type: memoryData,
776 })
777 }
778 sort.Slice(dataSections, func(i, j int) bool {
779 return dataSections[i].Address < dataSections[j].Address
780 })
781 782 // And now add all data sections for linear memory.
783 // Parts that are in the slice of data sections are added as
784 // memoryData, and parts that are not are added as memoryBSS.
785 addr := uint64(0)
786 for _, section := range dataSections {
787 if addr < section.Address {
788 sections = append(sections, memorySection{
789 Address: addr,
790 Size: section.Address - addr,
791 Type: memoryBSS,
792 })
793 }
794 if addr > section.Address {
795 // This might be allowed, I'm not sure.
796 // It certainly doesn't make a lot of sense.
797 return nil, fmt.Errorf("overlapping data section")
798 }
799 // addr == section.Address
800 sections = append(sections, section)
801 addr = section.Address + section.Size
802 }
803 if addr < linearMemorySize {
804 sections = append(sections, memorySection{
805 Address: addr,
806 Size: linearMemorySize - addr,
807 Type: memoryBSS,
808 })
809 }
810 }
811 }
812 } else {
813 return nil, fmt.Errorf("could not parse file: %w", err)
814 }
815 816 // Sort the slice of address chunks by address, so that we can iterate
817 // through it to calculate section sizes.
818 sort.Slice(addresses, func(i, j int) bool {
819 if addresses[i].Address == addresses[j].Address {
820 // Very rarely, there might be duplicate addresses.
821 // If that happens, sort the largest chunks first.
822 return addresses[i].Length > addresses[j].Length
823 }
824 return addresses[i].Address < addresses[j].Address
825 })
826 827 // Now finally determine the binary/RAM size usage per package by going
828 // through each allocated section.
829 sizes := make(map[string]*packageSize)
830 program := &programSize{
831 Packages: sizes,
832 }
833 for _, section := range sections {
834 switch section.Type {
835 case memoryCode:
836 readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
837 if isVariable {
838 return &ps.ROData
839 }
840 return &ps.Code
841 }, packagePathMap)
842 case memoryROData:
843 readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
844 return &ps.ROData
845 }, packagePathMap)
846 case memoryData:
847 readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
848 return &ps.Data
849 }, packagePathMap)
850 case memoryBSS:
851 readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
852 return &ps.BSS
853 }, packagePathMap)
854 case memoryStack:
855 // We store the C stack as a pseudo-package.
856 program.getPackage("C stack").addSize(func(ps *packageSize, isVariable bool) *uint64 {
857 return &ps.BSS
858 }, "", section.Size, false)
859 }
860 }
861 862 // ...and summarize the results.
863 for _, pkg := range sizes {
864 program.Code += pkg.Code
865 program.ROData += pkg.ROData
866 program.Data += pkg.Data
867 program.BSS += pkg.BSS
868 }
869 return program, nil
870 }
871 872 // readSection determines for each byte in this section to which package it
873 // belongs.
874 func readSection(section memorySection, addresses []addressLine, program *programSize, getField func(*packageSize, bool) *uint64, packagePathMap map[string]string) {
875 // The addr variable tracks at which address we are while going through this
876 // section. We start at the beginning.
877 addr := section.Address
878 sectionEnd := section.Address + section.Size
879 if sizesDebug {
880 fmt.Printf("%08x..%08x %5d: %s\n", addr, sectionEnd, section.Size, section.Type)
881 }
882 for _, line := range addresses {
883 if line.Address < section.Address || line.Address+line.Length > sectionEnd {
884 // Check that this line is entirely within the section.
885 // Don't bother dealing with line entries that cross sections (that
886 // seems rather unlikely anyway).
887 continue
888 }
889 if addr < line.Address {
890 // There is a gap: there is a space between the current and the
891 // previous line entry.
892 // Check whether this is caused by alignment requirements.
893 addrAligned := (addr + line.Align - 1) &^ (line.Align - 1)
894 if line.Align > 1 && addrAligned >= line.Address {
895 // It is, assume that's what causes the gap.
896 program.getPackage("(padding)").addSize(getField, "", line.Address-addr, true)
897 } else {
898 program.getPackage("(unknown)").addSize(getField, "", line.Address-addr, false)
899 if sizesDebug {
900 fmt.Printf("%08x..%08x %5d: unknown (gap), alignment=%d\n", addr, line.Address, line.Address-addr, line.Align)
901 }
902 }
903 addr = line.Address
904 }
905 if addr > line.Address+line.Length {
906 // The current line is already covered by a previous line entry.
907 // Simply skip it.
908 continue
909 }
910 // At this point, addr falls within the current line (probably at the
911 // start).
912 length := line.Length
913 if addr > line.Address {
914 // There is some overlap: the previous line entry already covered
915 // part of this line entry. So reduce the length to add to the
916 // remaining bit of the line entry.
917 length = line.Length - (addr - line.Address)
918 }
919 // Finally, mark this chunk of memory as used by the given package.
920 packagePath, filename := findPackagePath(line.File, packagePathMap)
921 program.getPackage(packagePath).addSize(getField, filename, length, line.IsVariable)
922 addr = line.Address + line.Length
923 }
924 if addr < sectionEnd {
925 // There is a gap at the end of the section.
926 addrAligned := (addr + section.Align - 1) &^ (section.Align - 1)
927 if section.Align > 1 && addrAligned >= sectionEnd {
928 // The gap is caused by the section alignment.
929 // For example, if a .rodata section ends with a non-aligned string.
930 program.getPackage("(padding)").addSize(getField, "", sectionEnd-addr, true)
931 } else {
932 program.getPackage("(unknown)").addSize(getField, "", sectionEnd-addr, false)
933 if sizesDebug {
934 fmt.Printf("%08x..%08x %5d: unknown (end), alignment=%d\n", addr, sectionEnd, sectionEnd-addr, section.Align)
935 }
936 }
937 }
938 }
939 940 // findPackagePath returns the Go package (or a pseudo package) for the given
941 // path. It uses some heuristics, for example for some C libraries.
942 func findPackagePath(path string, packagePathMap map[string]string) (packagePath, filename string) {
943 // Check whether this path is part of one of the compiled packages.
944 packagePath, ok := packagePathMap[filepath.Dir(path)]
945 if ok {
946 // Directory is known as a Go package.
947 // Add the file itself as well.
948 filename = filepath.Base(path)
949 } else {
950 if strings.HasPrefix(path, filepath.Join(goenv.Get("MOXIEROOT"), "lib")) {
951 // Emit C libraries (in the lib subdirectory of Moxie) as a single
952 // package, with a "C" prefix. For example: "C picolibc" for the
953 // baremetal libc.
954 libPath := strings.TrimPrefix(path, filepath.Join(goenv.Get("MOXIEROOT"), "lib")+string(os.PathSeparator))
955 parts := strings.SplitN(libPath, string(os.PathSeparator), 2)
956 packagePath = "C " + parts[0]
957 filename = parts[1]
958 } else if prefix := filepath.Join(goenv.Get("MOXIEROOT"), "llvm-project", "compiler-rt"); strings.HasPrefix(path, prefix) {
959 packagePath = "C compiler-rt"
960 filename = strings.TrimPrefix(path, prefix+string(os.PathSeparator))
961 } else if packageSymbolRegexp.MatchString(path) {
962 // Parse symbol names like main$alloc or runtime$string.
963 packagePath = path[:strings.LastIndex(path, "$")]
964 } else if path == "__isr_vector" {
965 packagePath = "C interrupt vector"
966 } else if path == "<Go type>" {
967 packagePath = "Go types"
968 } else if path == "<Go interface assert>" {
969 // Interface type assert, generated by the interface lowering pass.
970 packagePath = "Go interface assert"
971 } else if path == "<Go interface method>" {
972 // Interface method wrapper (switch over all concrete types),
973 // generated by the interface lowering pass.
974 packagePath = "Go interface method"
975 } else if path == "<stdin>" {
976 // This can happen when the source code (in Go) doesn't have a
977 // source file and uses "-" as the location. Somewhere this is
978 // converted to "<stdin>".
979 // Convert this back to the "-" string. Eventually, this should be
980 // fixed in the compiler.
981 packagePath = "-"
982 } else {
983 // This is some other path. Not sure what it is, so just emit its
984 // directory as a fallback.
985 packagePath = filepath.Dir(path)
986 filename = filepath.Base(path)
987 }
988 }
989 return
990 }
991