file.mx raw
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 /*
6 Package pe implements access to PE (Microsoft Windows Portable Executable) files.
7
8 # Security
9
10 This package is not designed to be hardened against adversarial inputs, and is
11 outside the scope of https://go.dev/security/policy. In particular, only basic
12 validation is done when parsing object files. As such, care should be taken when
13 parsing untrusted inputs, as parsing malformed files may consume significant
14 resources, or cause panics.
15 */
16 package pe
17
18 import (
19 "bytes"
20 "compress/zlib"
21 "debug/dwarf"
22 "encoding/binary"
23 "errors"
24 "fmt"
25 "io"
26 "os"
27 )
28
29 // A File represents an open PE file.
30 type File struct {
31 FileHeader
32 OptionalHeader any // of type *OptionalHeader32 or *OptionalHeader64
33 Sections []*Section
34 Symbols []*Symbol // COFF symbols with auxiliary symbol records removed
35 COFFSymbols []COFFSymbol // all COFF symbols (including auxiliary symbol records)
36 StringTable StringTable
37
38 closer io.Closer
39 }
40
41 // Open opens the named file using [os.Open] and prepares it for use as a PE binary.
42 func Open(name string) (*File, error) {
43 f, err := os.Open(name)
44 if err != nil {
45 return nil, err
46 }
47 ff, err := NewFile(f)
48 if err != nil {
49 f.Close()
50 return nil, err
51 }
52 ff.closer = f
53 return ff, nil
54 }
55
56 // Close closes the [File].
57 // If the [File] was created using [NewFile] directly instead of [Open],
58 // Close has no effect.
59 func (f *File) Close() error {
60 var err error
61 if f.closer != nil {
62 err = f.closer.Close()
63 f.closer = nil
64 }
65 return err
66 }
67
68 // TODO(brainman): add Load function, as a replacement for NewFile, that does not call removeAuxSymbols (for performance)
69
70 // NewFile creates a new [File] for accessing a PE binary in an underlying reader.
71 func NewFile(r io.ReaderAt) (*File, error) {
72 f := &File{}
73 sr := io.NewSectionReader(r, 0, 1<<63-1)
74
75 var dosheader [96]byte
76 if _, err := r.ReadAt(dosheader[0:], 0); err != nil {
77 return nil, err
78 }
79 var base int64
80 if dosheader[0] == 'M' && dosheader[1] == 'Z' {
81 signoff := int64(binary.LittleEndian.Uint32(dosheader[0x3c:]))
82 var sign [4]byte
83 r.ReadAt(sign[:], signoff)
84 if !(sign[0] == 'P' && sign[1] == 'E' && sign[2] == 0 && sign[3] == 0) {
85 return nil, fmt.Errorf("invalid PE file signature: % x", sign)
86 }
87 base = signoff + 4
88 } else {
89 base = int64(0)
90 }
91 sr.Seek(base, io.SeekStart)
92 if err := binary.Read(sr, binary.LittleEndian, &f.FileHeader); err != nil {
93 return nil, err
94 }
95 switch f.FileHeader.Machine {
96 case IMAGE_FILE_MACHINE_AMD64,
97 IMAGE_FILE_MACHINE_ARM64,
98 IMAGE_FILE_MACHINE_ARMNT,
99 IMAGE_FILE_MACHINE_I386,
100 IMAGE_FILE_MACHINE_RISCV32,
101 IMAGE_FILE_MACHINE_RISCV64,
102 IMAGE_FILE_MACHINE_RISCV128,
103 IMAGE_FILE_MACHINE_UNKNOWN:
104 // ok
105 default:
106 return nil, fmt.Errorf("unrecognized PE machine: %#x", f.FileHeader.Machine)
107 }
108
109 var err error
110
111 // Read string table.
112 f.StringTable, err = readStringTable(&f.FileHeader, sr)
113 if err != nil {
114 return nil, err
115 }
116
117 // Read symbol table.
118 f.COFFSymbols, err = readCOFFSymbols(&f.FileHeader, sr)
119 if err != nil {
120 return nil, err
121 }
122 f.Symbols, err = removeAuxSymbols(f.COFFSymbols, f.StringTable)
123 if err != nil {
124 return nil, err
125 }
126
127 // Seek past file header.
128 _, err = sr.Seek(base+int64(binary.Size(f.FileHeader)), io.SeekStart)
129 if err != nil {
130 return nil, err
131 }
132
133 // Read optional header.
134 f.OptionalHeader, err = readOptionalHeader(sr, f.FileHeader.SizeOfOptionalHeader)
135 if err != nil {
136 return nil, err
137 }
138
139 // Process sections.
140 f.Sections = []*Section{:f.FileHeader.NumberOfSections}
141 for i := 0; i < int(f.FileHeader.NumberOfSections); i++ {
142 sh := &SectionHeader32{}
143 if err := binary.Read(sr, binary.LittleEndian, sh); err != nil {
144 return nil, err
145 }
146 name, err := sh.fullName(f.StringTable)
147 if err != nil {
148 return nil, err
149 }
150 s := &Section{}
151 s.SectionHeader = SectionHeader{
152 Name: name,
153 VirtualSize: sh.VirtualSize,
154 VirtualAddress: sh.VirtualAddress,
155 Size: sh.SizeOfRawData,
156 Offset: sh.PointerToRawData,
157 PointerToRelocations: sh.PointerToRelocations,
158 PointerToLineNumbers: sh.PointerToLineNumbers,
159 NumberOfRelocations: sh.NumberOfRelocations,
160 NumberOfLineNumbers: sh.NumberOfLineNumbers,
161 Characteristics: sh.Characteristics,
162 }
163 r2 := r
164 if sh.PointerToRawData == 0 { // .bss must have all 0s
165 r2 = &nobitsSectionReader{}
166 }
167 s.sr = io.NewSectionReader(r2, int64(s.SectionHeader.Offset), int64(s.SectionHeader.Size))
168 s.ReaderAt = s.sr
169 f.Sections[i] = s
170 }
171 for i := range f.Sections {
172 var err error
173 f.Sections[i].Relocs, err = readRelocs(&f.Sections[i].SectionHeader, sr)
174 if err != nil {
175 return nil, err
176 }
177 }
178
179 return f, nil
180 }
181
// nobitsSectionReader is the io.ReaderAt given to sections whose header has a
// zero PointerToRawData, that is, sections with no bytes stored in the file.
type nobitsSectionReader struct{}

// ReadAt never yields data: it reads nothing and always reports an error.
func (*nobitsSectionReader) ReadAt(p []byte, off int64) (n int, err error) {
	err = errors.New("unexpected read from section with uninitialized data")
	return 0, err
}
187
// getString returns the NUL-terminated string that begins at offset start
// within section. The boolean result is false when start is out of range or
// when no terminating zero byte follows it.
func getString(section []byte, start int) (string, bool) {
	if start < 0 || start >= len(section) {
		return "", false
	}

	for i, c := range section[start:] {
		if c == 0 {
			return string(section[start : start+i]), true
		}
	}
	return "", false
}
201
202 // Section returns the first section with the given name, or nil if no such
203 // section exists.
204 func (f *File) Section(name string) *Section {
205 for _, s := range f.Sections {
206 if s.Name == name {
207 return s
208 }
209 }
210 return nil
211 }
212
213 func (f *File) DWARF() (*dwarf.Data, error) {
214 dwarfSuffix := func(s *Section) string {
215 switch {
216 case bytes.HasPrefix(s.Name, ".debug_"):
217 return s.Name[7:]
218 case bytes.HasPrefix(s.Name, ".zdebug_"):
219 return s.Name[8:]
220 default:
221 return ""
222 }
223
224 }
225
226 // sectionData gets the data for s and checks its size.
227 sectionData := func(s *Section) ([]byte, error) {
228 b, err := s.Data()
229 if err != nil && uint32(len(b)) < s.Size {
230 return nil, err
231 }
232
233 if 0 < s.VirtualSize && s.VirtualSize < s.Size {
234 b = b[:s.VirtualSize]
235 }
236
237 if len(b) >= 12 && string(b[:4]) == "ZLIB" {
238 dlen := binary.BigEndian.Uint64(b[4:12])
239 dbuf := []byte{:dlen}
240 r, err := zlib.NewReader(bytes.NewBuffer(b[12:]))
241 if err != nil {
242 return nil, err
243 }
244 if _, err := io.ReadFull(r, dbuf); err != nil {
245 return nil, err
246 }
247 if err := r.Close(); err != nil {
248 return nil, err
249 }
250 b = dbuf
251 }
252 return b, nil
253 }
254
255 // There are many other DWARF sections, but these
256 // are the ones the debug/dwarf package uses.
257 // Don't bother loading others.
258 var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil}
259 for _, s := range f.Sections {
260 suffix := dwarfSuffix(s)
261 if suffix == "" {
262 continue
263 }
264 if _, ok := dat[suffix]; !ok {
265 continue
266 }
267
268 b, err := sectionData(s)
269 if err != nil {
270 return nil, err
271 }
272 dat[suffix] = b
273 }
274
275 d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"])
276 if err != nil {
277 return nil, err
278 }
279
280 // Look for DWARF4 .debug_types sections and DWARF5 sections.
281 for i, s := range f.Sections {
282 suffix := dwarfSuffix(s)
283 if suffix == "" {
284 continue
285 }
286 if _, ok := dat[suffix]; ok {
287 // Already handled.
288 continue
289 }
290
291 b, err := sectionData(s)
292 if err != nil {
293 return nil, err
294 }
295
296 if suffix == "types" {
297 err = d.AddTypes(fmt.Sprintf("types-%d", i), b)
298 } else {
299 err = d.AddSection(".debug_"+suffix, b)
300 }
301 if err != nil {
302 return nil, err
303 }
304 }
305
306 return d, nil
307 }
308
// TODO(brainman): document ImportDirectory once we decide what to do with it.

// ImportDirectory is one 20-byte entry of the import directory table as
// decoded by ImportedSymbols: five consecutive little-endian uint32 fields.
type ImportDirectory struct {
	// OriginalFirstThunk is the virtual address of the thunk list that
	// ImportedSymbols walks to find the imported names. A zero value ends
	// the import directory table.
	OriginalFirstThunk uint32
	TimeDateStamp      uint32
	ForwarderChain     uint32
	// Name is the virtual address of the NUL-terminated DLL name.
	Name       uint32
	FirstThunk uint32

	// dll is the DLL name that ImportedSymbols resolves from Name.
	dll string
}
320
321 // ImportedSymbols returns the names of all symbols
322 // referred to by the binary f that are expected to be
323 // satisfied by other libraries at dynamic load time.
324 // It does not return weak symbols.
325 func (f *File) ImportedSymbols() ([][]byte, error) {
326 if f.OptionalHeader == nil {
327 return nil, nil
328 }
329
330 _, pe64 := f.OptionalHeader.(*OptionalHeader64)
331
332 // grab the number of data directory entries
333 var dd_length uint32
334 if pe64 {
335 dd_length = f.OptionalHeader.(*OptionalHeader64).NumberOfRvaAndSizes
336 } else {
337 dd_length = f.OptionalHeader.(*OptionalHeader32).NumberOfRvaAndSizes
338 }
339
340 // check that the length of data directory entries is large
341 // enough to include the imports directory.
342 if dd_length < IMAGE_DIRECTORY_ENTRY_IMPORT+1 {
343 return nil, nil
344 }
345
346 // grab the import data directory entry
347 var idd DataDirectory
348 if pe64 {
349 idd = f.OptionalHeader.(*OptionalHeader64).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
350 } else {
351 idd = f.OptionalHeader.(*OptionalHeader32).DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
352 }
353
354 // figure out which section contains the import directory table
355 var ds *Section
356 ds = nil
357 for _, s := range f.Sections {
358 if s.Offset == 0 {
359 continue
360 }
361 // We are using distance between s.VirtualAddress and idd.VirtualAddress
362 // to avoid potential overflow of uint32 caused by addition of s.VirtualSize
363 // to s.VirtualAddress.
364 if s.VirtualAddress <= idd.VirtualAddress && idd.VirtualAddress-s.VirtualAddress < s.VirtualSize {
365 ds = s
366 break
367 }
368 }
369
370 // didn't find a section, so no import libraries were found
371 if ds == nil {
372 return nil, nil
373 }
374
375 d, err := ds.Data()
376 if err != nil {
377 return nil, err
378 }
379
380 // seek to the virtual address specified in the import data directory
381 d = d[idd.VirtualAddress-ds.VirtualAddress:]
382
383 // start decoding the import directory
384 var ida []ImportDirectory
385 for len(d) >= 20 {
386 var dt ImportDirectory
387 dt.OriginalFirstThunk = binary.LittleEndian.Uint32(d[0:4])
388 dt.TimeDateStamp = binary.LittleEndian.Uint32(d[4:8])
389 dt.ForwarderChain = binary.LittleEndian.Uint32(d[8:12])
390 dt.Name = binary.LittleEndian.Uint32(d[12:16])
391 dt.FirstThunk = binary.LittleEndian.Uint32(d[16:20])
392 d = d[20:]
393 if dt.OriginalFirstThunk == 0 {
394 break
395 }
396 ida = append(ida, dt)
397 }
398 // TODO(brainman): this needs to be rewritten
399 // ds.Data() returns contents of section containing import table. Why store in variable called "names"?
400 // Why we are retrieving it second time? We already have it in "d", and it is not modified anywhere.
401 // getString does not extracts a string from symbol string table (as getString doco says).
402 // Why ds.Data() called again and again in the loop?
403 // Needs test before rewrite.
404 names, _ := ds.Data()
405 var all [][]byte
406 for _, dt := range ida {
407 dt.dll, _ = getString(names, int(dt.Name-ds.VirtualAddress))
408 d, _ = ds.Data()
409 // seek to OriginalFirstThunk
410 d = d[dt.OriginalFirstThunk-ds.VirtualAddress:]
411 for len(d) > 0 {
412 if pe64 { // 64bit
413 va := binary.LittleEndian.Uint64(d[0:8])
414 d = d[8:]
415 if va == 0 {
416 break
417 }
418 if va&0x8000000000000000 > 0 { // is Ordinal
419 // TODO add dynimport ordinal support.
420 } else {
421 fn, _ := getString(names, int(uint32(va)-ds.VirtualAddress+2))
422 all = append(all, fn+":"+dt.dll)
423 }
424 } else { // 32bit
425 va := binary.LittleEndian.Uint32(d[0:4])
426 d = d[4:]
427 if va == 0 {
428 break
429 }
430 if va&0x80000000 > 0 { // is Ordinal
431 // TODO add dynimport ordinal support.
432 //ord := va&0x0000FFFF
433 } else {
434 fn, _ := getString(names, int(va-ds.VirtualAddress+2))
435 all = append(all, fn+":"+dt.dll)
436 }
437 }
438 }
439 }
440
441 return all, nil
442 }
443
444 // ImportedLibraries returns the names of all libraries
445 // referred to by the binary f that are expected to be
446 // linked with the binary at dynamic link time.
447 func (f *File) ImportedLibraries() ([][]byte, error) {
448 // TODO
449 // cgo -dynimport don't use this for windows PE, so just return.
450 return nil, nil
451 }
452
// FormatError is unused.
// The type is retained for compatibility.
type FormatError struct{}

// Error implements the error interface with a fixed message.
func (e *FormatError) Error() string {
	const msg = "unknown error"
	return msg
}
461
462 // readOptionalHeader accepts an io.ReadSeeker pointing to optional header in the PE file
463 // and its size as seen in the file header.
464 // It parses the given size of bytes and returns optional header. It infers whether the
465 // bytes being parsed refer to 32 bit or 64 bit version of optional header.
466 func readOptionalHeader(r io.ReadSeeker, sz uint16) (any, error) {
467 // If optional header size is 0, return empty optional header.
468 if sz == 0 {
469 return nil, nil
470 }
471
472 var (
473 // First couple of bytes in option header state its type.
474 // We need to read them first to determine the type and
475 // validity of optional header.
476 ohMagic uint16
477 ohMagicSz = binary.Size(ohMagic)
478 )
479
480 // If optional header size is greater than 0 but less than its magic size, return error.
481 if sz < uint16(ohMagicSz) {
482 return nil, fmt.Errorf("optional header size is less than optional header magic size")
483 }
484
485 // read reads from io.ReadSeeke, r, into data.
486 var err error
487 read := func(data any) bool {
488 err = binary.Read(r, binary.LittleEndian, data)
489 return err == nil
490 }
491
492 if !read(&ohMagic) {
493 return nil, fmt.Errorf("failure to read optional header magic: %v", err)
494
495 }
496
497 switch ohMagic {
498 case 0x10b: // PE32
499 var (
500 oh32 OptionalHeader32
501 // There can be 0 or more data directories. So the minimum size of optional
502 // header is calculated by subtracting oh32.DataDirectory size from oh32 size.
503 oh32MinSz = binary.Size(oh32) - binary.Size(oh32.DataDirectory)
504 )
505
506 if sz < uint16(oh32MinSz) {
507 return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) of PE32 optional header", sz, oh32MinSz)
508 }
509
510 // Init oh32 fields
511 oh32.Magic = ohMagic
512 if !read(&oh32.MajorLinkerVersion) ||
513 !read(&oh32.MinorLinkerVersion) ||
514 !read(&oh32.SizeOfCode) ||
515 !read(&oh32.SizeOfInitializedData) ||
516 !read(&oh32.SizeOfUninitializedData) ||
517 !read(&oh32.AddressOfEntryPoint) ||
518 !read(&oh32.BaseOfCode) ||
519 !read(&oh32.BaseOfData) ||
520 !read(&oh32.ImageBase) ||
521 !read(&oh32.SectionAlignment) ||
522 !read(&oh32.FileAlignment) ||
523 !read(&oh32.MajorOperatingSystemVersion) ||
524 !read(&oh32.MinorOperatingSystemVersion) ||
525 !read(&oh32.MajorImageVersion) ||
526 !read(&oh32.MinorImageVersion) ||
527 !read(&oh32.MajorSubsystemVersion) ||
528 !read(&oh32.MinorSubsystemVersion) ||
529 !read(&oh32.Win32VersionValue) ||
530 !read(&oh32.SizeOfImage) ||
531 !read(&oh32.SizeOfHeaders) ||
532 !read(&oh32.CheckSum) ||
533 !read(&oh32.Subsystem) ||
534 !read(&oh32.DllCharacteristics) ||
535 !read(&oh32.SizeOfStackReserve) ||
536 !read(&oh32.SizeOfStackCommit) ||
537 !read(&oh32.SizeOfHeapReserve) ||
538 !read(&oh32.SizeOfHeapCommit) ||
539 !read(&oh32.LoaderFlags) ||
540 !read(&oh32.NumberOfRvaAndSizes) {
541 return nil, fmt.Errorf("failure to read PE32 optional header: %v", err)
542 }
543
544 dd, err := readDataDirectories(r, sz-uint16(oh32MinSz), oh32.NumberOfRvaAndSizes)
545 if err != nil {
546 return nil, err
547 }
548
549 copy(oh32.DataDirectory[:], dd)
550
551 return &oh32, nil
552 case 0x20b: // PE32+
553 var (
554 oh64 OptionalHeader64
555 // There can be 0 or more data directories. So the minimum size of optional
556 // header is calculated by subtracting oh64.DataDirectory size from oh64 size.
557 oh64MinSz = binary.Size(oh64) - binary.Size(oh64.DataDirectory)
558 )
559
560 if sz < uint16(oh64MinSz) {
561 return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) for PE32+ optional header", sz, oh64MinSz)
562 }
563
564 // Init oh64 fields
565 oh64.Magic = ohMagic
566 if !read(&oh64.MajorLinkerVersion) ||
567 !read(&oh64.MinorLinkerVersion) ||
568 !read(&oh64.SizeOfCode) ||
569 !read(&oh64.SizeOfInitializedData) ||
570 !read(&oh64.SizeOfUninitializedData) ||
571 !read(&oh64.AddressOfEntryPoint) ||
572 !read(&oh64.BaseOfCode) ||
573 !read(&oh64.ImageBase) ||
574 !read(&oh64.SectionAlignment) ||
575 !read(&oh64.FileAlignment) ||
576 !read(&oh64.MajorOperatingSystemVersion) ||
577 !read(&oh64.MinorOperatingSystemVersion) ||
578 !read(&oh64.MajorImageVersion) ||
579 !read(&oh64.MinorImageVersion) ||
580 !read(&oh64.MajorSubsystemVersion) ||
581 !read(&oh64.MinorSubsystemVersion) ||
582 !read(&oh64.Win32VersionValue) ||
583 !read(&oh64.SizeOfImage) ||
584 !read(&oh64.SizeOfHeaders) ||
585 !read(&oh64.CheckSum) ||
586 !read(&oh64.Subsystem) ||
587 !read(&oh64.DllCharacteristics) ||
588 !read(&oh64.SizeOfStackReserve) ||
589 !read(&oh64.SizeOfStackCommit) ||
590 !read(&oh64.SizeOfHeapReserve) ||
591 !read(&oh64.SizeOfHeapCommit) ||
592 !read(&oh64.LoaderFlags) ||
593 !read(&oh64.NumberOfRvaAndSizes) {
594 return nil, fmt.Errorf("failure to read PE32+ optional header: %v", err)
595 }
596
597 dd, err := readDataDirectories(r, sz-uint16(oh64MinSz), oh64.NumberOfRvaAndSizes)
598 if err != nil {
599 return nil, err
600 }
601
602 copy(oh64.DataDirectory[:], dd)
603
604 return &oh64, nil
605 default:
606 return nil, fmt.Errorf("optional header has unexpected Magic of 0x%x", ohMagic)
607 }
608 }
609
610 // readDataDirectories accepts an io.ReadSeeker pointing to data directories in the PE file,
611 // its size and number of data directories as seen in optional header.
612 // It parses the given size of bytes and returns given number of data directories.
613 func readDataDirectories(r io.ReadSeeker, sz uint16, n uint32) ([]DataDirectory, error) {
614 ddSz := uint64(binary.Size(DataDirectory{}))
615 if uint64(sz) != uint64(n)*ddSz {
616 return nil, fmt.Errorf("size of data directories(%d) is inconsistent with number of data directories(%d)", sz, n)
617 }
618
619 dd := []DataDirectory{:n}
620 if err := binary.Read(r, binary.LittleEndian, dd); err != nil {
621 return nil, fmt.Errorf("failure to read data directories: %v", err)
622 }
623
624 return dd, nil
625 }
626