dwarf.go raw
1 package stacksize
2
3 // This file implements parsing DWARF call frame information and interpreting
4 // the CFI bytecode, or enough of it for most practical code.
5
6 import (
7 "bytes"
8 "debug/elf"
9 "encoding/binary"
10 "fmt"
11 "io"
12 )
13
// dwarfCIE holds the parts of one DWARF Common Information Entry (CIE) from
// the .debug_frame section that are needed to interpret the Frame Description
// Entries (FDEs) that refer to it.
type dwarfCIE struct {
	// bytecode is the remainder of the CIE after its fixed header fields. It
	// is executed for every FDE that refers to this CIE, before the bytecode
	// of the FDE itself.
	bytecode []byte

	// codeAlignmentFactor is the factor by which the operand of each
	// DW_CFA_advance_loc* instruction is multiplied to get an address delta.
	codeAlignmentFactor uint64
}
19
20 // parseFrames parses all call frame information from a .debug_frame section and
21 // provides the passed in symbols map with frame size information.
22 func parseFrames(f *elf.File, data []byte, symbols map[uint64]*CallNode) error {
23 if f.Class != elf.ELFCLASS32 {
24 // TODO: ELF64
25 return fmt.Errorf("expected ELF32")
26 }
27 cies := make(map[uint32]*dwarfCIE)
28
29 // Read each entity.
30 r := bytes.NewBuffer(data)
31 for {
32 start := len(data) - r.Len()
33 var length uint32
34 err := binary.Read(r, binary.LittleEndian, &length)
35 if err == io.EOF {
36 return nil
37 }
38 if err != nil {
39 return err
40 }
41 var cie uint32
42 err = binary.Read(r, binary.LittleEndian, &cie)
43 if err != nil {
44 return err
45 }
46 if cie == 0xffffffff {
47 // This is a CIE.
48 var fields struct {
49 Version uint8
50 Augmentation uint8
51 AddressSize uint8
52 SegmentSize uint8
53 }
54 err = binary.Read(r, binary.LittleEndian, &fields)
55 if err != nil {
56 return err
57 }
58 if fields.Version != 4 {
59 return fmt.Errorf("unimplemented: .debug_frame version %d", fields.Version)
60 }
61 if fields.Augmentation != 0 {
62 return fmt.Errorf("unimplemented: .debug_frame with augmentation")
63 }
64 if fields.SegmentSize != 0 {
65 return fmt.Errorf("unimplemented: .debug_frame with segment size")
66 }
67 codeAlignmentFactor, err := readULEB128(r)
68 if err != nil {
69 return err
70 }
71 _, err = readSLEB128(r) // data alignment factor
72 if err != nil {
73 return err
74 }
75 _, err = readULEB128(r) // return address register
76 if err != nil {
77 return err
78 }
79 rest := (start + int(length) + 4) - (len(data) - r.Len())
80 bytecode := r.Next(rest)
81 cies[uint32(start)] = &dwarfCIE{
82 codeAlignmentFactor: codeAlignmentFactor,
83 bytecode: bytecode,
84 }
85 } else {
86 // This is a FDE.
87 var fields struct {
88 InitialLocation uint32
89 AddressRange uint32
90 }
91 err = binary.Read(r, binary.LittleEndian, &fields)
92 if err != nil {
93 return err
94 }
95 if _, ok := cies[cie]; !ok {
96 return fmt.Errorf("could not find CIE 0x%x in .debug_frame section", cie)
97 }
98 frame := frameInfo{
99 cie: cies[cie],
100 start: uint64(fields.InitialLocation),
101 loc: uint64(fields.InitialLocation),
102 length: uint64(fields.AddressRange),
103 }
104 rest := (start + int(length) + 4) - (len(data) - r.Len())
105 bytecode := r.Next(rest)
106
107 if frame.start == 0 {
108 // Not sure where these come from but they don't seem to be
109 // important.
110 continue
111 }
112
113 _, err = frame.exec(frame.cie.bytecode)
114 if err != nil {
115 return err
116 }
117 entries, err := frame.exec(bytecode)
118 if err != nil {
119 return err
120 }
121 var maxFrameSize uint64
122 for _, entry := range entries {
123 switch f.Machine {
124 case elf.EM_ARM:
125 if entry.cfaRegister != 13 { // r13 or sp
126 // something other than a stack pointer (on ARM)
127 return fmt.Errorf("%08x..%08x: unknown CFA register number %d", frame.start, frame.start+frame.length, entry.cfaRegister)
128 }
129 default:
130 return fmt.Errorf("unknown architecture: %s", f.Machine)
131 }
132 if entry.cfaOffset > maxFrameSize {
133 maxFrameSize = entry.cfaOffset
134 }
135 }
136 node := symbols[frame.start]
137 if node.Size != frame.length {
138 return fmt.Errorf("%s: symtab gives symbol length %d while DWARF gives symbol length %d", node, node.Size, frame.length)
139 }
140 node.FrameSize = maxFrameSize
141 node.FrameSizeType = Bounded
142 if debugPrint {
143 fmt.Printf("%08x..%08x: frame size %4d %s\n", frame.start, frame.start+frame.length, maxFrameSize, node)
144 }
145 }
146 }
147 }
148
149 // frameInfo contains the state of executing call frame information bytecode.
150 type frameInfo struct {
151 cie *dwarfCIE
152 start uint64
153 loc uint64
154 length uint64
155 cfaRegister uint64
156 cfaOffset uint64
157 }
158
// frameInfoLine is a snapshot of the frame table (.debug_frame) state at one
// point during the execution of the bytecode: it records which CFA rule is in
// effect at a given location.
type frameInfoLine struct {
	// loc is the location at the time the snapshot was taken.
	loc uint64

	// cfaRegister is the CFA register number at the time of the snapshot.
	cfaRegister uint64

	// cfaOffset is the CFA offset at the time of the snapshot.
	cfaOffset uint64
}
166
167 func (fi *frameInfo) newLine() frameInfoLine {
168 return frameInfoLine{
169 loc: fi.loc,
170 cfaRegister: fi.cfaRegister,
171 cfaOffset: fi.cfaOffset,
172 }
173 }
174
175 // exec executes the given bytecode in the CFI. Most CFI bytecode is actually
176 // very simple and provides a way to determine the maximum call frame size.
177 //
178 // The frame size often changes multiple times in a function, for example the
179 // frame size may be adjusted in the prologue and epilogue. Each frameInfoLine
180 // may contain such a change.
181 func (fi *frameInfo) exec(bytecode []byte) ([]frameInfoLine, error) {
182 var entries []frameInfoLine
183 r := bytes.NewBuffer(bytecode)
184 for {
185 op, err := r.ReadByte()
186 if err != nil {
187 if err == io.EOF {
188 entries = append(entries, fi.newLine())
189 return entries, nil
190 }
191 return nil, err
192 }
193 // For details on the various opcodes, see:
194 // http://dwarfstd.org/doc/DWARF5.pdf (page 239)
195 highBits := op >> 6 // high order 2 bits
196 lowBits := op & 0x1f
197 switch highBits {
198 case 1: // DW_CFA_advance_loc
199 fi.loc += uint64(lowBits) * fi.cie.codeAlignmentFactor
200 entries = append(entries, fi.newLine())
201 case 2: // DW_CFA_offset
202 // This indicates where a register is saved on the stack in the
203 // prologue. We can ignore that for our purposes.
204 _, err := readULEB128(r)
205 if err != nil {
206 return nil, err
207 }
208 case 3: // DW_CFA_restore
209 // Restore a register. Used after an outlined function call.
210 // It should be possible to ignore this.
211 // TODO: check that this is not the stack pointer.
212 case 0:
213 switch lowBits {
214 case 0: // DW_CFA_nop
215 // no operation
216 case 0x02: // DW_CFA_advance_loc1
217 // Very similar to DW_CFA_advance_loc but allows for a slightly
218 // larger range.
219 offset, err := r.ReadByte()
220 if err != nil {
221 return nil, err
222 }
223 fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
224 entries = append(entries, fi.newLine())
225 case 0x03: // DW_CFA_advance_loc2
226 var offset uint16
227 err := binary.Read(r, binary.LittleEndian, &offset)
228 if err != nil {
229 return nil, err
230 }
231 fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
232 entries = append(entries, fi.newLine())
233 case 0x04: // DW_CFA_advance_loc4
234 var offset uint32
235 err := binary.Read(r, binary.LittleEndian, &offset)
236 if err != nil {
237 return nil, err
238 }
239 fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
240 entries = append(entries, fi.newLine())
241 case 0x05: // DW_CFA_offset_extended
242 // Semantics are the same as DW_CFA_offset, but the encoding is
243 // different. Ignore it just like DW_CFA_offset.
244 _, err := readULEB128(r) // ULEB128 register
245 if err != nil {
246 return nil, err
247 }
248 _, err = readULEB128(r) // ULEB128 offset
249 if err != nil {
250 return nil, err
251 }
252 case 0x07: // DW_CFA_undefined
253 // Marks a single register as undefined. This is used to stop
254 // unwinding in moxie_startTask using:
255 // .cfi_undefined lr
256 // Ignore this directive.
257 _, err := readULEB128(r)
258 if err != nil {
259 return nil, err
260 }
261 case 0x09: // DW_CFA_register
262 // Copies a register. Emitted by the machine outliner, for example.
263 // It should be possible to ignore this.
264 // TODO: check that the stack pointer is not affected.
265 _, err := readULEB128(r)
266 if err != nil {
267 return nil, err
268 }
269 _, err = readULEB128(r)
270 if err != nil {
271 return nil, err
272 }
273 case 0x0c: // DW_CFA_def_cfa
274 register, err := readULEB128(r)
275 if err != nil {
276 return nil, err
277 }
278 offset, err := readULEB128(r)
279 if err != nil {
280 return nil, err
281 }
282 fi.cfaRegister = register
283 fi.cfaOffset = offset
284 case 0x0e: // DW_CFA_def_cfa_offset
285 offset, err := readULEB128(r)
286 if err != nil {
287 return nil, err
288 }
289 fi.cfaOffset = offset
290 default:
291 return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x (for address 0x%x)", op, fi.loc)
292 }
293 default:
294 return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x (for address 0x%x)", op, fi.loc)
295 }
296 }
297 }
298
// readULEB128 reads one unsigned LEB128 encoded integer from r and returns
// its value.
//
// An error is returned when r runs out of data before the final byte of the
// number is read, or when the encoded value does not fit in 64 bits.
// Encodings that are padded with redundant zero bits are accepted.
//
// Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
func readULEB128(r *bytes.Buffer) (result uint64, err error) {
	var shift uint
	for {
		var b byte
		b, err = r.ReadByte()
		if err != nil {
			return 0, err
		}
		if payload := uint64(b & 0x7f); payload != 0 {
			// Shifting back and comparing detects bits that would be shifted
			// out of the 64-bit result.
			if shift >= 64 || payload<<shift>>shift != payload {
				return 0, fmt.Errorf("ULEB128 value overflows 64 bits")
			}
			result |= payload << shift
		}
		if b&0x80 == 0 {
			// The high bit is clear: this was the last byte.
			return result, nil
		}
		if shift < 64 {
			// Stop counting once past 64 so that the counter itself cannot
			// wrap around on very long inputs.
			shift += 7
		}
	}
}
316
// readSLEB128 reads one signed LEB128 encoded integer from r and returns its
// value. An error is returned when r runs out of data before the final byte
// of the number is read.
//
// Source: https://en.wikipedia.org/wiki/LEB128#Decode_signed_integer
func readSLEB128(r *bytes.Buffer) (result int64, err error) {
	var (
		bits  uint64 // payload bits collected so far
		width uint8  // number of payload bits collected so far
		last  byte   // most recently read byte
	)
	for more := true; more; {
		last, err = r.ReadByte()
		if err != nil {
			return 0, err
		}
		bits |= uint64(last&0x7f) << width
		width += 7
		more = last&0x80 != 0
	}

	// The sign bit is the second high order bit (0x40) of the final byte. When
	// it is set, fill all bits above the payload with ones (sign extend).
	if last&0x40 != 0 && width < 64 {
		bits |= ^uint64(0) << width
	}
	return int64(bits), nil
}
344