dwarf.go raw

   1  package stacksize
   2  
   3  // This file implements parsing DWARF call frame information and interpreting
   4  // the CFI bytecode, or enough of it for most practical code.
   5  
   6  import (
   7  	"bytes"
   8  	"debug/elf"
   9  	"encoding/binary"
  10  	"fmt"
  11  	"io"
  12  )
  13  
  14  // dwarfCIE represents one DWARF Call Frame Information structure.
  15  type dwarfCIE struct {
  16  	bytecode            []byte
  17  	codeAlignmentFactor uint64
  18  }
  19  
  20  // parseFrames parses all call frame information from a .debug_frame section and
  21  // provides the passed in symbols map with frame size information.
  22  func parseFrames(f *elf.File, data []byte, symbols map[uint64]*CallNode) error {
  23  	if f.Class != elf.ELFCLASS32 {
  24  		// TODO: ELF64
  25  		return fmt.Errorf("expected ELF32")
  26  	}
  27  	cies := make(map[uint32]*dwarfCIE)
  28  
  29  	// Read each entity.
  30  	r := bytes.NewBuffer(data)
  31  	for {
  32  		start := len(data) - r.Len()
  33  		var length uint32
  34  		err := binary.Read(r, binary.LittleEndian, &length)
  35  		if err == io.EOF {
  36  			return nil
  37  		}
  38  		if err != nil {
  39  			return err
  40  		}
  41  		var cie uint32
  42  		err = binary.Read(r, binary.LittleEndian, &cie)
  43  		if err != nil {
  44  			return err
  45  		}
  46  		if cie == 0xffffffff {
  47  			// This is a CIE.
  48  			var fields struct {
  49  				Version      uint8
  50  				Augmentation uint8
  51  				AddressSize  uint8
  52  				SegmentSize  uint8
  53  			}
  54  			err = binary.Read(r, binary.LittleEndian, &fields)
  55  			if err != nil {
  56  				return err
  57  			}
  58  			if fields.Version != 4 {
  59  				return fmt.Errorf("unimplemented: .debug_frame version %d", fields.Version)
  60  			}
  61  			if fields.Augmentation != 0 {
  62  				return fmt.Errorf("unimplemented: .debug_frame with augmentation")
  63  			}
  64  			if fields.SegmentSize != 0 {
  65  				return fmt.Errorf("unimplemented: .debug_frame with segment size")
  66  			}
  67  			codeAlignmentFactor, err := readULEB128(r)
  68  			if err != nil {
  69  				return err
  70  			}
  71  			_, err = readSLEB128(r) // data alignment factor
  72  			if err != nil {
  73  				return err
  74  			}
  75  			_, err = readULEB128(r) // return address register
  76  			if err != nil {
  77  				return err
  78  			}
  79  			rest := (start + int(length) + 4) - (len(data) - r.Len())
  80  			bytecode := r.Next(rest)
  81  			cies[uint32(start)] = &dwarfCIE{
  82  				codeAlignmentFactor: codeAlignmentFactor,
  83  				bytecode:            bytecode,
  84  			}
  85  		} else {
  86  			// This is a FDE.
  87  			var fields struct {
  88  				InitialLocation uint32
  89  				AddressRange    uint32
  90  			}
  91  			err = binary.Read(r, binary.LittleEndian, &fields)
  92  			if err != nil {
  93  				return err
  94  			}
  95  			if _, ok := cies[cie]; !ok {
  96  				return fmt.Errorf("could not find CIE 0x%x in .debug_frame section", cie)
  97  			}
  98  			frame := frameInfo{
  99  				cie:    cies[cie],
 100  				start:  uint64(fields.InitialLocation),
 101  				loc:    uint64(fields.InitialLocation),
 102  				length: uint64(fields.AddressRange),
 103  			}
 104  			rest := (start + int(length) + 4) - (len(data) - r.Len())
 105  			bytecode := r.Next(rest)
 106  
 107  			if frame.start == 0 {
 108  				// Not sure where these come from but they don't seem to be
 109  				// important.
 110  				continue
 111  			}
 112  
 113  			_, err = frame.exec(frame.cie.bytecode)
 114  			if err != nil {
 115  				return err
 116  			}
 117  			entries, err := frame.exec(bytecode)
 118  			if err != nil {
 119  				return err
 120  			}
 121  			var maxFrameSize uint64
 122  			for _, entry := range entries {
 123  				switch f.Machine {
 124  				case elf.EM_ARM:
 125  					if entry.cfaRegister != 13 { // r13 or sp
 126  						// something other than a stack pointer (on ARM)
 127  						return fmt.Errorf("%08x..%08x: unknown CFA register number %d", frame.start, frame.start+frame.length, entry.cfaRegister)
 128  					}
 129  				default:
 130  					return fmt.Errorf("unknown architecture: %s", f.Machine)
 131  				}
 132  				if entry.cfaOffset > maxFrameSize {
 133  					maxFrameSize = entry.cfaOffset
 134  				}
 135  			}
 136  			node := symbols[frame.start]
 137  			if node.Size != frame.length {
 138  				return fmt.Errorf("%s: symtab gives symbol length %d while DWARF gives symbol length %d", node, node.Size, frame.length)
 139  			}
 140  			node.FrameSize = maxFrameSize
 141  			node.FrameSizeType = Bounded
 142  			if debugPrint {
 143  				fmt.Printf("%08x..%08x: frame size %4d %s\n", frame.start, frame.start+frame.length, maxFrameSize, node)
 144  			}
 145  		}
 146  	}
 147  }
 148  
 149  // frameInfo contains the state of executing call frame information bytecode.
 150  type frameInfo struct {
 151  	cie         *dwarfCIE
 152  	start       uint64
 153  	loc         uint64
 154  	length      uint64
 155  	cfaRegister uint64
 156  	cfaOffset   uint64
 157  }
 158  
 159  // frameInfoLine represents one line in the frame table (.debug_frame) at one
 160  // point in the execution of the bytecode.
 161  type frameInfoLine struct {
 162  	loc         uint64
 163  	cfaRegister uint64
 164  	cfaOffset   uint64
 165  }
 166  
 167  func (fi *frameInfo) newLine() frameInfoLine {
 168  	return frameInfoLine{
 169  		loc:         fi.loc,
 170  		cfaRegister: fi.cfaRegister,
 171  		cfaOffset:   fi.cfaOffset,
 172  	}
 173  }
 174  
 175  // exec executes the given bytecode in the CFI. Most CFI bytecode is actually
 176  // very simple and provides a way to determine the maximum call frame size.
 177  //
 178  // The frame size often changes multiple times in a function, for example the
 179  // frame size may be adjusted in the prologue and epilogue. Each frameInfoLine
 180  // may contain such a change.
 181  func (fi *frameInfo) exec(bytecode []byte) ([]frameInfoLine, error) {
 182  	var entries []frameInfoLine
 183  	r := bytes.NewBuffer(bytecode)
 184  	for {
 185  		op, err := r.ReadByte()
 186  		if err != nil {
 187  			if err == io.EOF {
 188  				entries = append(entries, fi.newLine())
 189  				return entries, nil
 190  			}
 191  			return nil, err
 192  		}
 193  		// For details on the various opcodes, see:
 194  		// http://dwarfstd.org/doc/DWARF5.pdf (page 239)
 195  		highBits := op >> 6 // high order 2 bits
 196  		lowBits := op & 0x1f
 197  		switch highBits {
 198  		case 1: // DW_CFA_advance_loc
 199  			fi.loc += uint64(lowBits) * fi.cie.codeAlignmentFactor
 200  			entries = append(entries, fi.newLine())
 201  		case 2: // DW_CFA_offset
 202  			// This indicates where a register is saved on the stack in the
 203  			// prologue. We can ignore that for our purposes.
 204  			_, err := readULEB128(r)
 205  			if err != nil {
 206  				return nil, err
 207  			}
 208  		case 3: // DW_CFA_restore
 209  			// Restore a register. Used after an outlined function call.
 210  			// It should be possible to ignore this.
 211  			// TODO: check that this is not the stack pointer.
 212  		case 0:
 213  			switch lowBits {
 214  			case 0: // DW_CFA_nop
 215  				// no operation
 216  			case 0x02: // DW_CFA_advance_loc1
 217  				// Very similar to DW_CFA_advance_loc but allows for a slightly
 218  				// larger range.
 219  				offset, err := r.ReadByte()
 220  				if err != nil {
 221  					return nil, err
 222  				}
 223  				fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
 224  				entries = append(entries, fi.newLine())
 225  			case 0x03: // DW_CFA_advance_loc2
 226  				var offset uint16
 227  				err := binary.Read(r, binary.LittleEndian, &offset)
 228  				if err != nil {
 229  					return nil, err
 230  				}
 231  				fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
 232  				entries = append(entries, fi.newLine())
 233  			case 0x04: // DW_CFA_advance_loc4
 234  				var offset uint32
 235  				err := binary.Read(r, binary.LittleEndian, &offset)
 236  				if err != nil {
 237  					return nil, err
 238  				}
 239  				fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
 240  				entries = append(entries, fi.newLine())
 241  			case 0x05: // DW_CFA_offset_extended
 242  				// Semantics are the same as DW_CFA_offset, but the encoding is
 243  				// different. Ignore it just like DW_CFA_offset.
 244  				_, err := readULEB128(r) // ULEB128 register
 245  				if err != nil {
 246  					return nil, err
 247  				}
 248  				_, err = readULEB128(r) // ULEB128 offset
 249  				if err != nil {
 250  					return nil, err
 251  				}
 252  			case 0x07: // DW_CFA_undefined
 253  				// Marks a single register as undefined. This is used to stop
 254  				// unwinding in moxie_startTask using:
 255  				//     .cfi_undefined lr
 256  				// Ignore this directive.
 257  				_, err := readULEB128(r)
 258  				if err != nil {
 259  					return nil, err
 260  				}
 261  			case 0x09: // DW_CFA_register
 262  				// Copies a register. Emitted by the machine outliner, for example.
 263  				// It should be possible to ignore this.
 264  				// TODO: check that the stack pointer is not affected.
 265  				_, err := readULEB128(r)
 266  				if err != nil {
 267  					return nil, err
 268  				}
 269  				_, err = readULEB128(r)
 270  				if err != nil {
 271  					return nil, err
 272  				}
 273  			case 0x0c: // DW_CFA_def_cfa
 274  				register, err := readULEB128(r)
 275  				if err != nil {
 276  					return nil, err
 277  				}
 278  				offset, err := readULEB128(r)
 279  				if err != nil {
 280  					return nil, err
 281  				}
 282  				fi.cfaRegister = register
 283  				fi.cfaOffset = offset
 284  			case 0x0e: // DW_CFA_def_cfa_offset
 285  				offset, err := readULEB128(r)
 286  				if err != nil {
 287  					return nil, err
 288  				}
 289  				fi.cfaOffset = offset
 290  			default:
 291  				return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x (for address 0x%x)", op, fi.loc)
 292  			}
 293  		default:
 294  			return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x (for address 0x%x)", op, fi.loc)
 295  		}
 296  	}
 297  }
 298  
 299  // Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
 300  func readULEB128(r *bytes.Buffer) (result uint64, err error) {
 301  	// TODO: guard against overflowing 64-bit integers.
 302  	var shift uint8
 303  	for {
 304  		b, err := r.ReadByte()
 305  		if err != nil {
 306  			return 0, err
 307  		}
 308  		result |= uint64(b&0x7f) << shift
 309  		if b&0x80 == 0 {
 310  			break
 311  		}
 312  		shift += 7
 313  	}
 314  	return
 315  }
 316  
 317  // Source: https://en.wikipedia.org/wiki/LEB128#Decode_signed_integer
 318  func readSLEB128(r *bytes.Buffer) (result int64, err error) {
 319  	var shift uint8
 320  
 321  	var b byte
 322  	var rawResult uint64
 323  	for {
 324  		b, err = r.ReadByte()
 325  		if err != nil {
 326  			return 0, err
 327  		}
 328  		rawResult |= uint64(b&0x7f) << shift
 329  		shift += 7
 330  		if b&0x80 == 0 {
 331  			break
 332  		}
 333  	}
 334  
 335  	// sign bit of byte is second high order bit (0x40)
 336  	if shift < 64 && b&0x40 != 0 {
 337  		// sign extend
 338  		rawResult |= ^uint64(0) << shift
 339  	}
 340  	result = int64(rawResult)
 341  
 342  	return
 343  }
 344