add-cfi.i386.awk raw

   1  # Insert GAS CFI directives ("control frame information") into x86-32 asm input
   2  #
   3  # CFI directives tell the assembler how to generate "stack frame" debug info
   4  # This information can tell a debugger (like gdb) how to find the current stack
   5  #   frame at any point in the program code, and how to find the values which
   6  #   various registers had at higher points in the call stack
   7  # With this information, the debugger can show a backtrace, and you can move up
   8  #   and down the call stack and examine the values of local variables
   9  
  10  BEGIN {
  11    # don't put CFI data in the .eh_frame ELF section (which we don't keep)
  12    print ".cfi_sections .debug_frame"
  13  
  14    # only emit CFI directives inside a function
  15    in_function = 0
  16  
  17    # emit .loc directives with line numbers from original source
  18    printf ".file 1 \"%s\"\n", ARGV[1]
  19    line_number = 0
  20  
  21    # used to detect "call label; label:" trick
  22    called = ""
  23  }
  24  
  25  function get_const1() {
  26    # for instructions with 2 operands, get 1st operand (assuming it is constant)
  27    match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/)
  28    return parse_const(substr($0, RSTART, RLENGTH-1))
  29  }
  30  
  31  function canonicalize_reg(register) {
  32    if (match(register, /^e/))
  33      return register
  34    else if (match(register, /[hl]$/)) # AH, AL, BH, BL, etc
  35      return "e" substr(register, 1, 1) "x"
  36    else # AX, BX, CX, etc
  37      return "e" register
  38  }
  39  function get_reg() {
  40    # only use if you already know there is 1 and only 1 register
  41    match($0, /%e?([abcd][hlx]|si|di|bp)/)
  42    return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1))
  43  }
  44  function get_reg1() {
  45    # for instructions with 2 operands, get 1st operand (assuming it is register)
  46    match($0, /%e?([abcd][hlx]|si|di|bp),/)
  47    return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2))
  48  }
  49  function get_reg2() {
  50    # for instructions with 2 operands, get 2nd operand (assuming it is register)
  51    match($0, /,%e?([abcd][hlx]|si|di|bp)/)
  52    return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2))
  53  }
  54  
  55  function adjust_sp_offset(delta) {
  56    if (in_function)
  57      printf ".cfi_adjust_cfa_offset %d\n", delta
  58  }
  59  
  60  {
  61    line_number = line_number + 1
  62  
  63    # clean the input up before doing anything else
  64    # delete comments
  65    gsub(/(#|\/\/).*/, "")
  66  
  67    # canonicalize whitespace
  68    gsub(/[ \t]+/, " ") # mawk doesn't understand \s
  69    gsub(/ *, */, ",")
  70    gsub(/ *: */, ": ")
  71    gsub(/ $/, "")
  72    gsub(/^ /, "")
  73  }
  74  
  75  # check for assembler directives which we care about
  76  /^\.(section|data|text)/ {
  77    # a .cfi_startproc/.cfi_endproc pair should be within the same section
  78    # otherwise, clang will choke when generating ELF output
  79    if (in_function) {
  80      print ".cfi_endproc"
  81      in_function = 0
  82    }
  83  }
  84  /^\.type [a-zA-Z0-9_]+,@function/ {
  85    functions[substr($2, 1, length($2)-10)] = 1
  86  }
  87  # not interested in assembler directives beyond this, just pass them through
  88  /^\./ {
  89    print
  90    next
  91  }
  92  
  93  /^[a-zA-Z0-9_]+:/ {
  94    label = substr($1, 1, length($1)-1) # drop trailing :
  95  
  96    if (called == label) {
  97      # note adjustment of stack pointer from "call label; label:"
  98      adjust_sp_offset(4)
  99    }
 100  
 101    if (functions[label]) {
 102      if (in_function)
 103        print ".cfi_endproc"
 104  
 105      in_function = 1
 106      print ".cfi_startproc"
 107  
 108      for (register in saved)
 109        delete saved[register]
 110      for (register in dirty)
 111        delete dirty[register]
 112    }
 113  
 114    # an instruction may follow on the same line, so continue processing
 115  }
 116  
 117  /^$/ { next }
 118  
 119  {
 120    called = ""
 121    printf ".loc 1 %d\n", line_number
 122    print
 123  }
 124  
 125  # KEEPING UP WITH THE STACK POINTER
 126  # We do NOT attempt to understand foolish and ridiculous tricks like stashing
 127  #   the stack pointer and then using %esp as a scratch register, or bitshifting
 128  #   it or taking its square root or anything stupid like that.
 129  # %esp should only be adjusted by pushing/popping or adding/subtracting constants
 130  #
 131  /pushl?/ {
 132    if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/))
 133      adjust_sp_offset(2)
 134    else
 135      adjust_sp_offset(4)
 136  }
 137  /popl?/ {
 138    if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/))
 139      adjust_sp_offset(-2)
 140    else
 141      adjust_sp_offset(-4)
 142  }
 143  /addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(-get_const1()) }
 144  /subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(get_const1()) }
 145  
 146  /call/ {
 147    if (match($0, /call [0-9]+f/)) # "forward" label
 148      called = substr($0, RSTART+5, RLENGTH-6)
 149    else if (match($0, /call [0-9a-zA-Z_]+/))
 150      called = substr($0, RSTART+5, RLENGTH-5)
 151  }
 152  
 153  # TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
 154  #
 155  /pushl? %e(ax|bx|cx|dx|si|di|bp)/ { # don't match "push (%reg)"
 156    # if a register is being pushed, and its value has not changed since the
 157    #   beginning of this function, the pushed value can be used when printing
 158    #   local variables at the next level up the stack
 159    # emit '.cfi_rel_offset' for that
 160  
 161    if (in_function) {
 162      register = get_reg()
 163      if (!saved[register] && !dirty[register]) {
 164        printf ".cfi_rel_offset %s,0\n", register
 165        saved[register] = 1
 166      }
 167    }
 168  }
 169  
 170  /movl? %e(ax|bx|cx|dx|si|di|bp),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)/ {
 171    if (in_function) {
 172      register = get_reg()
 173      if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)/)) {
 174        offset = parse_const(substr($0, RSTART, RLENGTH-6))
 175      } else {
 176        offset = 0
 177      }
 178      if (!saved[register] && !dirty[register]) {
 179        printf ".cfi_rel_offset %s,%d\n", register, offset
 180        saved[register] = 1
 181      }
 182    }
 183  }
 184  
 185  # IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
 186  # ...then we want to know about it.
 187  #
 188  function trashed(register) {
 189    if (in_function && !saved[register] && !dirty[register]) {
 190      printf ".cfi_undefined %s\n", register
 191    }
 192    dirty[register] = 1
 193  }
 194  # this does NOT exhaustively check for all possible instructions which could
 195  # overwrite a register value inherited from the caller (just the common ones)
 196  /mov.*,%e?([abcd][hlx]|si|di|bp)$/  { trashed(get_reg2()) }
 197  /(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%e?([abcd][hlx]|si|di|bp)$/ {
 198    trashed(get_reg2())
 199  }
 200  /^i?mul [^,]*$/                      { trashed("eax"); trashed("edx") }
 201  /^i?mul.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) }
 202  /^i?div/                             { trashed("eax"); trashed("edx") }
 203  /(dec|inc|not|neg|pop) %e?([abcd][hlx]|si|di|bp)/  { trashed(get_reg()) }
 204  /cpuid/ { trashed("eax"); trashed("ebx"); trashed("ecx"); trashed("edx") }
 205  
 206  END {
 207    if (in_function)
 208      print ".cfi_endproc"
 209  }
 210