compiler.go raw

   1  package interp
   2  
   3  // This file compiles the LLVM IR to a form that's easy to efficiently
   4  // interpret.
   5  
   6  import (
   7  	"strings"
   8  
   9  	"tinygo.org/x/go-llvm"
  10  )
  11  
  12  // A function is a compiled LLVM function, which means that interpreting it
  13  // avoids most CGo calls necessary. This is done in a separate step so the
  14  // result can be cached.
  15  // Functions are in SSA form, just like the LLVM version if it. The first block
  16  // (blocks[0]) is the entry block.
  17  type function struct {
  18  	llvmFn llvm.Value
  19  	name   string       // precalculated llvmFn.Name()
  20  	params []llvm.Value // precalculated llvmFn.Params()
  21  	blocks []*basicBlock
  22  	locals map[llvm.Value]int
  23  }
  24  
  25  // basicBlock represents a LLVM basic block and contains a slice of
  26  // instructions. The last instruction must be a terminator instruction.
  27  type basicBlock struct {
  28  	phiNodes     []instruction
  29  	instructions []instruction
  30  }
  31  
  32  // instruction is a precompiled LLVM IR instruction. The operands can be either
  33  // an already known value (such as literalValue or pointerValue) but can also be
  34  // the special localValue, which means that the value is a function parameter or
  35  // is produced by another instruction in the function. In that case, the
  36  // interpreter will replace the operand with that local value.
  37  type instruction struct {
  38  	opcode     llvm.Opcode
  39  	localIndex int
  40  	operands   []value
  41  	llvmInst   llvm.Value
  42  	name       string
  43  }
  44  
  45  // String returns a nice human-readable version of this instruction.
  46  func (inst *instruction) String() string {
  47  	operands := make([]string, len(inst.operands))
  48  	for i, op := range inst.operands {
  49  		operands[i] = op.String()
  50  	}
  51  
  52  	name := ""
  53  	if int(inst.opcode) < len(instructionNameMap) {
  54  		name = instructionNameMap[inst.opcode]
  55  	}
  56  	if name == "" {
  57  		name = "<unknown op>"
  58  	}
  59  	return name + " " + strings.Join(operands, " ")
  60  }
  61  
  62  // compileFunction compiles a given LLVM function to an easier to interpret
  63  // version of the function. As far as possible, all operands are preprocessed so
  64  // that the interpreter doesn't have to call into LLVM.
  65  func (r *runner) compileFunction(llvmFn llvm.Value) *function {
  66  	fn := &function{
  67  		llvmFn: llvmFn,
  68  		name:   llvmFn.Name(),
  69  		params: llvmFn.Params(),
  70  		locals: make(map[llvm.Value]int),
  71  	}
  72  	if llvmFn.IsDeclaration() {
  73  		// Nothing to do.
  74  		return fn
  75  	}
  76  
  77  	for i, param := range fn.params {
  78  		fn.locals[param] = i
  79  	}
  80  
  81  	// Make a map of all the blocks, to quickly find the block number for a
  82  	// given branch instruction.
  83  	blockIndices := make(map[llvm.Value]int)
  84  	for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) {
  85  		index := len(blockIndices)
  86  		blockIndices[llvmBB.AsValue()] = index
  87  	}
  88  
  89  	// Compile every block.
  90  	for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) {
  91  		bb := &basicBlock{}
  92  		fn.blocks = append(fn.blocks, bb)
  93  
  94  		// Compile every instruction in the block.
  95  		for llvmInst := llvmBB.FirstInstruction(); !llvmInst.IsNil(); llvmInst = llvm.NextInstruction(llvmInst) {
  96  			// Create instruction skeleton.
  97  			opcode := llvmInst.InstructionOpcode()
  98  			inst := instruction{
  99  				opcode:     opcode,
 100  				localIndex: len(fn.locals),
 101  				llvmInst:   llvmInst,
 102  			}
 103  			fn.locals[llvmInst] = len(fn.locals)
 104  
 105  			// Add operands specific for this instruction.
 106  			switch opcode {
 107  			case llvm.Ret:
 108  				// Return instruction, which can either be a `ret void` (no
 109  				// return value) or return a value.
 110  				numOperands := llvmInst.OperandsCount()
 111  				if numOperands != 0 {
 112  					inst.operands = []value{
 113  						r.getValue(llvmInst.Operand(0)),
 114  					}
 115  				}
 116  			case llvm.Br:
 117  				// Branch instruction. Can be either a conditional branch (with
 118  				// 3 operands) or unconditional branch (with just one basic
 119  				// block operand).
 120  				numOperands := llvmInst.OperandsCount()
 121  				switch numOperands {
 122  				case 3:
 123  					// Conditional jump to one of two blocks. Comparable to an
 124  					// if/else in procedural languages.
 125  					thenBB := llvmInst.Operand(2)
 126  					elseBB := llvmInst.Operand(1)
 127  					inst.operands = []value{
 128  						r.getValue(llvmInst.Operand(0)),
 129  						literalValue{uint32(blockIndices[thenBB])},
 130  						literalValue{uint32(blockIndices[elseBB])},
 131  					}
 132  				case 1:
 133  					// Unconditional jump to a target basic block. Comparable to
 134  					// a jump in C and Go.
 135  					jumpBB := llvmInst.Operand(0)
 136  					inst.operands = []value{
 137  						literalValue{uint32(blockIndices[jumpBB])},
 138  					}
 139  				default:
 140  					panic("unknown number of operands")
 141  				}
 142  			case llvm.Switch:
 143  				// A switch is an array of (value, label) pairs, of which the
 144  				// first one indicates the to-switch value and the default
 145  				// label.
 146  				numOperands := llvmInst.OperandsCount()
 147  				for i := 0; i < numOperands; i += 2 {
 148  					inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i)))
 149  					inst.operands = append(inst.operands, literalValue{uint32(blockIndices[llvmInst.Operand(i+1)])})
 150  				}
 151  			case llvm.PHI:
 152  				inst.name = llvmInst.Name()
 153  				incomingCount := inst.llvmInst.IncomingCount()
 154  				for i := 0; i < incomingCount; i++ {
 155  					incomingBB := inst.llvmInst.IncomingBlock(i)
 156  					incomingValue := inst.llvmInst.IncomingValue(i)
 157  					inst.operands = append(inst.operands,
 158  						literalValue{uint32(blockIndices[incomingBB.AsValue()])},
 159  						r.getValue(incomingValue),
 160  					)
 161  				}
 162  			case llvm.Select:
 163  				// Select is a special instruction that is much like a ternary
 164  				// operator. It produces operand 1 or 2 based on the boolean
 165  				// that is operand 0.
 166  				inst.name = llvmInst.Name()
 167  				inst.operands = []value{
 168  					r.getValue(llvmInst.Operand(0)),
 169  					r.getValue(llvmInst.Operand(1)),
 170  					r.getValue(llvmInst.Operand(2)),
 171  				}
 172  			case llvm.Call:
 173  				// Call is a regular function call but could also be a runtime
 174  				// intrinsic. Some runtime intrinsics are treated specially by
 175  				// the interpreter, such as runtime.alloc. We don't
 176  				// differentiate between them here because these calls may also
 177  				// need to be run at runtime, in which case they should all be
 178  				// created in the same way.
 179  				llvmCalledValue := llvmInst.CalledValue()
 180  				if !llvmCalledValue.IsAFunction().IsNil() {
 181  					name := llvmCalledValue.Name()
 182  					if name == "llvm.dbg.value" || strings.HasPrefix(name, "llvm.lifetime.") {
 183  						// These intrinsics should not be interpreted, they are not
 184  						// relevant to the execution of this function.
 185  						continue
 186  					}
 187  				}
 188  				inst.name = llvmInst.Name()
 189  				numOperands := llvmInst.OperandsCount()
 190  				inst.operands = append(inst.operands, r.getValue(llvmCalledValue))
 191  				for i := 0; i < numOperands-1; i++ {
 192  					inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i)))
 193  				}
 194  			case llvm.Load:
 195  				// Load instruction. The interpreter will load from the
 196  				// appropriate memory view.
 197  				// Also provide the memory size to be loaded, which is necessary
 198  				// with a lack of type information.
 199  				inst.name = llvmInst.Name()
 200  				inst.operands = []value{
 201  					r.getValue(llvmInst.Operand(0)),
 202  					literalValue{r.targetData.TypeAllocSize(llvmInst.Type())},
 203  				}
 204  			case llvm.Store:
 205  				// Store instruction. The interpreter will create a new object
 206  				// in the memory view of the function invocation and store to
 207  				// that, to make it possible to roll back this store.
 208  				inst.operands = []value{
 209  					r.getValue(llvmInst.Operand(0)),
 210  					r.getValue(llvmInst.Operand(1)),
 211  				}
 212  			case llvm.Alloca:
 213  				// Alloca allocates stack space for local variables.
 214  				numElements := r.getValue(inst.llvmInst.Operand(0)).(literalValue).value.(uint32)
 215  				elementSize := r.targetData.TypeAllocSize(inst.llvmInst.AllocatedType())
 216  				inst.operands = []value{
 217  					literalValue{elementSize * uint64(numElements)},
 218  				}
 219  			case llvm.GetElementPtr:
 220  				// GetElementPtr does pointer arithmetic.
 221  				inst.name = llvmInst.Name()
 222  				ptr := llvmInst.Operand(0)
 223  				n := llvmInst.OperandsCount()
 224  				elementType := llvmInst.GEPSourceElementType()
 225  				// gep: [source ptr, dest value size, pairs of indices...]
 226  				inst.operands = []value{
 227  					r.getValue(ptr),
 228  					r.getValue(llvmInst.Operand(1)),
 229  					literalValue{r.targetData.TypeAllocSize(elementType)},
 230  				}
 231  				for i := 2; i < n; i++ {
 232  					operand := r.getValue(llvmInst.Operand(i))
 233  					switch elementType.TypeKind() {
 234  					case llvm.StructTypeKind:
 235  						index := operand.(literalValue).value.(uint32)
 236  						elementOffset := r.targetData.ElementOffset(elementType, int(index))
 237  						// Encode operands in a special way. The elementOffset
 238  						// is just the offset in bytes. The elementSize is a
 239  						// negative number (when cast to a int64) by flipping
 240  						// all the bits. This allows the interpreter to detect
 241  						// this is a struct field and that it should not
 242  						// multiply it with the elementOffset to get the offset.
 243  						// It is important for the interpreter to know the
 244  						// struct field index for when the GEP must be done at
 245  						// runtime.
 246  						inst.operands = append(inst.operands, literalValue{elementOffset}, literalValue{^uint64(index)})
 247  						elementType = elementType.StructElementTypes()[index]
 248  					case llvm.ArrayTypeKind:
 249  						elementType = elementType.ElementType()
 250  						elementSize := r.targetData.TypeAllocSize(elementType)
 251  						elementSizeOperand := literalValue{elementSize}
 252  						// Add operand * elementSizeOperand bytes to the pointer.
 253  						inst.operands = append(inst.operands, operand, elementSizeOperand)
 254  					default:
 255  						// This should be unreachable.
 256  						panic("unknown type: " + elementType.String())
 257  					}
 258  				}
 259  			case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt:
 260  				// Bitcasts are usually used to cast a pointer from one type to
 261  				// another leaving the pointer itself intact.
 262  				inst.name = llvmInst.Name()
 263  				inst.operands = []value{
 264  					r.getValue(llvmInst.Operand(0)),
 265  				}
 266  			case llvm.ExtractValue:
 267  				inst.name = llvmInst.Name()
 268  				agg := llvmInst.Operand(0)
 269  				var offset uint64
 270  				indexingType := agg.Type()
 271  				for _, index := range inst.llvmInst.Indices() {
 272  					switch indexingType.TypeKind() {
 273  					case llvm.StructTypeKind:
 274  						offset += r.targetData.ElementOffset(indexingType, int(index))
 275  						indexingType = indexingType.StructElementTypes()[index]
 276  					case llvm.ArrayTypeKind:
 277  						indexingType = indexingType.ElementType()
 278  						elementSize := r.targetData.TypeAllocSize(indexingType)
 279  						offset += elementSize * uint64(index)
 280  					default:
 281  						panic("unknown type kind") // unreachable
 282  					}
 283  				}
 284  				size := r.targetData.TypeAllocSize(inst.llvmInst.Type())
 285  				// extractvalue [agg, byteOffset, byteSize]
 286  				inst.operands = []value{
 287  					r.getValue(agg),
 288  					literalValue{offset},
 289  					literalValue{size},
 290  				}
 291  			case llvm.InsertValue:
 292  				inst.name = llvmInst.Name()
 293  				agg := llvmInst.Operand(0)
 294  				var offset uint64
 295  				indexingType := agg.Type()
 296  				for _, index := range inst.llvmInst.Indices() {
 297  					switch indexingType.TypeKind() {
 298  					case llvm.StructTypeKind:
 299  						offset += r.targetData.ElementOffset(indexingType, int(index))
 300  						indexingType = indexingType.StructElementTypes()[index]
 301  					case llvm.ArrayTypeKind:
 302  						indexingType = indexingType.ElementType()
 303  						elementSize := r.targetData.TypeAllocSize(indexingType)
 304  						offset += elementSize * uint64(index)
 305  					default:
 306  						panic("unknown type kind") // unreachable
 307  					}
 308  				}
 309  				// insertvalue [agg, elt, byteOffset]
 310  				inst.operands = []value{
 311  					r.getValue(agg),
 312  					r.getValue(llvmInst.Operand(1)),
 313  					literalValue{offset},
 314  				}
 315  			case llvm.ICmp:
 316  				inst.name = llvmInst.Name()
 317  				inst.operands = []value{
 318  					r.getValue(llvmInst.Operand(0)),
 319  					r.getValue(llvmInst.Operand(1)),
 320  					literalValue{uint8(llvmInst.IntPredicate())},
 321  				}
 322  			case llvm.FCmp:
 323  				inst.name = llvmInst.Name()
 324  				inst.operands = []value{
 325  					r.getValue(llvmInst.Operand(0)),
 326  					r.getValue(llvmInst.Operand(1)),
 327  					literalValue{uint8(llvmInst.FloatPredicate())},
 328  				}
 329  			case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor:
 330  				// Integer binary operations.
 331  				inst.name = llvmInst.Name()
 332  				inst.operands = []value{
 333  					r.getValue(llvmInst.Operand(0)),
 334  					r.getValue(llvmInst.Operand(1)),
 335  				}
 336  			case llvm.SExt, llvm.ZExt, llvm.Trunc:
 337  				// Extend or shrink an integer size.
 338  				// No sign extension going on so easy to do.
 339  				// zext: [value, bitwidth]
 340  				// trunc: [value, bitwidth]
 341  				inst.name = llvmInst.Name()
 342  				inst.operands = []value{
 343  					r.getValue(llvmInst.Operand(0)),
 344  					literalValue{uint64(llvmInst.Type().IntTypeWidth())},
 345  				}
 346  			case llvm.SIToFP, llvm.UIToFP:
 347  				// Convert an integer to a floating point instruction.
 348  				// opcode: [value, bitwidth]
 349  				inst.name = llvmInst.Name()
 350  				inst.operands = []value{
 351  					r.getValue(llvmInst.Operand(0)),
 352  					literalValue{uint64(r.targetData.TypeAllocSize(llvmInst.Type()) * 8)},
 353  				}
 354  			default:
 355  				// Unknown instruction, which is already set in inst.opcode so
 356  				// is detectable.
 357  				// This error is handled when actually trying to interpret this
 358  				// instruction (to not trigger on code that won't be executed).
 359  			}
 360  			if inst.opcode == llvm.PHI {
 361  				// PHI nodes need to be treated specially, see the comment in
 362  				// interpreter.go for an explanation.
 363  				bb.phiNodes = append(bb.phiNodes, inst)
 364  			} else {
 365  				bb.instructions = append(bb.instructions, inst)
 366  			}
 367  		}
 368  	}
 369  	return fn
 370  }
 371  
 372  // instructionNameMap maps from instruction opcodes to instruction names. This
 373  // can be useful for debug logging.
 374  var instructionNameMap = [...]string{
 375  	llvm.Ret:         "ret",
 376  	llvm.Br:          "br",
 377  	llvm.Switch:      "switch",
 378  	llvm.IndirectBr:  "indirectbr",
 379  	llvm.Invoke:      "invoke",
 380  	llvm.Unreachable: "unreachable",
 381  
 382  	// Standard Binary Operators
 383  	llvm.Add:  "add",
 384  	llvm.FAdd: "fadd",
 385  	llvm.Sub:  "sub",
 386  	llvm.FSub: "fsub",
 387  	llvm.Mul:  "mul",
 388  	llvm.FMul: "fmul",
 389  	llvm.UDiv: "udiv",
 390  	llvm.SDiv: "sdiv",
 391  	llvm.FDiv: "fdiv",
 392  	llvm.URem: "urem",
 393  	llvm.SRem: "srem",
 394  	llvm.FRem: "frem",
 395  
 396  	// Logical Operators
 397  	llvm.Shl:  "shl",
 398  	llvm.LShr: "lshr",
 399  	llvm.AShr: "ashr",
 400  	llvm.And:  "and",
 401  	llvm.Or:   "or",
 402  	llvm.Xor:  "xor",
 403  
 404  	// Memory Operators
 405  	llvm.Alloca:        "alloca",
 406  	llvm.Load:          "load",
 407  	llvm.Store:         "store",
 408  	llvm.GetElementPtr: "getelementptr",
 409  
 410  	// Cast Operators
 411  	llvm.Trunc:    "trunc",
 412  	llvm.ZExt:     "zext",
 413  	llvm.SExt:     "sext",
 414  	llvm.FPToUI:   "fptoui",
 415  	llvm.FPToSI:   "fptosi",
 416  	llvm.UIToFP:   "uitofp",
 417  	llvm.SIToFP:   "sitofp",
 418  	llvm.FPTrunc:  "fptrunc",
 419  	llvm.FPExt:    "fpext",
 420  	llvm.PtrToInt: "ptrtoint",
 421  	llvm.IntToPtr: "inttoptr",
 422  	llvm.BitCast:  "bitcast",
 423  
 424  	// Other Operators
 425  	llvm.ICmp:           "icmp",
 426  	llvm.FCmp:           "fcmp",
 427  	llvm.PHI:            "phi",
 428  	llvm.Call:           "call",
 429  	llvm.Select:         "select",
 430  	llvm.VAArg:          "vaarg",
 431  	llvm.ExtractElement: "extractelement",
 432  	llvm.InsertElement:  "insertelement",
 433  	llvm.ShuffleVector:  "shufflevector",
 434  	llvm.ExtractValue:   "extractvalue",
 435  	llvm.InsertValue:    "insertvalue",
 436  }
 437