1 package interp
2 3 // This file compiles the LLVM IR to a form that's easy to efficiently
4 // interpret.
5 6 import (
7 "strings"
8 9 "tinygo.org/x/go-llvm"
10 )
11 12 // A function is a compiled LLVM function, which means that interpreting it
13 // avoids most CGo calls necessary. This is done in a separate step so the
14 // result can be cached.
15 // Functions are in SSA form, just like the LLVM version if it. The first block
16 // (blocks[0]) is the entry block.
17 type function struct {
18 llvmFn llvm.Value
19 name string // precalculated llvmFn.Name()
20 params []llvm.Value // precalculated llvmFn.Params()
21 blocks []*basicBlock
22 locals map[llvm.Value]int
23 }
24 25 // basicBlock represents a LLVM basic block and contains a slice of
26 // instructions. The last instruction must be a terminator instruction.
27 type basicBlock struct {
28 phiNodes []instruction
29 instructions []instruction
30 }
31 32 // instruction is a precompiled LLVM IR instruction. The operands can be either
33 // an already known value (such as literalValue or pointerValue) but can also be
34 // the special localValue, which means that the value is a function parameter or
35 // is produced by another instruction in the function. In that case, the
36 // interpreter will replace the operand with that local value.
37 type instruction struct {
38 opcode llvm.Opcode
39 localIndex int
40 operands []value
41 llvmInst llvm.Value
42 name string
43 }
44 45 // String returns a nice human-readable version of this instruction.
46 func (inst *instruction) String() string {
47 operands := make([]string, len(inst.operands))
48 for i, op := range inst.operands {
49 operands[i] = op.String()
50 }
51 52 name := ""
53 if int(inst.opcode) < len(instructionNameMap) {
54 name = instructionNameMap[inst.opcode]
55 }
56 if name == "" {
57 name = "<unknown op>"
58 }
59 return name + " " + strings.Join(operands, " ")
60 }
61 62 // compileFunction compiles a given LLVM function to an easier to interpret
63 // version of the function. As far as possible, all operands are preprocessed so
64 // that the interpreter doesn't have to call into LLVM.
65 func (r *runner) compileFunction(llvmFn llvm.Value) *function {
66 fn := &function{
67 llvmFn: llvmFn,
68 name: llvmFn.Name(),
69 params: llvmFn.Params(),
70 locals: make(map[llvm.Value]int),
71 }
72 if llvmFn.IsDeclaration() {
73 // Nothing to do.
74 return fn
75 }
76 77 for i, param := range fn.params {
78 fn.locals[param] = i
79 }
80 81 // Make a map of all the blocks, to quickly find the block number for a
82 // given branch instruction.
83 blockIndices := make(map[llvm.Value]int)
84 for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) {
85 index := len(blockIndices)
86 blockIndices[llvmBB.AsValue()] = index
87 }
88 89 // Compile every block.
90 for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) {
91 bb := &basicBlock{}
92 fn.blocks = append(fn.blocks, bb)
93 94 // Compile every instruction in the block.
95 for llvmInst := llvmBB.FirstInstruction(); !llvmInst.IsNil(); llvmInst = llvm.NextInstruction(llvmInst) {
96 // Create instruction skeleton.
97 opcode := llvmInst.InstructionOpcode()
98 inst := instruction{
99 opcode: opcode,
100 localIndex: len(fn.locals),
101 llvmInst: llvmInst,
102 }
103 fn.locals[llvmInst] = len(fn.locals)
104 105 // Add operands specific for this instruction.
106 switch opcode {
107 case llvm.Ret:
108 // Return instruction, which can either be a `ret void` (no
109 // return value) or return a value.
110 numOperands := llvmInst.OperandsCount()
111 if numOperands != 0 {
112 inst.operands = []value{
113 r.getValue(llvmInst.Operand(0)),
114 }
115 }
116 case llvm.Br:
117 // Branch instruction. Can be either a conditional branch (with
118 // 3 operands) or unconditional branch (with just one basic
119 // block operand).
120 numOperands := llvmInst.OperandsCount()
121 switch numOperands {
122 case 3:
123 // Conditional jump to one of two blocks. Comparable to an
124 // if/else in procedural languages.
125 thenBB := llvmInst.Operand(2)
126 elseBB := llvmInst.Operand(1)
127 inst.operands = []value{
128 r.getValue(llvmInst.Operand(0)),
129 literalValue{uint32(blockIndices[thenBB])},
130 literalValue{uint32(blockIndices[elseBB])},
131 }
132 case 1:
133 // Unconditional jump to a target basic block. Comparable to
134 // a jump in C and Go.
135 jumpBB := llvmInst.Operand(0)
136 inst.operands = []value{
137 literalValue{uint32(blockIndices[jumpBB])},
138 }
139 default:
140 panic("unknown number of operands")
141 }
142 case llvm.Switch:
143 // A switch is an array of (value, label) pairs, of which the
144 // first one indicates the to-switch value and the default
145 // label.
146 numOperands := llvmInst.OperandsCount()
147 for i := 0; i < numOperands; i += 2 {
148 inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i)))
149 inst.operands = append(inst.operands, literalValue{uint32(blockIndices[llvmInst.Operand(i+1)])})
150 }
151 case llvm.PHI:
152 inst.name = llvmInst.Name()
153 incomingCount := inst.llvmInst.IncomingCount()
154 for i := 0; i < incomingCount; i++ {
155 incomingBB := inst.llvmInst.IncomingBlock(i)
156 incomingValue := inst.llvmInst.IncomingValue(i)
157 inst.operands = append(inst.operands,
158 literalValue{uint32(blockIndices[incomingBB.AsValue()])},
159 r.getValue(incomingValue),
160 )
161 }
162 case llvm.Select:
163 // Select is a special instruction that is much like a ternary
164 // operator. It produces operand 1 or 2 based on the boolean
165 // that is operand 0.
166 inst.name = llvmInst.Name()
167 inst.operands = []value{
168 r.getValue(llvmInst.Operand(0)),
169 r.getValue(llvmInst.Operand(1)),
170 r.getValue(llvmInst.Operand(2)),
171 }
172 case llvm.Call:
173 // Call is a regular function call but could also be a runtime
174 // intrinsic. Some runtime intrinsics are treated specially by
175 // the interpreter, such as runtime.alloc. We don't
176 // differentiate between them here because these calls may also
177 // need to be run at runtime, in which case they should all be
178 // created in the same way.
179 llvmCalledValue := llvmInst.CalledValue()
180 if !llvmCalledValue.IsAFunction().IsNil() {
181 name := llvmCalledValue.Name()
182 if name == "llvm.dbg.value" || strings.HasPrefix(name, "llvm.lifetime.") {
183 // These intrinsics should not be interpreted, they are not
184 // relevant to the execution of this function.
185 continue
186 }
187 }
188 inst.name = llvmInst.Name()
189 numOperands := llvmInst.OperandsCount()
190 inst.operands = append(inst.operands, r.getValue(llvmCalledValue))
191 for i := 0; i < numOperands-1; i++ {
192 inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i)))
193 }
194 case llvm.Load:
195 // Load instruction. The interpreter will load from the
196 // appropriate memory view.
197 // Also provide the memory size to be loaded, which is necessary
198 // with a lack of type information.
199 inst.name = llvmInst.Name()
200 inst.operands = []value{
201 r.getValue(llvmInst.Operand(0)),
202 literalValue{r.targetData.TypeAllocSize(llvmInst.Type())},
203 }
204 case llvm.Store:
205 // Store instruction. The interpreter will create a new object
206 // in the memory view of the function invocation and store to
207 // that, to make it possible to roll back this store.
208 inst.operands = []value{
209 r.getValue(llvmInst.Operand(0)),
210 r.getValue(llvmInst.Operand(1)),
211 }
212 case llvm.Alloca:
213 // Alloca allocates stack space for local variables.
214 numElements := r.getValue(inst.llvmInst.Operand(0)).(literalValue).value.(uint32)
215 elementSize := r.targetData.TypeAllocSize(inst.llvmInst.AllocatedType())
216 inst.operands = []value{
217 literalValue{elementSize * uint64(numElements)},
218 }
219 case llvm.GetElementPtr:
220 // GetElementPtr does pointer arithmetic.
221 inst.name = llvmInst.Name()
222 ptr := llvmInst.Operand(0)
223 n := llvmInst.OperandsCount()
224 elementType := llvmInst.GEPSourceElementType()
225 // gep: [source ptr, dest value size, pairs of indices...]
226 inst.operands = []value{
227 r.getValue(ptr),
228 r.getValue(llvmInst.Operand(1)),
229 literalValue{r.targetData.TypeAllocSize(elementType)},
230 }
231 for i := 2; i < n; i++ {
232 operand := r.getValue(llvmInst.Operand(i))
233 switch elementType.TypeKind() {
234 case llvm.StructTypeKind:
235 index := operand.(literalValue).value.(uint32)
236 elementOffset := r.targetData.ElementOffset(elementType, int(index))
237 // Encode operands in a special way. The elementOffset
238 // is just the offset in bytes. The elementSize is a
239 // negative number (when cast to a int64) by flipping
240 // all the bits. This allows the interpreter to detect
241 // this is a struct field and that it should not
242 // multiply it with the elementOffset to get the offset.
243 // It is important for the interpreter to know the
244 // struct field index for when the GEP must be done at
245 // runtime.
246 inst.operands = append(inst.operands, literalValue{elementOffset}, literalValue{^uint64(index)})
247 elementType = elementType.StructElementTypes()[index]
248 case llvm.ArrayTypeKind:
249 elementType = elementType.ElementType()
250 elementSize := r.targetData.TypeAllocSize(elementType)
251 elementSizeOperand := literalValue{elementSize}
252 // Add operand * elementSizeOperand bytes to the pointer.
253 inst.operands = append(inst.operands, operand, elementSizeOperand)
254 default:
255 // This should be unreachable.
256 panic("unknown type: " + elementType.String())
257 }
258 }
259 case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt:
260 // Bitcasts are usually used to cast a pointer from one type to
261 // another leaving the pointer itself intact.
262 inst.name = llvmInst.Name()
263 inst.operands = []value{
264 r.getValue(llvmInst.Operand(0)),
265 }
266 case llvm.ExtractValue:
267 inst.name = llvmInst.Name()
268 agg := llvmInst.Operand(0)
269 var offset uint64
270 indexingType := agg.Type()
271 for _, index := range inst.llvmInst.Indices() {
272 switch indexingType.TypeKind() {
273 case llvm.StructTypeKind:
274 offset += r.targetData.ElementOffset(indexingType, int(index))
275 indexingType = indexingType.StructElementTypes()[index]
276 case llvm.ArrayTypeKind:
277 indexingType = indexingType.ElementType()
278 elementSize := r.targetData.TypeAllocSize(indexingType)
279 offset += elementSize * uint64(index)
280 default:
281 panic("unknown type kind") // unreachable
282 }
283 }
284 size := r.targetData.TypeAllocSize(inst.llvmInst.Type())
285 // extractvalue [agg, byteOffset, byteSize]
286 inst.operands = []value{
287 r.getValue(agg),
288 literalValue{offset},
289 literalValue{size},
290 }
291 case llvm.InsertValue:
292 inst.name = llvmInst.Name()
293 agg := llvmInst.Operand(0)
294 var offset uint64
295 indexingType := agg.Type()
296 for _, index := range inst.llvmInst.Indices() {
297 switch indexingType.TypeKind() {
298 case llvm.StructTypeKind:
299 offset += r.targetData.ElementOffset(indexingType, int(index))
300 indexingType = indexingType.StructElementTypes()[index]
301 case llvm.ArrayTypeKind:
302 indexingType = indexingType.ElementType()
303 elementSize := r.targetData.TypeAllocSize(indexingType)
304 offset += elementSize * uint64(index)
305 default:
306 panic("unknown type kind") // unreachable
307 }
308 }
309 // insertvalue [agg, elt, byteOffset]
310 inst.operands = []value{
311 r.getValue(agg),
312 r.getValue(llvmInst.Operand(1)),
313 literalValue{offset},
314 }
315 case llvm.ICmp:
316 inst.name = llvmInst.Name()
317 inst.operands = []value{
318 r.getValue(llvmInst.Operand(0)),
319 r.getValue(llvmInst.Operand(1)),
320 literalValue{uint8(llvmInst.IntPredicate())},
321 }
322 case llvm.FCmp:
323 inst.name = llvmInst.Name()
324 inst.operands = []value{
325 r.getValue(llvmInst.Operand(0)),
326 r.getValue(llvmInst.Operand(1)),
327 literalValue{uint8(llvmInst.FloatPredicate())},
328 }
329 case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor:
330 // Integer binary operations.
331 inst.name = llvmInst.Name()
332 inst.operands = []value{
333 r.getValue(llvmInst.Operand(0)),
334 r.getValue(llvmInst.Operand(1)),
335 }
336 case llvm.SExt, llvm.ZExt, llvm.Trunc:
337 // Extend or shrink an integer size.
338 // No sign extension going on so easy to do.
339 // zext: [value, bitwidth]
340 // trunc: [value, bitwidth]
341 inst.name = llvmInst.Name()
342 inst.operands = []value{
343 r.getValue(llvmInst.Operand(0)),
344 literalValue{uint64(llvmInst.Type().IntTypeWidth())},
345 }
346 case llvm.SIToFP, llvm.UIToFP:
347 // Convert an integer to a floating point instruction.
348 // opcode: [value, bitwidth]
349 inst.name = llvmInst.Name()
350 inst.operands = []value{
351 r.getValue(llvmInst.Operand(0)),
352 literalValue{uint64(r.targetData.TypeAllocSize(llvmInst.Type()) * 8)},
353 }
354 default:
355 // Unknown instruction, which is already set in inst.opcode so
356 // is detectable.
357 // This error is handled when actually trying to interpret this
358 // instruction (to not trigger on code that won't be executed).
359 }
360 if inst.opcode == llvm.PHI {
361 // PHI nodes need to be treated specially, see the comment in
362 // interpreter.go for an explanation.
363 bb.phiNodes = append(bb.phiNodes, inst)
364 } else {
365 bb.instructions = append(bb.instructions, inst)
366 }
367 }
368 }
369 return fn
370 }
371 372 // instructionNameMap maps from instruction opcodes to instruction names. This
373 // can be useful for debug logging.
374 var instructionNameMap = [...]string{
375 llvm.Ret: "ret",
376 llvm.Br: "br",
377 llvm.Switch: "switch",
378 llvm.IndirectBr: "indirectbr",
379 llvm.Invoke: "invoke",
380 llvm.Unreachable: "unreachable",
381 382 // Standard Binary Operators
383 llvm.Add: "add",
384 llvm.FAdd: "fadd",
385 llvm.Sub: "sub",
386 llvm.FSub: "fsub",
387 llvm.Mul: "mul",
388 llvm.FMul: "fmul",
389 llvm.UDiv: "udiv",
390 llvm.SDiv: "sdiv",
391 llvm.FDiv: "fdiv",
392 llvm.URem: "urem",
393 llvm.SRem: "srem",
394 llvm.FRem: "frem",
395 396 // Logical Operators
397 llvm.Shl: "shl",
398 llvm.LShr: "lshr",
399 llvm.AShr: "ashr",
400 llvm.And: "and",
401 llvm.Or: "or",
402 llvm.Xor: "xor",
403 404 // Memory Operators
405 llvm.Alloca: "alloca",
406 llvm.Load: "load",
407 llvm.Store: "store",
408 llvm.GetElementPtr: "getelementptr",
409 410 // Cast Operators
411 llvm.Trunc: "trunc",
412 llvm.ZExt: "zext",
413 llvm.SExt: "sext",
414 llvm.FPToUI: "fptoui",
415 llvm.FPToSI: "fptosi",
416 llvm.UIToFP: "uitofp",
417 llvm.SIToFP: "sitofp",
418 llvm.FPTrunc: "fptrunc",
419 llvm.FPExt: "fpext",
420 llvm.PtrToInt: "ptrtoint",
421 llvm.IntToPtr: "inttoptr",
422 llvm.BitCast: "bitcast",
423 424 // Other Operators
425 llvm.ICmp: "icmp",
426 llvm.FCmp: "fcmp",
427 llvm.PHI: "phi",
428 llvm.Call: "call",
429 llvm.Select: "select",
430 llvm.VAArg: "vaarg",
431 llvm.ExtractElement: "extractelement",
432 llvm.InsertElement: "insertelement",
433 llvm.ShuffleVector: "shufflevector",
434 llvm.ExtractValue: "extractvalue",
435 llvm.InsertValue: "insertvalue",
436 }
437