inst.go raw
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Package x86asm implements decoding of x86 machine code.
6 package x86asm
7
8 import (
9 "bytes"
10 "fmt"
11 )
12
// An Inst is a single instruction.
//
// Its String method renders the explicit prefixes, the opcode mnemonic,
// and the non-nil arguments as text.
type Inst struct {
	Prefix   Prefixes // Prefixes applied to the instruction; the first zero slot ends the list.
	Op       Op       // Opcode mnemonic
	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
	Args     Args     // Instruction arguments, in Intel order; unused trailing slots are nil
	Mode     int      // processor mode in bits: 16, 32, or 64
	AddrSize int      // address size in bits: 16, 32, or 64
	DataSize int      // operand size in bits: 16, 32, or 64
	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
	Len      int      // length of encoded instruction in bytes
	PCRel    int      // length of PC-relative address in instruction encoding
	PCRelOff int      // index of start of PC-relative address in instruction encoding
}
27
// Prefixes is an array of prefixes associated with a single instruction.
// The prefixes are listed in the same order as found in the instruction:
// each prefix byte corresponds to one slot in the array. The first zero
// in the array marks the end of the prefixes; when all 14 slots are
// non-zero there is no terminating zero and every slot is a prefix.
type Prefixes [14]Prefix
33
// A Prefix represents an Intel instruction prefix.
// The low 8 bits are the actual prefix byte encoding,
// and the top 8 bits contain distinguishing bits and metadata.
//
// The metadata flags are PrefixImplicit, PrefixIgnored, and PrefixInvalid.
// The distinguishing bits (0x100, 0x200) tell apart prefixes that share
// one byte encoding, for example PrefixREPN, PrefixXACQUIRE, and PrefixBND,
// which all have the low byte 0xF2.
type Prefix uint16
38
const (
	// Metadata about the role of a prefix in an instruction.
	// These flags occupy the high bits and are cleared by Prefix.String
	// before the prefix name is looked up.
	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)

	// Memory segment overrides.
	PrefixES Prefix = 0x26 // ES segment override
	PrefixCS Prefix = 0x2E // CS segment override
	PrefixSS Prefix = 0x36 // SS segment override
	PrefixDS Prefix = 0x3E // DS segment override
	PrefixFS Prefix = 0x64 // FS segment override
	PrefixGS Prefix = 0x65 // GS segment override

	// Branch prediction.
	// These share their low byte with PrefixCS (0x2E) and PrefixDS (0x3E);
	// bit 0x100 distinguishes them.
	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)

	// Size attributes.
	// The 0x1xx and 0x2xx values share the byte encoding of the plain
	// override and differ only in the distinguishing bits.
	PrefixDataSize Prefix = 0x66  // operand size override
	PrefixData16   Prefix = 0x166 // byte 0x66 with distinguishing bit 0x100; named "DATA16"
	PrefixData32   Prefix = 0x266 // byte 0x66 with distinguishing bit 0x200; named "DATA32"
	PrefixAddrSize Prefix = 0x67  // address size override
	PrefixAddr16   Prefix = 0x167 // byte 0x67 with distinguishing bit 0x100; named "ADDR16"
	PrefixAddr32   Prefix = 0x267 // byte 0x67 with distinguishing bit 0x200; named "ADDR32"

	// One of a kind.
	PrefixLOCK     Prefix = 0xF0  // lock
	PrefixREPN     Prefix = 0xF2  // repeat not zero
	PrefixXACQUIRE Prefix = 0x1F2 // same byte as PrefixREPN (0xF2), distinguishing bit 0x100
	PrefixBND      Prefix = 0x2F2 // same byte as PrefixREPN (0xF2), distinguishing bit 0x200
	PrefixREP      Prefix = 0xF3  // repeat
	PrefixXRELEASE Prefix = 0x1F3 // same byte as PrefixREP (0xF3), distinguishing bit 0x100

	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
	// The other bits are set or not according to the intended use.
	PrefixREX  Prefix = 0x40 // REX 64-bit extension prefix
	PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
	PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
	PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
	PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)

	// VEX prefix bytes; IsVEX compares the low byte of a Prefix against these.
	PrefixVEX2Bytes Prefix = 0xC5 // Short form of vex prefix
	PrefixVEX3Bytes Prefix = 0xC4 // Long form of vex prefix
)
83
84 // IsREX reports whether p is a REX prefix byte.
85 func (p Prefix) IsREX() bool {
86 return p&0xF0 == PrefixREX
87 }
88
89 func (p Prefix) IsVEX() bool {
90 return p&0xFF == PrefixVEX2Bytes || p&0xFF == PrefixVEX3Bytes
91 }
92
93 func (p Prefix) String() string {
94 p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
95 if s := prefixNames[p]; s != "" {
96 return s
97 }
98
99 if p.IsREX() {
100 s := "REX."
101 if p&PrefixREXW != 0 {
102 s += "W"
103 }
104 if p&PrefixREXR != 0 {
105 s += "R"
106 }
107 if p&PrefixREXX != 0 {
108 s += "X"
109 }
110 if p&PrefixREXB != 0 {
111 s += "B"
112 }
113 return s
114 }
115
116 return fmt.Sprintf("Prefix(%#x)", int(p))
117 }
118
119 // An Op is an x86 opcode.
120 type Op uint32
121
122 func (op Op) String() string {
123 i := int(op)
124 if i < 0 || i >= len(opNames) || opNames[i] == "" {
125 return fmt.Sprintf("Op(%d)", i)
126 }
127 return opNames[i]
128 }
129
// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
// Inst.String stops printing at the first nil element.
type Args [4]Arg
134
// An Arg is a single instruction argument,
// one of these types: Reg, Mem, Imm, Rel.
type Arg interface {
	// String returns the textual form of the argument.
	String() string
	// isArg is an unexported marker method: because it cannot be named
	// outside this package, only types declared here can implement Arg.
	isArg()
}
141
// Note that the implementations of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.
145
// A Reg is a single register.
// The zero Reg value has no name but indicates “no register.”
// The named registers are the constants AL through TR7.
type Reg uint8
149
// Register numbers are assigned sequentially by iota, starting at 1 so that
// the zero Reg means "no register". The order of the constants matters:
// helpers such as regBytes and isSegReg classify a register by comparing it
// against the first and last constant of a group, so do not reorder or
// insert entries in the middle of a group.
const (
	_ Reg = iota

	// 8-bit
	AL
	CL
	DL
	BL
	AH
	CH
	DH
	BH
	SPB
	BPB
	SIB
	DIB
	R8B
	R9B
	R10B
	R11B
	R12B
	R13B
	R14B
	R15B

	// 16-bit
	AX
	CX
	DX
	BX
	SP
	BP
	SI
	DI
	R8W
	R9W
	R10W
	R11W
	R12W
	R13W
	R14W
	R15W

	// 32-bit
	EAX
	ECX
	EDX
	EBX
	ESP
	EBP
	ESI
	EDI
	R8L
	R9L
	R10L
	R11L
	R12L
	R13L
	R14L
	R15L

	// 64-bit
	RAX
	RCX
	RDX
	RBX
	RSP
	RBP
	RSI
	RDI
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	// Instruction pointer.
	IP  // 16-bit
	EIP // 32-bit
	RIP // 64-bit

	// 387 floating point registers.
	F0
	F1
	F2
	F3
	F4
	F5
	F6
	F7

	// MMX registers.
	M0
	M1
	M2
	M3
	M4
	M5
	M6
	M7

	// XMM registers.
	X0
	X1
	X2
	X3
	X4
	X5
	X6
	X7
	X8
	X9
	X10
	X11
	X12
	X13
	X14
	X15

	// Segment registers.
	ES
	CS
	SS
	DS
	FS
	GS

	// System registers.
	GDTR
	IDTR
	LDTR
	MSW
	TASK

	// Control registers.
	CR0
	CR1
	CR2
	CR3
	CR4
	CR5
	CR6
	CR7
	CR8
	CR9
	CR10
	CR11
	CR12
	CR13
	CR14
	CR15

	// Debug registers.
	DR0
	DR1
	DR2
	DR3
	DR4
	DR5
	DR6
	DR7
	DR8
	DR9
	DR10
	DR11
	DR12
	DR13
	DR14
	DR15

	// Task registers.
	TR0
	TR1
	TR2
	TR3
	TR4
	TR5
	TR6
	TR7
)
333
// regMax is the highest-numbered Reg constant (TR7 is the last one declared).
const regMax = TR7
335
// isArg marks Reg as an implementation of Arg.
func (Reg) isArg() {}
337
338 func (r Reg) String() string {
339 i := int(r)
340 if i < 0 || i >= len(regNames) || regNames[i] == "" {
341 return fmt.Sprintf("Reg(%d)", i)
342 }
343 return regNames[i]
344 }
345
// A Mem is a memory reference.
// The general form is Segment:[Base+Scale*Index+Disp].
//
// The String method renders only the bracketed part; Segment is not printed.
type Mem struct {
	Segment Reg   // segment register; not included in the String output
	Base    Reg   // base register; String omits it when zero
	Scale   uint8 // multiplier for Index; String omits the index term when zero
	Index   Reg   // index register; printed only when Scale is non-zero
	Disp    int64 // displacement; printed when non-zero or when there is no base and no index term
}
355
// isArg marks Mem as an implementation of Arg.
func (Mem) isArg() {}
357
358 func (m Mem) String() string {
359 var base, plus, scale, index, disp string
360
361 if m.Base != 0 {
362 base = m.Base.String()
363 }
364 if m.Scale != 0 {
365 if m.Base != 0 {
366 plus = "+"
367 }
368 if m.Scale > 1 {
369 scale = fmt.Sprintf("%d*", m.Scale)
370 }
371 index = m.Index.String()
372 }
373 if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
374 disp = fmt.Sprintf("%+#x", m.Disp)
375 }
376 return "[" + base + plus + scale + index + disp + "]"
377 }
378
// A Rel is an offset relative to the current instruction pointer.
type Rel int32

// isArg marks Rel as an implementation of Arg.
func (Rel) isArg() {}

// String formats the offset as a dot followed by the explicitly signed
// decimal value, for example ".+5" or ".-12".
func (r Rel) String() string {
	offset := int32(r)
	return "." + fmt.Sprintf("%+d", offset)
}
387
// An Imm is an integer constant.
type Imm int64

// isArg marks Imm as an implementation of Arg.
func (Imm) isArg() {}

// String formats the constant in hexadecimal with a "0x" prefix; negative
// values carry a leading minus sign, for example "-0x1".
func (i Imm) String() string {
	value := int64(i)
	return fmt.Sprintf("%#x", value)
}
396
397 func (i Inst) String() string {
398 var buf bytes.Buffer
399 for _, p := range i.Prefix {
400 if p == 0 {
401 break
402 }
403 if p&PrefixImplicit != 0 {
404 continue
405 }
406 fmt.Fprintf(&buf, "%v ", p)
407 }
408 fmt.Fprintf(&buf, "%v", i.Op)
409 sep := " "
410 for _, v := range i.Args {
411 if v == nil {
412 break
413 }
414 fmt.Fprintf(&buf, "%s%v", sep, v)
415 sep = ", "
416 }
417 return buf.String()
418 }
419
420 func isReg(a Arg) bool {
421 _, ok := a.(Reg)
422 return ok
423 }
424
425 func isSegReg(a Arg) bool {
426 r, ok := a.(Reg)
427 return ok && ES <= r && r <= GS
428 }
429
430 func isMem(a Arg) bool {
431 _, ok := a.(Mem)
432 return ok
433 }
434
435 func isImm(a Arg) bool {
436 _, ok := a.(Imm)
437 return ok
438 }
439
440 func regBytes(a Arg) int {
441 r, ok := a.(Reg)
442 if !ok {
443 return 0
444 }
445 if AL <= r && r <= R15B {
446 return 1
447 }
448 if AX <= r && r <= R15W {
449 return 2
450 }
451 if EAX <= r && r <= R15L {
452 return 4
453 }
454 if RAX <= r && r <= R15 {
455 return 8
456 }
457 return 0
458 }
459
460 func isSegment(p Prefix) bool {
461 switch p {
462 case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
463 return true
464 }
465 return false
466 }
467
468 // The Op definitions and string list are in tables.go.
469
470 var prefixNames = map[Prefix]string{
471 PrefixCS: "CS",
472 PrefixDS: "DS",
473 PrefixES: "ES",
474 PrefixFS: "FS",
475 PrefixGS: "GS",
476 PrefixSS: "SS",
477 PrefixLOCK: "LOCK",
478 PrefixREP: "REP",
479 PrefixREPN: "REPN",
480 PrefixAddrSize: "ADDRSIZE",
481 PrefixDataSize: "DATASIZE",
482 PrefixAddr16: "ADDR16",
483 PrefixData16: "DATA16",
484 PrefixAddr32: "ADDR32",
485 PrefixData32: "DATA32",
486 PrefixBND: "BND",
487 PrefixXACQUIRE: "XACQUIRE",
488 PrefixXRELEASE: "XRELEASE",
489 PrefixREX: "REX",
490 PrefixPT: "PT",
491 PrefixPN: "PN",
492 }
493
494 var regNames = [...]string{
495 AL: "AL",
496 CL: "CL",
497 BL: "BL",
498 DL: "DL",
499 AH: "AH",
500 CH: "CH",
501 BH: "BH",
502 DH: "DH",
503 SPB: "SPB",
504 BPB: "BPB",
505 SIB: "SIB",
506 DIB: "DIB",
507 R8B: "R8B",
508 R9B: "R9B",
509 R10B: "R10B",
510 R11B: "R11B",
511 R12B: "R12B",
512 R13B: "R13B",
513 R14B: "R14B",
514 R15B: "R15B",
515 AX: "AX",
516 CX: "CX",
517 BX: "BX",
518 DX: "DX",
519 SP: "SP",
520 BP: "BP",
521 SI: "SI",
522 DI: "DI",
523 R8W: "R8W",
524 R9W: "R9W",
525 R10W: "R10W",
526 R11W: "R11W",
527 R12W: "R12W",
528 R13W: "R13W",
529 R14W: "R14W",
530 R15W: "R15W",
531 EAX: "EAX",
532 ECX: "ECX",
533 EDX: "EDX",
534 EBX: "EBX",
535 ESP: "ESP",
536 EBP: "EBP",
537 ESI: "ESI",
538 EDI: "EDI",
539 R8L: "R8L",
540 R9L: "R9L",
541 R10L: "R10L",
542 R11L: "R11L",
543 R12L: "R12L",
544 R13L: "R13L",
545 R14L: "R14L",
546 R15L: "R15L",
547 RAX: "RAX",
548 RCX: "RCX",
549 RDX: "RDX",
550 RBX: "RBX",
551 RSP: "RSP",
552 RBP: "RBP",
553 RSI: "RSI",
554 RDI: "RDI",
555 R8: "R8",
556 R9: "R9",
557 R10: "R10",
558 R11: "R11",
559 R12: "R12",
560 R13: "R13",
561 R14: "R14",
562 R15: "R15",
563 IP: "IP",
564 EIP: "EIP",
565 RIP: "RIP",
566 F0: "F0",
567 F1: "F1",
568 F2: "F2",
569 F3: "F3",
570 F4: "F4",
571 F5: "F5",
572 F6: "F6",
573 F7: "F7",
574 M0: "M0",
575 M1: "M1",
576 M2: "M2",
577 M3: "M3",
578 M4: "M4",
579 M5: "M5",
580 M6: "M6",
581 M7: "M7",
582 X0: "X0",
583 X1: "X1",
584 X2: "X2",
585 X3: "X3",
586 X4: "X4",
587 X5: "X5",
588 X6: "X6",
589 X7: "X7",
590 X8: "X8",
591 X9: "X9",
592 X10: "X10",
593 X11: "X11",
594 X12: "X12",
595 X13: "X13",
596 X14: "X14",
597 X15: "X15",
598 CS: "CS",
599 SS: "SS",
600 DS: "DS",
601 ES: "ES",
602 FS: "FS",
603 GS: "GS",
604 GDTR: "GDTR",
605 IDTR: "IDTR",
606 LDTR: "LDTR",
607 MSW: "MSW",
608 TASK: "TASK",
609 CR0: "CR0",
610 CR1: "CR1",
611 CR2: "CR2",
612 CR3: "CR3",
613 CR4: "CR4",
614 CR5: "CR5",
615 CR6: "CR6",
616 CR7: "CR7",
617 CR8: "CR8",
618 CR9: "CR9",
619 CR10: "CR10",
620 CR11: "CR11",
621 CR12: "CR12",
622 CR13: "CR13",
623 CR14: "CR14",
624 CR15: "CR15",
625 DR0: "DR0",
626 DR1: "DR1",
627 DR2: "DR2",
628 DR3: "DR3",
629 DR4: "DR4",
630 DR5: "DR5",
631 DR6: "DR6",
632 DR7: "DR7",
633 DR8: "DR8",
634 DR9: "DR9",
635 DR10: "DR10",
636 DR11: "DR11",
637 DR12: "DR12",
638 DR13: "DR13",
639 DR14: "DR14",
640 DR15: "DR15",
641 TR0: "TR0",
642 TR1: "TR1",
643 TR2: "TR2",
644 TR3: "TR3",
645 TR4: "TR4",
646 TR5: "TR5",
647 TR6: "TR6",
648 TR7: "TR7",
649 }
650