inst.go raw

   1  // Copyright 2014 The Go Authors.  All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package x86asm implements decoding of x86 machine code.
   6  package x86asm
   7  
   8  import (
   9  	"bytes"
  10  	"fmt"
  11  )
  12  
// An Inst is a single instruction.
// Its String method renders the non-implicit prefixes, the opcode
// mnemonic, and the arguments, in that order.
type Inst struct {
	Prefix   Prefixes // Prefixes applied to the instruction.
	Op       Op       // Opcode mnemonic
	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
	Args     Args     // Instruction arguments, in Intel order
	Mode     int      // processor mode in bits: 16, 32, or 64
	AddrSize int      // address size in bits: 16, 32, or 64
	DataSize int      // operand size in bits: 16, 32, or 64
	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
	Len      int      // length of encoded instruction in bytes
	PCRel    int      // length of PC-relative address in instruction encoding
	PCRelOff int      // index of start of PC-relative address in instruction encoding
}
  27  
// Prefixes is an array of prefixes associated with a single instruction.
// The prefixes are listed in the same order as found in the instruction:
// each prefix byte corresponds to one slot in the array. The first zero
// in the array marks the end of the prefixes; Inst.String stops reading
// the array at that entry.
type Prefixes [14]Prefix
  33  
// A Prefix represents an Intel instruction prefix.
// The low 8 bits are the actual prefix byte encoding,
// and the top 8 bits contain distinguishing bits and metadata.
// For example, PrefixData16 (0x166) and PrefixDataSize (0x66) share the
// encoding byte 0x66 and differ only in the upper byte.
type Prefix uint16
  38  
// Prefix values. The metadata flags occupy the highest bits and may be
// OR'ed onto any other prefix value; Prefix.String clears them before
// looking up a name.
const (
	// Metadata about the role of a prefix in an instruction.
	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)

	// Memory segment overrides.
	PrefixES Prefix = 0x26 // ES segment override
	PrefixCS Prefix = 0x2E // CS segment override
	PrefixSS Prefix = 0x36 // SS segment override
	PrefixDS Prefix = 0x3E // DS segment override
	PrefixFS Prefix = 0x64 // FS segment override
	PrefixGS Prefix = 0x65 // GS segment override

	// Branch prediction.
	// These reuse the encoding bytes of PrefixCS (0x2E) and PrefixDS (0x3E)
	// and are told apart from them by bit 0x100.
	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)

	// Size attributes.
	// The 16/32 variants reuse the encoding bytes 0x66 and 0x67 and add a
	// distinguishing bit (0x100 or 0x200) in the upper byte.
	// NOTE(review): the names suggest they record the resulting size;
	// confirm against the decoder.
	PrefixDataSize Prefix = 0x66 // operand size override
	PrefixData16   Prefix = 0x166
	PrefixData32   Prefix = 0x266
	PrefixAddrSize Prefix = 0x67 // address size override
	PrefixAddr16   Prefix = 0x167
	PrefixAddr32   Prefix = 0x267

	// One of a kind.
	// PrefixXACQUIRE and PrefixBND share the encoding byte 0xF2 with
	// PrefixREPN; PrefixXRELEASE shares 0xF3 with PrefixREP.
	PrefixLOCK     Prefix = 0xF0 // lock
	PrefixREPN     Prefix = 0xF2 // repeat not zero
	PrefixXACQUIRE Prefix = 0x1F2
	PrefixBND      Prefix = 0x2F2
	PrefixREP      Prefix = 0xF3 // repeat
	PrefixXRELEASE Prefix = 0x1F3

	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
	// The other bits are set or not according to the intended use.
	// PrefixREXW, PrefixREXR, PrefixREXX and PrefixREXB are bit masks that
	// are tested against a REX prefix value (see Prefix.String), not
	// prefix bytes in their own right.
	// The two VEX values at the end of this group are the bytes that
	// Prefix.IsVEX compares against.
	PrefixREX       Prefix = 0x40 // REX 64-bit extension prefix
	PrefixREXW      Prefix = 0x08 // extension bit W (64-bit instruction width)
	PrefixREXR      Prefix = 0x04 // extension bit R (r field in modrm)
	PrefixREXX      Prefix = 0x02 // extension bit X (index field in sib)
	PrefixREXB      Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
	PrefixVEX2Bytes Prefix = 0xC5 // Short form of vex prefix
	PrefixVEX3Bytes Prefix = 0xC4 // Long form of vex prefix
)
  83  
  84  // IsREX reports whether p is a REX prefix byte.
  85  func (p Prefix) IsREX() bool {
  86  	return p&0xF0 == PrefixREX
  87  }
  88  
  89  func (p Prefix) IsVEX() bool {
  90  	return p&0xFF == PrefixVEX2Bytes || p&0xFF == PrefixVEX3Bytes
  91  }
  92  
  93  func (p Prefix) String() string {
  94  	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
  95  	if s := prefixNames[p]; s != "" {
  96  		return s
  97  	}
  98  
  99  	if p.IsREX() {
 100  		s := "REX."
 101  		if p&PrefixREXW != 0 {
 102  			s += "W"
 103  		}
 104  		if p&PrefixREXR != 0 {
 105  			s += "R"
 106  		}
 107  		if p&PrefixREXX != 0 {
 108  			s += "X"
 109  		}
 110  		if p&PrefixREXB != 0 {
 111  			s += "B"
 112  		}
 113  		return s
 114  	}
 115  
 116  	return fmt.Sprintf("Prefix(%#x)", int(p))
 117  }
 118  
// An Op is an x86 opcode.
// Its String method looks the mnemonic up in opNames, indexed by the
// numeric value of the Op. According to the note further down in this
// file, the Op definitions and the string list are in tables.go.
type Op uint32
 121  
 122  func (op Op) String() string {
 123  	i := int(op)
 124  	if i < 0 || i >= len(opNames) || opNames[i] == "" {
 125  		return fmt.Sprintf("Op(%d)", i)
 126  	}
 127  	return opNames[i]
 128  }
 129  
// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
// Inst.String stops printing at the first nil element.
type Args [4]Arg
 134  
// An Arg is a single instruction argument,
// one of these types: Reg, Mem, Imm, Rel.
type Arg interface {
	// String returns the textual form of the argument.
	String() string
	// isArg is an unexported marker method. Each implementation in this
	// file provides it with an empty body.
	isArg()
}
 141  
// Note that the implementations of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.
 145  
// A Reg is a single register.
// The zero Reg value has no name but indicates “no register.”
// Mem.String, for example, omits the base register when Mem.Base is zero.
type Reg uint8
 149  
// Register numbers.
//
// Values are assigned consecutively by iota in the order listed, so each
// group below occupies a contiguous range of Reg values. regBytes and
// isSegReg depend on this: they classify a register by comparing it with
// the first and last member of a group. Reordering or inserting entries
// changes those ranges.
const (
	_ Reg = iota // zero is reserved for "no register"

	// 8-bit
	AL
	CL
	DL
	BL
	AH
	CH
	DH
	BH
	SPB
	BPB
	SIB
	DIB
	R8B
	R9B
	R10B
	R11B
	R12B
	R13B
	R14B
	R15B

	// 16-bit
	AX
	CX
	DX
	BX
	SP
	BP
	SI
	DI
	R8W
	R9W
	R10W
	R11W
	R12W
	R13W
	R14W
	R15W

	// 32-bit
	EAX
	ECX
	EDX
	EBX
	ESP
	EBP
	ESI
	EDI
	R8L
	R9L
	R10L
	R11L
	R12L
	R13L
	R14L
	R15L

	// 64-bit
	RAX
	RCX
	RDX
	RBX
	RSP
	RBP
	RSI
	RDI
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	// Instruction pointer.
	IP  // 16-bit
	EIP // 32-bit
	RIP // 64-bit

	// 387 floating point registers.
	F0
	F1
	F2
	F3
	F4
	F5
	F6
	F7

	// MMX registers.
	M0
	M1
	M2
	M3
	M4
	M5
	M6
	M7

	// XMM registers.
	X0
	X1
	X2
	X3
	X4
	X5
	X6
	X7
	X8
	X9
	X10
	X11
	X12
	X13
	X14
	X15

	// Segment registers.
	// isSegReg treats the range ES..GS as the segment registers.
	ES
	CS
	SS
	DS
	FS
	GS

	// System registers.
	GDTR
	IDTR
	LDTR
	MSW
	TASK

	// Control registers.
	CR0
	CR1
	CR2
	CR3
	CR4
	CR5
	CR6
	CR7
	CR8
	CR9
	CR10
	CR11
	CR12
	CR13
	CR14
	CR15

	// Debug registers.
	DR0
	DR1
	DR2
	DR3
	DR4
	DR5
	DR6
	DR7
	DR8
	DR9
	DR10
	DR11
	DR12
	DR13
	DR14
	DR15

	// Task registers.
	TR0
	TR1
	TR2
	TR3
	TR4
	TR5
	TR6
	TR7
)
 333  
// regMax is the highest-numbered Reg constant; TR7 is the last entry in
// the iota-assigned register list.
const regMax = TR7
 335  
// isArg marks Reg as an implementation of the Arg interface.
func (Reg) isArg() {}
 337  
 338  func (r Reg) String() string {
 339  	i := int(r)
 340  	if i < 0 || i >= len(regNames) || regNames[i] == "" {
 341  		return fmt.Sprintf("Reg(%d)", i)
 342  	}
 343  	return regNames[i]
 344  }
 345  
// A Mem is a memory reference.
// The general form is Segment:[Base+Scale*Index+Disp].
// Note that the String method renders only the bracketed part;
// it does not read Segment.
type Mem struct {
	Segment Reg   // segment register; not printed by String
	Base    Reg   // base register; zero means none and String omits it
	Scale   uint8 // multiplier applied to Index; zero makes String omit the index term
	Index   Reg   // index register; printed by String only when Scale is nonzero
	Disp    int64 // displacement; String prints it as signed hexadecimal
}
 355  
// isArg marks Mem as an implementation of the Arg interface.
func (Mem) isArg() {}
 357  
 358  func (m Mem) String() string {
 359  	var base, plus, scale, index, disp string
 360  
 361  	if m.Base != 0 {
 362  		base = m.Base.String()
 363  	}
 364  	if m.Scale != 0 {
 365  		if m.Base != 0 {
 366  			plus = "+"
 367  		}
 368  		if m.Scale > 1 {
 369  			scale = fmt.Sprintf("%d*", m.Scale)
 370  		}
 371  		index = m.Index.String()
 372  	}
 373  	if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
 374  		disp = fmt.Sprintf("%+#x", m.Disp)
 375  	}
 376  	return "[" + base + plus + scale + index + disp + "]"
 377  }
 378  
 379  // A Rel is an offset relative to the current instruction pointer.
 380  type Rel int32
 381  
 382  func (Rel) isArg() {}
 383  
 384  func (r Rel) String() string {
 385  	return fmt.Sprintf(".%+d", r)
 386  }
 387  
 388  // An Imm is an integer constant.
 389  type Imm int64
 390  
 391  func (Imm) isArg() {}
 392  
 393  func (i Imm) String() string {
 394  	return fmt.Sprintf("%#x", int64(i))
 395  }
 396  
 397  func (i Inst) String() string {
 398  	var buf bytes.Buffer
 399  	for _, p := range i.Prefix {
 400  		if p == 0 {
 401  			break
 402  		}
 403  		if p&PrefixImplicit != 0 {
 404  			continue
 405  		}
 406  		fmt.Fprintf(&buf, "%v ", p)
 407  	}
 408  	fmt.Fprintf(&buf, "%v", i.Op)
 409  	sep := " "
 410  	for _, v := range i.Args {
 411  		if v == nil {
 412  			break
 413  		}
 414  		fmt.Fprintf(&buf, "%s%v", sep, v)
 415  		sep = ", "
 416  	}
 417  	return buf.String()
 418  }
 419  
 420  func isReg(a Arg) bool {
 421  	_, ok := a.(Reg)
 422  	return ok
 423  }
 424  
 425  func isSegReg(a Arg) bool {
 426  	r, ok := a.(Reg)
 427  	return ok && ES <= r && r <= GS
 428  }
 429  
 430  func isMem(a Arg) bool {
 431  	_, ok := a.(Mem)
 432  	return ok
 433  }
 434  
 435  func isImm(a Arg) bool {
 436  	_, ok := a.(Imm)
 437  	return ok
 438  }
 439  
 440  func regBytes(a Arg) int {
 441  	r, ok := a.(Reg)
 442  	if !ok {
 443  		return 0
 444  	}
 445  	if AL <= r && r <= R15B {
 446  		return 1
 447  	}
 448  	if AX <= r && r <= R15W {
 449  		return 2
 450  	}
 451  	if EAX <= r && r <= R15L {
 452  		return 4
 453  	}
 454  	if RAX <= r && r <= R15 {
 455  		return 8
 456  	}
 457  	return 0
 458  }
 459  
 460  func isSegment(p Prefix) bool {
 461  	switch p {
 462  	case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
 463  		return true
 464  	}
 465  	return false
 466  }
 467  
 468  // The Op definitions and string list are in tables.go.
 469  
// prefixNames maps a prefix value to its printed name. Prefix.String
// consults it after clearing the metadata bits, so keys here carry none.
// REX prefixes with extension bits set are not listed; Prefix.String
// builds their names itself. There are no entries for the VEX bytes.
var prefixNames = map[Prefix]string{
	PrefixCS:       "CS",
	PrefixDS:       "DS",
	PrefixES:       "ES",
	PrefixFS:       "FS",
	PrefixGS:       "GS",
	PrefixSS:       "SS",
	PrefixLOCK:     "LOCK",
	PrefixREP:      "REP",
	PrefixREPN:     "REPN",
	PrefixAddrSize: "ADDRSIZE",
	PrefixDataSize: "DATASIZE",
	PrefixAddr16:   "ADDR16",
	PrefixData16:   "DATA16",
	PrefixAddr32:   "ADDR32",
	PrefixData32:   "DATA32",
	PrefixBND:      "BND",
	PrefixXACQUIRE: "XACQUIRE",
	PrefixXRELEASE: "XRELEASE",
	PrefixREX:      "REX",
	PrefixPT:       "PT",
	PrefixPN:       "PN",
}
 493  
// regNames holds the printed name of each register, indexed by Reg value.
// Every element is keyed explicitly, so the order of the entries in this
// literal does not have to follow the order of the Reg constants (and in a
// few places it does not). Index 0 has no entry and is therefore the empty
// string, which makes Reg.String fall back to "Reg(0)" for the zero Reg.
var regNames = [...]string{
	AL:   "AL",
	CL:   "CL",
	BL:   "BL",
	DL:   "DL",
	AH:   "AH",
	CH:   "CH",
	BH:   "BH",
	DH:   "DH",
	SPB:  "SPB",
	BPB:  "BPB",
	SIB:  "SIB",
	DIB:  "DIB",
	R8B:  "R8B",
	R9B:  "R9B",
	R10B: "R10B",
	R11B: "R11B",
	R12B: "R12B",
	R13B: "R13B",
	R14B: "R14B",
	R15B: "R15B",
	AX:   "AX",
	CX:   "CX",
	BX:   "BX",
	DX:   "DX",
	SP:   "SP",
	BP:   "BP",
	SI:   "SI",
	DI:   "DI",
	R8W:  "R8W",
	R9W:  "R9W",
	R10W: "R10W",
	R11W: "R11W",
	R12W: "R12W",
	R13W: "R13W",
	R14W: "R14W",
	R15W: "R15W",
	EAX:  "EAX",
	ECX:  "ECX",
	EDX:  "EDX",
	EBX:  "EBX",
	ESP:  "ESP",
	EBP:  "EBP",
	ESI:  "ESI",
	EDI:  "EDI",
	R8L:  "R8L",
	R9L:  "R9L",
	R10L: "R10L",
	R11L: "R11L",
	R12L: "R12L",
	R13L: "R13L",
	R14L: "R14L",
	R15L: "R15L",
	RAX:  "RAX",
	RCX:  "RCX",
	RDX:  "RDX",
	RBX:  "RBX",
	RSP:  "RSP",
	RBP:  "RBP",
	RSI:  "RSI",
	RDI:  "RDI",
	R8:   "R8",
	R9:   "R9",
	R10:  "R10",
	R11:  "R11",
	R12:  "R12",
	R13:  "R13",
	R14:  "R14",
	R15:  "R15",
	IP:   "IP",
	EIP:  "EIP",
	RIP:  "RIP",
	F0:   "F0",
	F1:   "F1",
	F2:   "F2",
	F3:   "F3",
	F4:   "F4",
	F5:   "F5",
	F6:   "F6",
	F7:   "F7",
	M0:   "M0",
	M1:   "M1",
	M2:   "M2",
	M3:   "M3",
	M4:   "M4",
	M5:   "M5",
	M6:   "M6",
	M7:   "M7",
	X0:   "X0",
	X1:   "X1",
	X2:   "X2",
	X3:   "X3",
	X4:   "X4",
	X5:   "X5",
	X6:   "X6",
	X7:   "X7",
	X8:   "X8",
	X9:   "X9",
	X10:  "X10",
	X11:  "X11",
	X12:  "X12",
	X13:  "X13",
	X14:  "X14",
	X15:  "X15",
	CS:   "CS",
	SS:   "SS",
	DS:   "DS",
	ES:   "ES",
	FS:   "FS",
	GS:   "GS",
	GDTR: "GDTR",
	IDTR: "IDTR",
	LDTR: "LDTR",
	MSW:  "MSW",
	TASK: "TASK",
	CR0:  "CR0",
	CR1:  "CR1",
	CR2:  "CR2",
	CR3:  "CR3",
	CR4:  "CR4",
	CR5:  "CR5",
	CR6:  "CR6",
	CR7:  "CR7",
	CR8:  "CR8",
	CR9:  "CR9",
	CR10: "CR10",
	CR11: "CR11",
	CR12: "CR12",
	CR13: "CR13",
	CR14: "CR14",
	CR15: "CR15",
	DR0:  "DR0",
	DR1:  "DR1",
	DR2:  "DR2",
	DR3:  "DR3",
	DR4:  "DR4",
	DR5:  "DR5",
	DR6:  "DR6",
	DR7:  "DR7",
	DR8:  "DR8",
	DR9:  "DR9",
	DR10: "DR10",
	DR11: "DR11",
	DR12: "DR12",
	DR13: "DR13",
	DR14: "DR14",
	DR15: "DR15",
	TR0:  "TR0",
	TR1:  "TR1",
	TR2:  "TR2",
	TR3:  "TR3",
	TR4:  "TR4",
	TR5:  "TR5",
	TR6:  "TR6",
	TR7:  "TR7",
}
 650