decode.go raw

   1  // Copyright 2014 The Go Authors.  All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Table-driven decoding of x86 instructions.
   6  
   7  package x86asm
   8  
   9  import (
  10  	"encoding/binary"
  11  	"errors"
  12  	"fmt"
  13  	"runtime"
  14  )
  15  
  16  // Set trace to true to cause the decoder to print the PC sequence
  17  // of the executed instruction codes. This is typically only useful
  18  // when you are running a test of a single input case.
  19  const trace = false
  20  
  21  // A decodeOp is a single instruction in the decoder bytecode program.
  22  //
  23  // The decodeOps correspond to consuming and conditionally branching
  24  // on input bytes, consuming additional fields, and then interpreting
  25  // consumed data as instruction arguments. The names of the xRead and xArg
  26  // operations are taken from the Intel manual conventions, for example
  27  // Volume 2, Section 3.1.1, page 487 of
  28  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
  29  //
  30  // The actual decoding program is generated by ../x86map.
  31  //
  32  // TODO(rsc): We may be able to merge various of the memory operands
  33  // since we don't care about, say, the distinction between m80dec and m80bcd.
  34  // Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
  35  
// decodeOp is the opcode of one instruction in the decoder bytecode
// program. decode1 obtains it by converting an entry of the decoder
// table (decodeOp(decoder[pc])) and dispatching on the x* constants
// declared below.
type decodeOp uint16
  37  
// Decoder bytecode opcodes. The values are assigned by iota, so the
// declaration order determines each opcode's numeric value.
// NOTE(review): presumably the order must stay in sync with the decoder
// table generated by ../x86map — confirm before inserting or reordering.
const (
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)
 171  
 172  // instPrefix returns an Inst describing just one prefix byte.
 173  // It is only used if there is a prefix followed by an unintelligible
 174  // or invalid instruction byte sequence.
 175  func instPrefix(b byte, mode int) (Inst, error) {
 176  	// When tracing it is useful to see what called instPrefix to report an error.
 177  	if trace {
 178  		_, file, line, _ := runtime.Caller(1)
 179  		fmt.Printf("%s:%d\n", file, line)
 180  	}
 181  	p := Prefix(b)
 182  	switch p {
 183  	case PrefixDataSize:
 184  		if mode == 16 {
 185  			p = PrefixData32
 186  		} else {
 187  			p = PrefixData16
 188  		}
 189  	case PrefixAddrSize:
 190  		if mode == 32 {
 191  			p = PrefixAddr16
 192  		} else {
 193  			p = PrefixAddr32
 194  		}
 195  	}
 196  	// Note: using composite literal with Prefix key confuses 'bundle' tool.
 197  	inst := Inst{Len: 1}
 198  	inst.Prefix = Prefixes{p}
 199  	return inst, nil
 200  }
 201  
 202  // truncated reports a truncated instruction.
 203  // For now we use instPrefix but perhaps later we will return
 204  // a specific error here.
 205  func truncated(src []byte, mode int) (Inst, error) {
 206  	if len(src) == 0 {
 207  		return Inst{}, ErrTruncated
 208  	}
 209  	return instPrefix(src[0], mode) // too long
 210  }
 211  
// These are the errors returned by Decode.
var (
	// ErrInvalidMode is returned when the mode argument is not 16, 32, or 64.
	ErrInvalidMode  = errors.New("invalid x86 mode in Decode")
	// ErrTruncated is returned when there are no bytes at all to decode.
	// A non-empty but incomplete src is currently reported through
	// instPrefix instead (see truncated).
	ErrTruncated    = errors.New("truncated instruction")
	// ErrUnrecognized has the message "unrecognized instruction".
	// NOTE(review): no use of it is visible in the reviewed portion of this
	// file; presumably it reports bytes matching no known instruction — confirm.
	ErrUnrecognized = errors.New("unrecognized instruction")
)
 218  
// decoderCover records coverage information for which parts
// of the byte code have been executed.
// When it is non-nil, decode1 sets decoderCover[pc] = true for every
// decoder program counter it executes, so the slice must be long enough
// to be indexed by each pc reached. The default nil value disables recording.
var decoderCover []bool
 222  
 223  // Decode decodes the leading bytes in src as a single instruction.
 224  // The mode arguments specifies the assumed processor mode:
 225  // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
 226  func Decode(src []byte, mode int) (inst Inst, err error) {
 227  	return decode1(src, mode, false)
 228  }
 229  
 230  // decode1 is the implementation of Decode but takes an extra
 231  // gnuCompat flag to cause it to change its behavior to mimic
 232  // bugs (or at least unique features) of GNU libopcodes as used
 233  // by objdump. We don't believe that logic is the right thing to do
 234  // in general, but when testing against libopcodes it simplifies the
 235  // comparison if we adjust a few small pieces of logic.
 236  // The affected logic is in the conditional branch for "mandatory" prefixes,
 237  // case xCondPrefix.
 238  func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
 239  	switch mode {
 240  	case 16, 32, 64:
 241  		// ok
 242  		// TODO(rsc): 64-bit mode not tested, probably not working.
 243  	default:
 244  		return Inst{}, ErrInvalidMode
 245  	}
 246  
 247  	// Maximum instruction size is 15 bytes.
 248  	// If we need to read more, return 'truncated instruction.
 249  	if len(src) > 15 {
 250  		src = src[:15]
 251  	}
 252  
 253  	var (
 254  		// prefix decoding information
 255  		pos           = 0    // position reading src
 256  		nprefix       = 0    // number of prefixes
 257  		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
 258  		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
 259  		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
 260  		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
 261  		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
 262  		rex           Prefix // rex byte if present (or 0)
 263  		rexUsed       Prefix // bits used in rex byte
 264  		rexIndex      = -1   // index of rex byte
 265  		vex           Prefix // use vex encoding
 266  		vexIndex      = -1   // index of vex prefix
 267  
 268  		addrMode = mode // address mode (width in bits)
 269  		dataMode = mode // operand mode (width in bits)
 270  
 271  		// decoded ModR/M fields
 272  		haveModrm bool
 273  		modrm     int
 274  		mod       int
 275  		regop     int
 276  		rm        int
 277  
 278  		// if ModR/M is memory reference, Mem form
 279  		mem     Mem
 280  		haveMem bool
 281  
 282  		// decoded SIB fields
 283  		haveSIB bool
 284  		sib     int
 285  		scale   int
 286  		index   int
 287  		base    int
 288  		displen int
 289  		dispoff int
 290  
 291  		// decoded immediate values
 292  		imm     int64
 293  		imm8    int8
 294  		immc    int64
 295  		immcpos int
 296  
 297  		// output
 298  		opshift int
 299  		inst    Inst
 300  		narg    int // number of arguments written to inst
 301  	)
 302  
 303  	if mode == 64 {
 304  		dataMode = 32
 305  	}
 306  
 307  	// Prefixes are certainly the most complex and underspecified part of
 308  	// decoding x86 instructions. Although the manuals say things like
 309  	// up to four prefixes, one from each group, nearly everyone seems to
 310  	// agree that in practice as many prefixes as possible, including multiple
 311  	// from a particular group or repetitions of a given prefix, can be used on
 312  	// an instruction, provided the total instruction length including prefixes
 313  	// does not exceed the agreed-upon maximum of 15 bytes.
 314  	// Everyone also agrees that if one of these prefixes is the LOCK prefix
 315  	// and the instruction is not one of the instructions that can be used with
 316  	// the LOCK prefix or if the destination is not a memory operand,
 317  	// then the instruction is invalid and produces the #UD exception.
 318  	// However, that is the end of any semblance of agreement.
 319  	//
 320  	// What happens if prefixes are given that conflict with other prefixes?
 321  	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
 322  	// conflict with each other: only one segment can be in effect.
 323  	// Disassemblers seem to agree that later prefixes take priority over
 324  	// earlier ones. I have not taken the time to write assembly programs
 325  	// to check to see if the hardware agrees.
 326  	//
 327  	// What happens if prefixes are given that have no meaning for the
 328  	// specific instruction to which they are attached? It depends.
 329  	// If they really have no meaning, they are ignored. However, a future
 330  	// processor may assign a different meaning. As a disassembler, we
 331  	// don't really know whether we're seeing a meaningless prefix or one
 332  	// whose meaning we simply haven't been told yet.
 333  	//
 334  	// Combining the two questions, what happens when conflicting
 335  	// extension prefixes are given? No one seems to know for sure.
 336  	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
 337  	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
 338  	// Which prefix wins? See the xCondPrefix prefix for more.
 339  	//
 340  	// Writing assembly test cases to divine which interpretation the
 341  	// CPU uses might clarify the situation, but more likely it would
 342  	// make the situation even less clear.
 343  
 344  	// Read non-REX prefixes.
 345  ReadPrefixes:
 346  	for ; pos < len(src); pos++ {
 347  		p := Prefix(src[pos])
 348  		switch p {
 349  		default:
 350  			nprefix = pos
 351  			break ReadPrefixes
 352  
 353  		// Group 1 - lock and repeat prefixes
 354  		// According to Intel, there should only be one from this set,
 355  		// but according to AMD both can be present.
 356  		case 0xF0:
 357  			if lockIndex >= 0 {
 358  				inst.Prefix[lockIndex] |= PrefixIgnored
 359  			}
 360  			lockIndex = pos
 361  		case 0xF2, 0xF3:
 362  			if repIndex >= 0 {
 363  				inst.Prefix[repIndex] |= PrefixIgnored
 364  			}
 365  			repIndex = pos
 366  
 367  		// Group 2 - segment override / branch hints
 368  		case 0x26, 0x2E, 0x36, 0x3E:
 369  			if mode == 64 {
 370  				p |= PrefixIgnored
 371  				break
 372  			}
 373  			fallthrough
 374  		case 0x64, 0x65:
 375  			if segIndex >= 0 {
 376  				inst.Prefix[segIndex] |= PrefixIgnored
 377  			}
 378  			segIndex = pos
 379  
 380  		// Group 3 - operand size override
 381  		case 0x66:
 382  			if mode == 16 {
 383  				dataMode = 32
 384  				p = PrefixData32
 385  			} else {
 386  				dataMode = 16
 387  				p = PrefixData16
 388  			}
 389  			if dataSizeIndex >= 0 {
 390  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
 391  			}
 392  			dataSizeIndex = pos
 393  
 394  		// Group 4 - address size override
 395  		case 0x67:
 396  			if mode == 32 {
 397  				addrMode = 16
 398  				p = PrefixAddr16
 399  			} else {
 400  				addrMode = 32
 401  				p = PrefixAddr32
 402  			}
 403  			if addrSizeIndex >= 0 {
 404  				inst.Prefix[addrSizeIndex] |= PrefixIgnored
 405  			}
 406  			addrSizeIndex = pos
 407  
 408  		//Group 5 - Vex encoding
 409  		case 0xC5:
 410  			if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
 411  				vex = p
 412  				vexIndex = pos
 413  				inst.Prefix[pos] = p
 414  				inst.Prefix[pos+1] = Prefix(src[pos+1])
 415  				pos += 1
 416  				continue
 417  			} else {
 418  				nprefix = pos
 419  				break ReadPrefixes
 420  			}
 421  		case 0xC4:
 422  			if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
 423  				vex = p
 424  				vexIndex = pos
 425  				inst.Prefix[pos] = p
 426  				inst.Prefix[pos+1] = Prefix(src[pos+1])
 427  				inst.Prefix[pos+2] = Prefix(src[pos+2])
 428  				pos += 2
 429  				continue
 430  			} else {
 431  				nprefix = pos
 432  				break ReadPrefixes
 433  			}
 434  		}
 435  
 436  		if pos >= len(inst.Prefix) {
 437  			return instPrefix(src[0], mode) // too long
 438  		}
 439  
 440  		inst.Prefix[pos] = p
 441  	}
 442  
 443  	// Read REX prefix.
 444  	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
 445  		rex = Prefix(src[pos])
 446  		rexIndex = pos
 447  		if pos >= len(inst.Prefix) {
 448  			return instPrefix(src[0], mode) // too long
 449  		}
 450  		inst.Prefix[pos] = rex
 451  		pos++
 452  		if rex&PrefixREXW != 0 {
 453  			dataMode = 64
 454  			if dataSizeIndex >= 0 {
 455  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
 456  			}
 457  		}
 458  	}
 459  
 460  	// Decode instruction stream, interpreting decoding instructions.
 461  	// opshift gives the shift to use when saving the next
 462  	// opcode byte into inst.Opcode.
 463  	opshift = 24
 464  
 465  	// Decode loop, executing decoder program.
 466  	var oldPC, prevPC int
 467  Decode:
 468  	for pc := 1; ; { // TODO uint
 469  		oldPC = prevPC
 470  		prevPC = pc
 471  		if trace {
 472  			println("run", pc)
 473  		}
 474  		x := decoder[pc]
 475  		if decoderCover != nil {
 476  			decoderCover[pc] = true
 477  		}
 478  		pc++
 479  
 480  		// Read and decode ModR/M if needed by opcode.
 481  		switch decodeOp(x) {
 482  		case xCondSlashR, xReadSlashR:
 483  			if haveModrm {
 484  				return Inst{Len: pos}, errInternal
 485  			}
 486  			haveModrm = true
 487  			if pos >= len(src) {
 488  				return truncated(src, mode)
 489  			}
 490  			modrm = int(src[pos])
 491  			pos++
 492  			if opshift >= 0 {
 493  				inst.Opcode |= uint32(modrm) << uint(opshift)
 494  				opshift -= 8
 495  			}
 496  			mod = modrm >> 6
 497  			regop = (modrm >> 3) & 07
 498  			rm = modrm & 07
 499  			if rex&PrefixREXR != 0 {
 500  				rexUsed |= PrefixREXR
 501  				regop |= 8
 502  			}
 503  			if addrMode == 16 {
 504  				// 16-bit modrm form
 505  				if mod != 3 {
 506  					haveMem = true
 507  					mem = addr16[rm]
 508  					if rm == 6 && mod == 0 {
 509  						mem.Base = 0
 510  					}
 511  
 512  					// Consume disp16 if present.
 513  					if mod == 0 && rm == 6 || mod == 2 {
 514  						if pos+2 > len(src) {
 515  							return truncated(src, mode)
 516  						}
 517  						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
 518  						pos += 2
 519  					}
 520  
 521  					// Consume disp8 if present.
 522  					if mod == 1 {
 523  						if pos >= len(src) {
 524  							return truncated(src, mode)
 525  						}
 526  						mem.Disp = int64(int8(src[pos]))
 527  						pos++
 528  					}
 529  				}
 530  			} else {
 531  				haveMem = mod != 3
 532  
 533  				// 32-bit or 64-bit form
 534  				// Consume SIB encoding if present.
 535  				if rm == 4 && mod != 3 {
 536  					haveSIB = true
 537  					if pos >= len(src) {
 538  						return truncated(src, mode)
 539  					}
 540  					sib = int(src[pos])
 541  					pos++
 542  					if opshift >= 0 {
 543  						inst.Opcode |= uint32(sib) << uint(opshift)
 544  						opshift -= 8
 545  					}
 546  					scale = sib >> 6
 547  					index = (sib >> 3) & 07
 548  					base = sib & 07
 549  					if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
 550  						rexUsed |= PrefixREXB
 551  						base |= 8
 552  					}
 553  					if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
 554  						rexUsed |= PrefixREXX
 555  						index |= 8
 556  					}
 557  
 558  					mem.Scale = 1 << uint(scale)
 559  					if index == 4 {
 560  						// no mem.Index
 561  					} else {
 562  						mem.Index = baseRegForBits(addrMode) + Reg(index)
 563  					}
 564  					if base&7 == 5 && mod == 0 {
 565  						// no mem.Base
 566  					} else {
 567  						mem.Base = baseRegForBits(addrMode) + Reg(base)
 568  					}
 569  				} else {
 570  					if rex&PrefixREXB != 0 {
 571  						rexUsed |= PrefixREXB
 572  						rm |= 8
 573  					}
 574  					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
 575  						// base omitted
 576  					} else if mod != 3 {
 577  						mem.Base = baseRegForBits(addrMode) + Reg(rm)
 578  					}
 579  				}
 580  
 581  				// Consume disp32 if present.
 582  				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
 583  					if pos+4 > len(src) {
 584  						return truncated(src, mode)
 585  					}
 586  					dispoff = pos
 587  					displen = 4
 588  					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
 589  					pos += 4
 590  				}
 591  
 592  				// Consume disp8 if present.
 593  				if mod == 1 {
 594  					if pos >= len(src) {
 595  						return truncated(src, mode)
 596  					}
 597  					dispoff = pos
 598  					displen = 1
 599  					mem.Disp = int64(int8(src[pos]))
 600  					pos++
 601  				}
 602  
 603  				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
 604  				// See Vol 2A. Table 2-7.
 605  				if mode == 64 && mod == 0 && rm&7 == 5 {
 606  					if addrMode == 32 {
 607  						mem.Base = EIP
 608  					} else {
 609  						mem.Base = RIP
 610  					}
 611  				}
 612  			}
 613  
 614  			if segIndex >= 0 {
 615  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
 616  			}
 617  		}
 618  
 619  		// Execute single opcode.
 620  		switch decodeOp(x) {
 621  		default:
 622  			println("bad op", x, "at", pc-1, "from", oldPC)
 623  			return Inst{Len: pos}, errInternal
 624  
 625  		case xFail:
 626  			inst.Op = 0
 627  			break Decode
 628  
 629  		case xMatch:
 630  			break Decode
 631  
 632  		case xJump:
 633  			pc = int(decoder[pc])
 634  
 635  		// Conditional branches.
 636  
 637  		case xCondByte:
 638  			if pos >= len(src) {
 639  				return truncated(src, mode)
 640  			}
 641  			b := src[pos]
 642  			n := int(decoder[pc])
 643  			pc++
 644  			for i := 0; i < n; i++ {
 645  				xb, xpc := decoder[pc], int(decoder[pc+1])
 646  				pc += 2
 647  				if b == byte(xb) {
 648  					pc = xpc
 649  					pos++
 650  					if opshift >= 0 {
 651  						inst.Opcode |= uint32(b) << uint(opshift)
 652  						opshift -= 8
 653  					}
 654  					continue Decode
 655  				}
 656  			}
 657  			// xCondByte is the only conditional with a fall through,
 658  			// so that it can be used to pick off special cases before
 659  			// an xCondSlash. If the fallthrough instruction is xFail,
 660  			// advance the position so that the decoded instruction
 661  			// size includes the byte we just compared against.
 662  			if decodeOp(decoder[pc]) == xJump {
 663  				pc = int(decoder[pc+1])
 664  			}
 665  			if decodeOp(decoder[pc]) == xFail {
 666  				pos++
 667  			}
 668  
 669  		case xCondIs64:
 670  			if mode == 64 {
 671  				pc = int(decoder[pc+1])
 672  			} else {
 673  				pc = int(decoder[pc])
 674  			}
 675  
 676  		case xCondIsMem:
 677  			mem := haveMem
 678  			if !haveModrm {
 679  				if pos >= len(src) {
 680  					return instPrefix(src[0], mode) // too long
 681  				}
 682  				mem = src[pos]>>6 != 3
 683  			}
 684  			if mem {
 685  				pc = int(decoder[pc+1])
 686  			} else {
 687  				pc = int(decoder[pc])
 688  			}
 689  
 690  		case xCondDataSize:
 691  			switch dataMode {
 692  			case 16:
 693  				if dataSizeIndex >= 0 {
 694  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
 695  				}
 696  				pc = int(decoder[pc])
 697  			case 32:
 698  				if dataSizeIndex >= 0 {
 699  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
 700  				}
 701  				pc = int(decoder[pc+1])
 702  			case 64:
 703  				rexUsed |= PrefixREXW
 704  				pc = int(decoder[pc+2])
 705  			}
 706  
 707  		case xCondAddrSize:
 708  			switch addrMode {
 709  			case 16:
 710  				if addrSizeIndex >= 0 {
 711  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
 712  				}
 713  				pc = int(decoder[pc])
 714  			case 32:
 715  				if addrSizeIndex >= 0 {
 716  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
 717  				}
 718  				pc = int(decoder[pc+1])
 719  			case 64:
 720  				pc = int(decoder[pc+2])
 721  			}
 722  
 723  		case xCondPrefix:
 724  			// Conditional branch based on presence or absence of prefixes.
 725  			// The conflict cases here are completely undocumented and
 726  			// differ significantly between GNU libopcodes and Intel xed.
 727  			// I have not written assembly code to divine what various CPUs
 728  			// do, but it wouldn't surprise me if they are not consistent either.
 729  			//
 730  			// The basic idea is to switch on the presence of a prefix, so that
 731  			// for example:
 732  			//
 733  			//	xCondPrefix, 4
 734  			//	0xF3, 123,
 735  			//	0xF2, 234,
 736  			//	0x66, 345,
 737  			//	0, 456
 738  			//
 739  			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
 740  			// is present, 66 if the 345 prefix is present, and 456 otherwise.
 741  			// The prefixes are given in descending order so that the 0 will be last.
 742  			//
 743  			// It is unclear what should happen if multiple conditions are
 744  			// satisfied: what if F2 and F3 are both present, or if 66 and F2
 745  			// are present, or if all three are present? The one chosen becomes
 746  			// part of the opcode and the others do not. Perhaps the answer
 747  			// depends on the specific opcodes in question.
 748  			//
 749  			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
 750  			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
 751  			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
 752  			// with the 66 being only an operand size override, and probably
 753  			// F2 66 0F 38 F1 /r should be treated the same.
 754  			// Perhaps that rule is specific to the case of CRC32, since no
 755  			// 66 0F 38 F1 instruction is defined (today) (that we know of).
 756  			// However, both libopcodes and xed seem to generalize this
 757  			// example and choose F2/F3 in preference to 66, and we
 758  			// do the same.
 759  			//
 760  			// Next, what if both F2 and F3 are present? Which wins?
 761  			// The Intel xed rule, and ours, is that the one that occurs last wins.
 762  			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
 763  			// is that F3 beats F2 unless F3 has no special meaning, in which
 764  			// case F3 can be a modified on an F2 special meaning.
 765  			//
 766  			// Concretely,
 767  			//	66 0F D6 /r is MOVQ
 768  			//	F2 0F D6 /r is MOVDQ2Q
 769  			//	F3 0F D6 /r is MOVQ2DQ.
 770  			//
 771  			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
 772  			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
 773  			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
 774  			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
 775  			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
 776  			//	F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
 777  			//	Adding 66 anywhere in the prefix section of the
 778  			//	last two cases does not change the outcome.
 779  			//
 780  			// Finally, what if there is a variant in which 66 is a mandatory
 781  			// prefix rather than an operand size override, but we know of
 782  			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
 783  			// Does F2/F3 still take priority, so that the result is an unknown
 784  			// instruction, or does the 66 take priority, so that the extended
 785  			// 66 instruction should be interpreted as having a REP/REPN prefix?
 786  			// Intel xed does the former and GNU libopcodes does the latter.
 787  			// We side with Intel xed, unless we are trying to match libopcodes
 788  			// more closely during the comparison-based test suite.
 789  			//
 790  			// In 64-bit mode REX.W is another valid prefix to test for, but
 791  			// there is less ambiguity about that. When present, REX.W is
 792  			// always the first entry in the table.
 793  			n := int(decoder[pc])
 794  			pc++
 795  			sawF3 := false
 796  			for j := 0; j < n; j++ {
 797  				prefix := Prefix(decoder[pc+2*j])
 798  				if prefix.IsREX() {
 799  					rexUsed |= prefix
 800  					if rex&prefix == prefix {
 801  						pc = int(decoder[pc+2*j+1])
 802  						continue Decode
 803  					}
 804  					continue
 805  				}
 806  				ok := false
 807  				if prefix == 0 {
 808  					ok = true
 809  				} else if prefix.IsREX() {
 810  					rexUsed |= prefix
 811  					if rex&prefix == prefix {
 812  						ok = true
 813  					}
 814  				} else if prefix == 0xC5 || prefix == 0xC4 {
 815  					if vex == prefix {
 816  						ok = true
 817  					}
 818  				} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
 819  					prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
 820  					var vexM, vexP Prefix
 821  					if vex == 0xC5 {
 822  						vexM = 1 // 2 byte vex always implies 0F
 823  						vexP = inst.Prefix[vexIndex+1]
 824  					} else {
 825  						vexM = inst.Prefix[vexIndex+1]
 826  						vexP = inst.Prefix[vexIndex+2]
 827  					}
 828  					switch prefix {
 829  					case 0x66:
 830  						ok = vexP&3 == 1
 831  					case 0xF3:
 832  						ok = vexP&3 == 2
 833  					case 0xF2:
 834  						ok = vexP&3 == 3
 835  					case 0x0F:
 836  						ok = vexM&3 == 1
 837  					case 0x0F38:
 838  						ok = vexM&3 == 2
 839  					case 0x0F3A:
 840  						ok = vexM&3 == 3
 841  					}
 842  				} else {
 843  					if prefix == 0xF3 {
 844  						sawF3 = true
 845  					}
 846  					switch prefix {
 847  					case PrefixLOCK:
 848  						if lockIndex >= 0 {
 849  							inst.Prefix[lockIndex] |= PrefixImplicit
 850  							ok = true
 851  						}
 852  					case PrefixREP, PrefixREPN:
 853  						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
 854  							inst.Prefix[repIndex] |= PrefixImplicit
 855  							ok = true
 856  						}
 857  						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
 858  							// Check to see if earlier prefix F3 is present.
 859  							for i := repIndex - 1; i >= 0; i-- {
 860  								if inst.Prefix[i]&0xFF == prefix {
 861  									inst.Prefix[i] |= PrefixImplicit
 862  									ok = true
 863  								}
 864  							}
 865  						}
 866  						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
 867  							// Check to see if earlier prefix F2 is present.
 868  							for i := repIndex - 1; i >= 0; i-- {
 869  								if inst.Prefix[i]&0xFF == prefix {
 870  									inst.Prefix[i] |= PrefixImplicit
 871  									ok = true
 872  								}
 873  							}
 874  						}
 875  					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
 876  						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
 877  							inst.Prefix[segIndex] |= PrefixImplicit
 878  							ok = true
 879  						}
 880  					case PrefixDataSize:
 881  						// Looking for 66 mandatory prefix.
 882  						// The F2/F3 mandatory prefixes take priority when both are present.
 883  						// If we got this far in the xCondPrefix table and an F2/F3 is present,
 884  						// it means the table didn't have any entry for that prefix. But if 66 has
 885  						// special meaning, perhaps F2/F3 have special meaning that we don't know.
 886  						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
 887  						// GNU libopcodes allows the 66 to match. We do what Intel xed does
 888  						// except in gnuCompat mode.
 889  						if repIndex >= 0 && !gnuCompat {
 890  							inst.Op = 0
 891  							break Decode
 892  						}
 893  						if dataSizeIndex >= 0 {
 894  							inst.Prefix[dataSizeIndex] |= PrefixImplicit
 895  							ok = true
 896  						}
 897  					case PrefixAddrSize:
 898  						if addrSizeIndex >= 0 {
 899  							inst.Prefix[addrSizeIndex] |= PrefixImplicit
 900  							ok = true
 901  						}
 902  					}
 903  				}
 904  				if ok {
 905  					pc = int(decoder[pc+2*j+1])
 906  					continue Decode
 907  				}
 908  			}
 909  			inst.Op = 0
 910  			break Decode
 911  
 912  		case xCondSlashR:
 913  			pc = int(decoder[pc+regop&7])
 914  
 915  		// Input.
 916  
 917  		case xReadSlashR:
 918  			// done above
 919  
 920  		case xReadIb:
 921  			if pos >= len(src) {
 922  				return truncated(src, mode)
 923  			}
 924  			imm8 = int8(src[pos])
 925  			pos++
 926  
 927  		case xReadIw:
 928  			if pos+2 > len(src) {
 929  				return truncated(src, mode)
 930  			}
 931  			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
 932  			pos += 2
 933  
 934  		case xReadId:
 935  			if pos+4 > len(src) {
 936  				return truncated(src, mode)
 937  			}
 938  			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
 939  			pos += 4
 940  
 941  		case xReadIo:
 942  			if pos+8 > len(src) {
 943  				return truncated(src, mode)
 944  			}
 945  			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
 946  			pos += 8
 947  
 948  		case xReadCb:
 949  			if pos >= len(src) {
 950  				return truncated(src, mode)
 951  			}
 952  			immcpos = pos
 953  			immc = int64(src[pos])
 954  			pos++
 955  
 956  		case xReadCw:
 957  			if pos+2 > len(src) {
 958  				return truncated(src, mode)
 959  			}
 960  			immcpos = pos
 961  			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
 962  			pos += 2
 963  
 964  		case xReadCm:
 965  			immcpos = pos
 966  			if addrMode == 16 {
 967  				if pos+2 > len(src) {
 968  					return truncated(src, mode)
 969  				}
 970  				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
 971  				pos += 2
 972  			} else if addrMode == 32 {
 973  				if pos+4 > len(src) {
 974  					return truncated(src, mode)
 975  				}
 976  				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
 977  				pos += 4
 978  			} else {
 979  				if pos+8 > len(src) {
 980  					return truncated(src, mode)
 981  				}
 982  				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
 983  				pos += 8
 984  			}
 985  		case xReadCd:
 986  			immcpos = pos
 987  			if pos+4 > len(src) {
 988  				return truncated(src, mode)
 989  			}
 990  			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
 991  			pos += 4
 992  
 993  		case xReadCp:
 994  			immcpos = pos
 995  			if pos+6 > len(src) {
 996  				return truncated(src, mode)
 997  			}
 998  			w := binary.LittleEndian.Uint32(src[pos:])
 999  			w2 := binary.LittleEndian.Uint16(src[pos+4:])
1000  			immc = int64(w2)<<32 | int64(w)
1001  			pos += 6
1002  
1003  		// Output.
1004  
1005  		case xSetOp:
1006  			inst.Op = Op(decoder[pc])
1007  			pc++
1008  
1009  		case xArg1,
1010  			xArg3,
1011  			xArgAL,
1012  			xArgAX,
1013  			xArgCL,
1014  			xArgCS,
1015  			xArgDS,
1016  			xArgDX,
1017  			xArgEAX,
1018  			xArgEDX,
1019  			xArgES,
1020  			xArgFS,
1021  			xArgGS,
1022  			xArgRAX,
1023  			xArgRDX,
1024  			xArgSS,
1025  			xArgST,
1026  			xArgXMM0:
1027  			inst.Args[narg] = fixedArg[x]
1028  			narg++
1029  
1030  		case xArgImm8:
1031  			inst.Args[narg] = Imm(imm8)
1032  			narg++
1033  
1034  		case xArgImm8u:
1035  			inst.Args[narg] = Imm(uint8(imm8))
1036  			narg++
1037  
1038  		case xArgImm16:
1039  			inst.Args[narg] = Imm(int16(imm))
1040  			narg++
1041  
1042  		case xArgImm16u:
1043  			inst.Args[narg] = Imm(uint16(imm))
1044  			narg++
1045  
1046  		case xArgImm32:
1047  			inst.Args[narg] = Imm(int32(imm))
1048  			narg++
1049  
1050  		case xArgImm64:
1051  			inst.Args[narg] = Imm(imm)
1052  			narg++
1053  
1054  		case xArgM,
1055  			xArgM128,
1056  			xArgM256,
1057  			xArgM1428byte,
1058  			xArgM16,
1059  			xArgM16and16,
1060  			xArgM16and32,
1061  			xArgM16and64,
1062  			xArgM16colon16,
1063  			xArgM16colon32,
1064  			xArgM16colon64,
1065  			xArgM16int,
1066  			xArgM2byte,
1067  			xArgM32,
1068  			xArgM32and32,
1069  			xArgM32fp,
1070  			xArgM32int,
1071  			xArgM512byte,
1072  			xArgM64,
1073  			xArgM64fp,
1074  			xArgM64int,
1075  			xArgM8,
1076  			xArgM80bcd,
1077  			xArgM80dec,
1078  			xArgM80fp,
1079  			xArgM94108byte,
1080  			xArgMem:
1081  			if !haveMem {
1082  				inst.Op = 0
1083  				break Decode
1084  			}
1085  			inst.Args[narg] = mem
1086  			inst.MemBytes = int(memBytes[decodeOp(x)])
1087  			if mem.Base == RIP {
1088  				inst.PCRel = displen
1089  				inst.PCRelOff = dispoff
1090  			}
1091  			narg++
1092  
1093  		case xArgPtr16colon16:
1094  			inst.Args[narg] = Imm(immc >> 16)
1095  			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
1096  			narg += 2
1097  
1098  		case xArgPtr16colon32:
1099  			inst.Args[narg] = Imm(immc >> 32)
1100  			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
1101  			narg += 2
1102  
1103  		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
1104  			// TODO(rsc): Can address be 64 bits?
1105  			mem = Mem{Disp: int64(immc)}
1106  			if segIndex >= 0 {
1107  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
1108  				inst.Prefix[segIndex] |= PrefixImplicit
1109  			}
1110  			inst.Args[narg] = mem
1111  			inst.MemBytes = int(memBytes[decodeOp(x)])
1112  			if mem.Base == RIP {
1113  				inst.PCRel = displen
1114  				inst.PCRelOff = dispoff
1115  			}
1116  			narg++
1117  
1118  		case xArgYmm1:
1119  			base := baseReg[x]
1120  			index := Reg(regop)
1121  			if inst.Prefix[vexIndex+1]&0x80 == 0 {
1122  				index += 8
1123  			}
1124  			inst.Args[narg] = base + index
1125  			narg++
1126  
1127  		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
1128  			base := baseReg[x]
1129  			index := Reg(regop)
1130  			if rex != 0 && base == AL && index >= 4 {
1131  				rexUsed |= PrefixREX
1132  				index -= 4
1133  				base = SPB
1134  			}
1135  			inst.Args[narg] = base + index
1136  			narg++
1137  
1138  		case xArgMm, xArgMm1, xArgTR0dashTR7:
1139  			inst.Args[narg] = baseReg[x] + Reg(regop&7)
1140  			narg++
1141  
1142  		case xArgCR0dashCR7:
1143  			// AMD documents an extension that the LOCK prefix
1144  			// can be used in place of a REX prefix in order to access
1145  			// CR8 from 32-bit mode. The LOCK prefix is allowed in
1146  			// all modes, provided the corresponding CPUID bit is set.
1147  			if lockIndex >= 0 {
1148  				inst.Prefix[lockIndex] |= PrefixImplicit
1149  				regop += 8
1150  			}
1151  			inst.Args[narg] = CR0 + Reg(regop)
1152  			narg++
1153  
1154  		case xArgSreg:
1155  			regop &= 7
1156  			if regop >= 6 {
1157  				inst.Op = 0
1158  				break Decode
1159  			}
1160  			inst.Args[narg] = ES + Reg(regop)
1161  			narg++
1162  
1163  		case xArgRmf16, xArgRmf32, xArgRmf64:
1164  			base := baseReg[x]
1165  			index := Reg(modrm & 07)
1166  			if rex&PrefixREXB != 0 {
1167  				rexUsed |= PrefixREXB
1168  				index += 8
1169  			}
1170  			inst.Args[narg] = base + index
1171  			narg++
1172  
1173  		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
1174  			n := inst.Opcode >> uint(opshift+8) & 07
1175  			base := baseReg[x]
1176  			index := Reg(n)
1177  			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
1178  				rexUsed |= PrefixREXB
1179  				index += 8
1180  			}
1181  			if rex != 0 && base == AL && index >= 4 {
1182  				rexUsed |= PrefixREX
1183  				index -= 4
1184  				base = SPB
1185  			}
1186  			inst.Args[narg] = base + index
1187  			narg++
1188  		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
1189  			xArgMmM32, xArgMmM64, xArgMm2M64,
1190  			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
1191  			xArgYmm2M256:
1192  			if haveMem {
1193  				inst.Args[narg] = mem
1194  				inst.MemBytes = int(memBytes[decodeOp(x)])
1195  				if mem.Base == RIP {
1196  					inst.PCRel = displen
1197  					inst.PCRelOff = dispoff
1198  				}
1199  			} else {
1200  				base := baseReg[x]
1201  				index := Reg(rm)
1202  				switch decodeOp(x) {
1203  				case xArgMmM32, xArgMmM64, xArgMm2M64:
1204  					// There are only 8 MMX registers, so these ignore the REX.X bit.
1205  					index &= 7
1206  				case xArgRM8:
1207  					if rex != 0 && index >= 4 {
1208  						rexUsed |= PrefixREX
1209  						index -= 4
1210  						base = SPB
1211  					}
1212  				case xArgYmm2M256:
1213  					if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
1214  						index += 8
1215  					}
1216  				}
1217  				inst.Args[narg] = base + index
1218  			}
1219  			narg++
1220  
1221  		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1222  			if haveMem {
1223  				inst.Op = 0
1224  				break Decode
1225  			}
1226  			inst.Args[narg] = baseReg[x] + Reg(rm&7)
1227  			narg++
1228  
1229  		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1230  			if haveMem {
1231  				inst.Op = 0
1232  				break Decode
1233  			}
1234  			inst.Args[narg] = baseReg[x] + Reg(rm)
1235  			narg++
1236  
1237  		case xArgRel8:
1238  			inst.PCRelOff = immcpos
1239  			inst.PCRel = 1
1240  			inst.Args[narg] = Rel(int8(immc))
1241  			narg++
1242  
1243  		case xArgRel16:
1244  			inst.PCRelOff = immcpos
1245  			inst.PCRel = 2
1246  			inst.Args[narg] = Rel(int16(immc))
1247  			narg++
1248  
1249  		case xArgRel32:
1250  			inst.PCRelOff = immcpos
1251  			inst.PCRel = 4
1252  			inst.Args[narg] = Rel(int32(immc))
1253  			narg++
1254  		}
1255  	}
1256  
1257  	if inst.Op == 0 {
1258  		// Invalid instruction.
1259  		if nprefix > 0 {
1260  			return instPrefix(src[0], mode) // invalid instruction
1261  		}
1262  		return Inst{Len: pos}, ErrUnrecognized
1263  	}
1264  
1265  	// Matched! Hooray!
1266  
1267  	// 90 decodes as XCHG EAX, EAX but is NOP.
1268  	// 66 90 decodes as XCHG AX, AX and is NOP too.
1269  	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
1270  	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
1271  	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
1272  	// It's all too special to handle in the decoding tables, at least for now.
1273  	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
1274  		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
1275  			inst.Op = NOP
1276  			if dataSizeIndex >= 0 {
1277  				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
1278  			}
1279  			inst.Args[0] = nil
1280  			inst.Args[1] = nil
1281  		}
1282  		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
1283  			inst.Prefix[repIndex] |= PrefixImplicit
1284  			inst.Op = PAUSE
1285  			inst.Args[0] = nil
1286  			inst.Args[1] = nil
1287  		} else if gnuCompat {
1288  			for i := nprefix - 1; i >= 0; i-- {
1289  				if inst.Prefix[i]&0xFF == 0xF3 {
1290  					inst.Prefix[i] |= PrefixImplicit
1291  					inst.Op = PAUSE
1292  					inst.Args[0] = nil
1293  					inst.Args[1] = nil
1294  					break
1295  				}
1296  			}
1297  		}
1298  	}
1299  
1300  	// defaultSeg returns the default segment for an implicit
1301  	// memory reference: the final override if present, or else DS.
1302  	defaultSeg := func() Reg {
1303  		if segIndex >= 0 {
1304  			inst.Prefix[segIndex] |= PrefixImplicit
1305  			return prefixToSegment(inst.Prefix[segIndex])
1306  		}
1307  		return DS
1308  	}
1309  
1310  	// Add implicit arguments not present in the tables.
1311  	// Normally we shy away from making implicit arguments explicit,
1312  	// following the Intel manuals, but adding the arguments seems
1313  	// the best way to express the effect of the segment override prefixes.
1314  	// TODO(rsc): Perhaps add these to the tables and
1315  	// create bytecode instructions for them.
1316  	usedAddrSize := false
1317  	switch inst.Op {
1318  	case INSB, INSW, INSD:
1319  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1320  		inst.Args[1] = DX
1321  		usedAddrSize = true
1322  
1323  	case OUTSB, OUTSW, OUTSD:
1324  		inst.Args[0] = DX
1325  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1326  		usedAddrSize = true
1327  
1328  	case MOVSB, MOVSW, MOVSD, MOVSQ:
1329  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1330  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1331  		usedAddrSize = true
1332  
1333  	case CMPSB, CMPSW, CMPSD, CMPSQ:
1334  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1335  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1336  		usedAddrSize = true
1337  
1338  	case LODSB, LODSW, LODSD, LODSQ:
1339  		switch inst.Op {
1340  		case LODSB:
1341  			inst.Args[0] = AL
1342  		case LODSW:
1343  			inst.Args[0] = AX
1344  		case LODSD:
1345  			inst.Args[0] = EAX
1346  		case LODSQ:
1347  			inst.Args[0] = RAX
1348  		}
1349  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1350  		usedAddrSize = true
1351  
1352  	case STOSB, STOSW, STOSD, STOSQ:
1353  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1354  		switch inst.Op {
1355  		case STOSB:
1356  			inst.Args[1] = AL
1357  		case STOSW:
1358  			inst.Args[1] = AX
1359  		case STOSD:
1360  			inst.Args[1] = EAX
1361  		case STOSQ:
1362  			inst.Args[1] = RAX
1363  		}
1364  		usedAddrSize = true
1365  
1366  	case SCASB, SCASW, SCASD, SCASQ:
1367  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1368  		switch inst.Op {
1369  		case SCASB:
1370  			inst.Args[0] = AL
1371  		case SCASW:
1372  			inst.Args[0] = AX
1373  		case SCASD:
1374  			inst.Args[0] = EAX
1375  		case SCASQ:
1376  			inst.Args[0] = RAX
1377  		}
1378  		usedAddrSize = true
1379  
1380  	case XLATB:
1381  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
1382  		usedAddrSize = true
1383  	}
1384  
1385  	// If we used the address size annotation to construct the
1386  	// argument list, mark that prefix as implicit: it doesn't need
1387  	// to be shown when printing the instruction.
1388  	if haveMem || usedAddrSize {
1389  		if addrSizeIndex >= 0 {
1390  			inst.Prefix[addrSizeIndex] |= PrefixImplicit
1391  		}
1392  	}
1393  
1394  	// Similarly, if there's some memory operand, the segment
1395  	// will be shown there and doesn't need to be shown as an
1396  	// explicit prefix.
1397  	if haveMem {
1398  		if segIndex >= 0 {
1399  			inst.Prefix[segIndex] |= PrefixImplicit
1400  		}
1401  	}
1402  
1403  	// Branch predict prefixes are overloaded segment prefixes,
1404  	// since segment prefixes don't make sense on conditional jumps.
1405  	// Rewrite final instance to prediction prefix.
1406  	// The set of instructions to which the prefixes apply (other than the
1407  	// Jcc conditional jumps) is not 100% clear from the manuals, but
1408  	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
1409  	// so we'll follow along.
1410  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1411  	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
1412  	PredictLoop:
1413  		for i := nprefix - 1; i >= 0; i-- {
1414  			p := inst.Prefix[i]
1415  			switch p & 0xFF {
1416  			case PrefixCS:
1417  				inst.Prefix[i] = PrefixPN
1418  				break PredictLoop
1419  			case PrefixDS:
1420  				inst.Prefix[i] = PrefixPT
1421  				break PredictLoop
1422  			}
1423  		}
1424  	}
1425  
1426  	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
1427  	// A REPN applied to certain control transfers is a BND prefix to bound
1428  	// the range of possible destinations. There's surprisingly little documentation
1429  	// about this, so we just do what libopcodes and xed agree on.
1430  	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
1431  	// does not turn into a BND.
1432  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1433  	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
1434  		for i := nprefix - 1; i >= 0; i-- {
1435  			p := inst.Prefix[i]
1436  			if p&^PrefixIgnored == PrefixREPN {
1437  				inst.Prefix[i] = PrefixBND
1438  				break
1439  			}
1440  		}
1441  	}
1442  
1443  	// The LOCK prefix only applies to certain instructions, and then only
1444  	// to instances of the instruction with a memory destination.
1445  	// Other uses of LOCK are invalid and cause a processor exception,
1446  	// in contrast to the "just ignore it" spirit applied to all other prefixes.
1447  	// Mark invalid lock prefixes.
1448  	hasLock := false
1449  	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
1450  		switch inst.Op {
1451  		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1452  		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
1453  			if isMem(inst.Args[0]) {
1454  				hasLock = true
1455  				break
1456  			}
1457  			fallthrough
1458  		default:
1459  			inst.Prefix[lockIndex] |= PrefixInvalid
1460  		}
1461  	}
1462  
1463  	// In certain cases, all of which require a memory destination,
1464  	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
1465  	// from the Intel Transactional Synchronization Extensions (TSX).
1466  	//
1467  	// The specific rules are:
1468  	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
1469  	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
1470  	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
1471  	if isMem(inst.Args[0]) {
1472  		if inst.Op == XCHG {
1473  			hasLock = true
1474  		}
1475  
1476  		for i := len(inst.Prefix) - 1; i >= 0; i-- {
1477  			p := inst.Prefix[i] &^ PrefixIgnored
1478  			switch p {
1479  			case PrefixREPN:
1480  				if hasLock {
1481  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
1482  				}
1483  
1484  			case PrefixREP:
1485  				if hasLock {
1486  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1487  				}
1488  
1489  				if inst.Op == MOV {
1490  					op := (inst.Opcode >> 24) &^ 1
1491  					if op == 0x88 || op == 0xC6 {
1492  						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1493  					}
1494  				}
1495  			}
1496  		}
1497  	}
1498  
1499  	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
1500  	if repIndex >= 0 {
1501  		switch inst.Prefix[repIndex] {
1502  		case PrefixREP, PrefixREPN:
1503  			switch inst.Op {
1504  			// According to the manuals, the REP/REPE prefix applies to all of these,
1505  			// while the REPN applies only to some of them. However, both libopcodes
1506  			// and xed show both prefixes explicitly for all instructions, so we do the same.
1507  			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1508  			case INSB, INSW, INSD,
1509  				MOVSB, MOVSW, MOVSD, MOVSQ,
1510  				OUTSB, OUTSW, OUTSD,
1511  				LODSB, LODSW, LODSD, LODSQ,
1512  				CMPSB, CMPSW, CMPSD, CMPSQ,
1513  				SCASB, SCASW, SCASD, SCASQ,
1514  				STOSB, STOSW, STOSD, STOSQ:
1515  				// ok
1516  			default:
1517  				inst.Prefix[repIndex] |= PrefixIgnored
1518  			}
1519  		}
1520  	}
1521  
1522  	// If REX was present, mark implicit if all the 1 bits were consumed.
1523  	if rexIndex >= 0 {
1524  		if rexUsed != 0 {
1525  			rexUsed |= PrefixREX
1526  		}
1527  		if rex&^rexUsed == 0 {
1528  			inst.Prefix[rexIndex] |= PrefixImplicit
1529  		}
1530  	}
1531  
1532  	inst.DataSize = dataMode
1533  	inst.AddrSize = addrMode
1534  	inst.Mode = mode
1535  	inst.Len = pos
1536  	return inst, nil
1537  }
1538  
// errInternal is a sentinel error whose message is "internal error".
// NOTE(review): presumably reported when decoding fails for a reason internal
// to the decoder rather than because of bad input — confirm at the use sites.
var errInternal = errors.New("internal error")
1540  
1541  // addr16 records the eight 16-bit addressing modes.
1542  var addr16 = [8]Mem{
1543  	{Base: BX, Scale: 1, Index: SI},
1544  	{Base: BX, Scale: 1, Index: DI},
1545  	{Base: BP, Scale: 1, Index: SI},
1546  	{Base: BP, Scale: 1, Index: DI},
1547  	{Base: SI},
1548  	{Base: DI},
1549  	{Base: BP},
1550  	{Base: BX},
1551  }
1552  
1553  // baseRegForBits returns the base register for a given register size in bits.
1554  func baseRegForBits(bits int) Reg {
1555  	switch bits {
1556  	case 8:
1557  		return AL
1558  	case 16:
1559  		return AX
1560  	case 32:
1561  		return EAX
1562  	case 64:
1563  		return RAX
1564  	}
1565  	return 0
1566  }
1567  
1568  // baseReg records the base register for argument types that specify
1569  // a range of registers indexed by op, regop, or rm.
1570  var baseReg = [...]Reg{
1571  	xArgDR0dashDR7: DR0,
1572  	xArgMm1:        M0,
1573  	xArgMm2:        M0,
1574  	xArgMm2M64:     M0,
1575  	xArgMm:         M0,
1576  	xArgMmM32:      M0,
1577  	xArgMmM64:      M0,
1578  	xArgR16:        AX,
1579  	xArgR16op:      AX,
1580  	xArgR32:        EAX,
1581  	xArgR32M16:     EAX,
1582  	xArgR32M8:      EAX,
1583  	xArgR32op:      EAX,
1584  	xArgR64:        RAX,
1585  	xArgR64M16:     RAX,
1586  	xArgR64op:      RAX,
1587  	xArgR8:         AL,
1588  	xArgR8op:       AL,
1589  	xArgRM16:       AX,
1590  	xArgRM32:       EAX,
1591  	xArgRM64:       RAX,
1592  	xArgRM8:        AL,
1593  	xArgRmf16:      AX,
1594  	xArgRmf32:      EAX,
1595  	xArgRmf64:      RAX,
1596  	xArgSTi:        F0,
1597  	xArgTR0dashTR7: TR0,
1598  	xArgXmm1:       X0,
1599  	xArgYmm1:       X0,
1600  	xArgXmm2:       X0,
1601  	xArgXmm2M128:   X0,
1602  	xArgYmm2M256:   X0,
1603  	xArgXmm2M16:    X0,
1604  	xArgXmm2M32:    X0,
1605  	xArgXmm2M64:    X0,
1606  	xArgXmm:        X0,
1607  	xArgXmmM128:    X0,
1608  	xArgXmmM32:     X0,
1609  	xArgXmmM64:     X0,
1610  }
1611  
1612  // prefixToSegment returns the segment register
1613  // corresponding to a particular segment prefix.
1614  func prefixToSegment(p Prefix) Reg {
1615  	switch p &^ PrefixImplicit {
1616  	case PrefixCS:
1617  		return CS
1618  	case PrefixDS:
1619  		return DS
1620  	case PrefixES:
1621  		return ES
1622  	case PrefixFS:
1623  		return FS
1624  	case PrefixGS:
1625  		return GS
1626  	case PrefixSS:
1627  		return SS
1628  	}
1629  	return 0
1630  }
1631  
1632  // fixedArg records the fixed arguments corresponding to the given bytecodes.
1633  var fixedArg = [...]Arg{
1634  	xArg1:    Imm(1),
1635  	xArg3:    Imm(3),
1636  	xArgAL:   AL,
1637  	xArgAX:   AX,
1638  	xArgDX:   DX,
1639  	xArgEAX:  EAX,
1640  	xArgEDX:  EDX,
1641  	xArgRAX:  RAX,
1642  	xArgRDX:  RDX,
1643  	xArgCL:   CL,
1644  	xArgCS:   CS,
1645  	xArgDS:   DS,
1646  	xArgES:   ES,
1647  	xArgFS:   FS,
1648  	xArgGS:   GS,
1649  	xArgSS:   SS,
1650  	xArgST:   F0,
1651  	xArgXMM0: X0,
1652  }
1653  
1654  // memBytes records the size of the memory pointed at
1655  // by a memory argument of the given form.
1656  var memBytes = [...]int8{
1657  	xArgM128:       128 / 8,
1658  	xArgM256:       256 / 8,
1659  	xArgM16:        16 / 8,
1660  	xArgM16and16:   (16 + 16) / 8,
1661  	xArgM16colon16: (16 + 16) / 8,
1662  	xArgM16colon32: (16 + 32) / 8,
1663  	xArgM16int:     16 / 8,
1664  	xArgM2byte:     2,
1665  	xArgM32:        32 / 8,
1666  	xArgM32and32:   (32 + 32) / 8,
1667  	xArgM32fp:      32 / 8,
1668  	xArgM32int:     32 / 8,
1669  	xArgM64:        64 / 8,
1670  	xArgM64fp:      64 / 8,
1671  	xArgM64int:     64 / 8,
1672  	xArgMm2M64:     64 / 8,
1673  	xArgMmM32:      32 / 8,
1674  	xArgMmM64:      64 / 8,
1675  	xArgMoffs16:    16 / 8,
1676  	xArgMoffs32:    32 / 8,
1677  	xArgMoffs64:    64 / 8,
1678  	xArgMoffs8:     8 / 8,
1679  	xArgR32M16:     16 / 8,
1680  	xArgR32M8:      8 / 8,
1681  	xArgR64M16:     16 / 8,
1682  	xArgRM16:       16 / 8,
1683  	xArgRM32:       32 / 8,
1684  	xArgRM64:       64 / 8,
1685  	xArgRM8:        8 / 8,
1686  	xArgXmm2M128:   128 / 8,
1687  	xArgYmm2M256:   256 / 8,
1688  	xArgXmm2M16:    16 / 8,
1689  	xArgXmm2M32:    32 / 8,
1690  	xArgXmm2M64:    64 / 8,
1691  	xArgXmm:        128 / 8,
1692  	xArgXmmM128:    128 / 8,
1693  	xArgXmmM32:     32 / 8,
1694  	xArgXmmM64:     64 / 8,
1695  }
1696  
1697  // isCondJmp records the conditional jumps.
1698  var isCondJmp = [maxOp + 1]bool{
1699  	JA:  true,
1700  	JAE: true,
1701  	JB:  true,
1702  	JBE: true,
1703  	JE:  true,
1704  	JG:  true,
1705  	JGE: true,
1706  	JL:  true,
1707  	JLE: true,
1708  	JNE: true,
1709  	JNO: true,
1710  	JNP: true,
1711  	JNS: true,
1712  	JO:  true,
1713  	JP:  true,
1714  	JS:  true,
1715  }
1716  
1717  // isLoop records the loop operators.
1718  var isLoop = [maxOp + 1]bool{
1719  	LOOP:   true,
1720  	LOOPE:  true,
1721  	LOOPNE: true,
1722  	JECXZ:  true,
1723  	JRCXZ:  true,
1724  }
1725