asm6.go raw
1 // Inferno utils/6l/span.c
2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
3 //
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6 // Portions Copyright © 1997-1999 Vita Nuova Limited
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8 // Portions Copyright © 2004,2006 Bruce Ellis
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11 // Portions Copyright © 2009 The Go Authors. All rights reserved.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a copy
14 // of this software and associated documentation files (the "Software"), to deal
15 // in the Software without restriction, including without limitation the rights
16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 // copies of the Software, and to permit persons to whom the Software is
18 // furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included in
21 // all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 // THE SOFTWARE.
30
31 package x86
32
33 import (
34 "github.com/twitchyliquid64/golang-asm/obj"
35 "github.com/twitchyliquid64/golang-asm/objabi"
36 "github.com/twitchyliquid64/golang-asm/sys"
37 "encoding/binary"
38 "fmt"
39 "log"
40 "strings"
41 )
42
43 var (
44 plan9privates *obj.LSym
45 deferreturn *obj.LSym
46 )
47
48 // Instruction layout.
49
// Loop alignment.
//
// A loop entry is defined as the target of a backward jump. The goal is to
// place each loop entry on a loopAlign-byte boundary, spending at most
// maxLoopPad bytes of NOP padding to get there.
//
// For comparison, gcc's 'generic x86-64' config uses a maximum pad of 10
// and aligns all jump targets, not just backward jump targets.
//
// As of 6/1/2012, the effect of setting maxLoopPad = 10 here is very slight
// but negative, so the alignment is disabled by setting maxLoopPad = 0.
// The code is kept for reference and for future experiments.

// loopAlign is the boundary, in bytes, that loop entries are aligned to.
const loopAlign = 16

// maxLoopPad is the largest number of NOP bytes inserted to align a loop
// entry; 0 turns loop alignment off.
const maxLoopPad = 0
67
// Bit flags describing properties of a jump target.
const (
	// branchBackwards marks a target located behind the jump.
	// It is how jumps to loop headers are expressed.
	branchBackwards = 1 << 0
	// branchShort marks a branch whose target is close:
	// the offset lies in the -128..127 range.
	branchShort = 1 << 1
	// branchLoopHead marks a loop entry.
	// It is used to insert padding for misaligned loops.
	branchLoopHead = 1 << 2
)
80
// opBytes is the fixed-size array of encoding bytes carried by an optab row.
// Every ytab line reserves a fixed number of bytes inside it.
//
// The length should be the smallest one that still fits the longest
// optab op line.
type opBytes [31]byte
87
88 type Optab struct {
89 as obj.As
90 ytab []ytab
91 prefix uint8
92 op opBytes
93 }
94
95 type movtab struct {
96 as obj.As
97 ft uint8
98 f3t uint8
99 tt uint8
100 code uint8
101 op [4]uint8
102 }
103
// Operand classes ("Ytypes").
//
// oclass computes the specific class of an operand; the ycover table then
// says which more general classes that operand also satisfies. ytab lines
// list the classes an instruction form accepts.
//
// The values are assigned by iota, so the order of this list is significant:
// do not insert or reorder entries without checking every user. Ymax is the
// number of classes and sizes the ycover table.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yu2 // $x, x fits in uint2
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
	Yxr           // X0..X15
	YxrEvex       // X0..X31
	Yxm
	YxmEvex       // YxrEvex+Ym
	Yxvm          // VSIB vector array; vm32x/vm64x
	YxvmEvex      // Yxvm which permits High-16 X register as index.
	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
	Yyr           // Y0..Y15
	YyrEvex       // Y0..Y31
	Yym
	YymEvex   // YyrEvex+Ym
	Yyvm      // VSIB vector array; vm32y/vm64y
	YyvmEvex  // Yyvm which permits High-16 Y register as index.
	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
	Yzr       // Z0..Z31
	Yzm       // Yzr+Ym
	Yzvm      // VSIB vector array; vm32z/vm64z
	Yk0       // K0
	Yknot0    // K1..K7; write mask
	Yk        // K0..K7; used for KOP
	Ykm       // Yk+Ym; used for KOP
	Ytls
	Ytextsize
	Yindir
	Ymax // number of operand classes; not a class itself
)
197
// Encoding cases ("Ztypes"): the first field of every ytab line.
//
// Together with the opcode bytes the z pointer is at, the Ztype of the
// matching ytab line decides which bytes are laid down; the switch on
// yt.zcase in doasm implements the individual cases.
//
// The values are assigned by iota, so the order of this list is significant.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Zlitr_m
	Zlit_m_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Z_m_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r // mmx1,mmx2/mem64,imm8
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zbyte

	// VEX-encoded cases.
	Zvex_rm_v_r
	Zvex_rm_v_ro
	Zvex_r_v_rm
	Zvex_i_rm_vo
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zvex
	Zvex_rm_r_vo
	Zvex_i_r_rm
	Zvex_hr_rm_v_r

	// EVEX-encoded cases.
	// NOTE(review): Zevex_first and Zevex_last look like sentinels that
	// bracket the EVEX cases for range checks — confirm against their users.
	Zevex_first
	Zevex_i_r_k_rm
	Zevex_i_r_rm
	Zevex_i_rm_k_r
	Zevex_i_rm_k_vo
	Zevex_i_rm_r
	Zevex_i_rm_v_k_r
	Zevex_i_rm_v_r
	Zevex_i_rm_vo
	Zevex_k_rmo
	Zevex_r_k_rm
	Zevex_r_v_k_rm
	Zevex_r_v_rm
	Zevex_rm_k_r
	Zevex_rm_v_k_r
	Zevex_rm_v_r
	Zevex_last

	Zmax // number of encoding cases; not a case itself
)
282
// Prefix selectors used by optab rows (in the prefix field and, in places,
// inside the opcode bytes). Some are literal prefix or escape bytes; the
// ones marked "symbolic" only have to be distinct from the others.
const (
	Px   = 0
	Px1  = 1    // symbolic; exact value doesn't matter
	P32  = 0x32 // 32-bit only
	Pe   = 0x66 // operand escape
	Pm   = 0x0f // 2byte opcode escape
	Pq   = 0xff // both escapes: 66 0f
	Pb   = 0xfe // byte operands
	Pf2  = 0xf2 // xmm escape 1: f2 0f
	Pf3  = 0xf3 // xmm escape 2: f3 0f
	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
	Pq3  = 0x67 // xmm escape 3: 66 48 0f
	Pq4  = 0x68 // xmm escape 4: 66 0F 38
	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
	Pq5  = 0x6a // xmm escape 5: F3 0F 38
	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
	Pw   = 0x48 // Rex.w
	Pw8  = 0x90 // symbolic; exact value doesn't matter
	Py   = 0x80 // defaults to 64-bit mode
	Py1  = 0x81 // symbolic; exact value doesn't matter
	Py3  = 0x83 // symbolic; exact value doesn't matter
	Pavx = 0x84 // symbolic: exact value doesn't matter
)

// Register-extension bits, lowest bit first.
const (
	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
	Rxx     = 1 << 1 // extend sib index
	Rxr     = 1 << 2 // extend modrm reg
	Rxw     = 1 << 3 // =1, 64-bit operand size
	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
)
313
// Encoding of the VEX prefix as stored in the tables.
// The P, L, and W fields are placed at the bit positions they
// eventually occupy in the VEX prefix bytes.

// avxEscape is a spare bit that keeps the leading [E]VEX encoding byte
// different from 0x0f even if all other VEX fields are 0.
const avxEscape = 1 << 6

// P field - 2 bits.
const (
	vex66 = 1 << 0
	vexF3 = 2 << 0
	vexF2 = 3 << 0
)

// L field - 1 bit.
const (
	vexLZ  = 0 << 2
	vexLIG = 0 << 2
	vex128 = 0 << 2
	vex256 = 1 << 2
)

// W field - 1 bit.
const (
	vexWIG = 0 << 7
	vexW0  = 0 << 7
	vexW1  = 1 << 7
)

// M field - 5 bits, but mostly reserved; we can store up to 3.
const (
	vex0F   = 1 << 3
	vex0F38 = 2 << 3
	vex0F3A = 3 << 3
)
345
346 var ycover [Ymax * Ymax]uint8
347
348 var reg [MAXREG]int
349
350 var regrex [MAXREG + 1]int
351
// ynone is the operand pattern for instructions written without operands:
// a single Zlit line with an empty argList.
var ynone = []ytab{
	{Zlit, 1, argList{}},
}

// ytext lists the two accepted operand shapes of a Zpseudo entry that ends
// in Ytextsize: with and without a Yi32 operand in the middle.
var ytext = []ytab{
	{Zpseudo, 0, argList{Ymb, Ytextsize}},
	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
}

// ynop lists the operand shapes accepted by a Zpseudo entry with zero or
// one operand.
// NOTE(review): the Yiauto, Yml and Yrf lines appear twice and Yxr appears
// with zoffset 0 and then 1; this looks like a leftover, but row order and
// zoffset feed the opcode-byte cursor, so confirm before deduplicating.
var ynop = []ytab{
	{Zpseudo, 0, argList{}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 0, argList{Yxr}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 1, argList{Yxr}},
}

// yfuncdata accepts a Yi32 operand followed by a Ym operand (Zpseudo).
var yfuncdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Ym}},
}

// ypcdata accepts two Yi32 operands (Zpseudo).
var ypcdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Yi32}},
}
380
// yxorb holds the byte ALU forms; optab uses it for e.g. ADCB, ADDB and ANDB.
// Its zoffsets (1+2+1+1) account for the five opcode bytes those rows carry.
var yxorb = []ytab{
	{Zib_, 1, argList{Yi32, Yal}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

// yaddl holds the ALU forms used by e.g. the L, Q and W variants of ADC, ADD
// and AND in optab. The Yi8 line is listed ahead of the Yi32 lines, and the
// zoffsets (2+1+2+1+1) account for the seven opcode bytes those rows carry.
var yaddl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

// yincl holds the single-operand forms used by INCL and DECL in optab:
// a Z_rp line for a Yrl operand and a Zo_m line for Yml.
var yincl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Yml}},
}

// yincq is yincl without the Z_rp line; optab uses it for the Q and W
// variants of INC and DEC.
var yincq = []ytab{
	{Zo_m, 2, argList{Yml}},
}

// ycmpb mirrors yxorb: the same operand classes with the operand order
// swapped. optab uses it for CMPB.
var ycmpb = []ytab{
	{Z_ib, 1, argList{Yal, Yi32}},
	{Zm_ibo, 2, argList{Ymb, Yi32}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
}

// ycmpl mirrors yaddl: the same operand classes with the operand order
// swapped. optab uses it for CMPL, CMPQ and CMPW.
var ycmpl = []ytab{
	{Zm_ibo, 2, argList{Yml, Yi8}},
	{Z_il, 1, argList{Yax, Yi32}},
	{Zm_ilo, 2, argList{Yml, Yi32}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

// yshb pairs a Ymb destination with a count given as Yi1, Yu8 or Ycx.
// Unlike yshl it has no Ycl line.
var yshb = []ytab{
	{Zo_m, 2, argList{Yi1, Ymb}},
	{Zibo_m, 2, argList{Yu8, Ymb}},
	{Zo_m, 2, argList{Ycx, Ymb}},
}

// yshl pairs a Yml destination with a count given as Yi1, Yu8, Ycl or Ycx.
var yshl = []ytab{
	{Zo_m, 2, argList{Yi1, Yml}},
	{Zibo_m, 2, argList{Yu8, Yml}},
	{Zo_m, 2, argList{Ycl, Yml}},
	{Zo_m, 2, argList{Ycx, Yml}},
}

// ytestl is yaddl without the leading Yi8 line.
var ytestl = []ytab{
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

// ymovb holds the byte move forms: Yrb<->Ymb in both directions, then a
// Yi32 immediate into Yrb or Ymb.
var ymovb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zib_rp, 1, argList{Yi32, Yrb}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
}
446
// ybtl holds the forms used by the BT, BTC, BTR and BTS families in optab:
// a Yi8 immediate or a Yrl operand, each paired with Yml.
var ybtl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

// ymovw holds the move forms between Yrl and Yml, a Yi32 immediate into
// either, and a final Zaut_r line for a Yiauto source.
var ymovw = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

// ymovl is ymovw with the MMX and XMM MOVD lines inserted ahead of the
// final Zaut_r line.
var ymovl = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

// yret accepts either no operand or a single Yi32 operand (Zo_iw).
var yret = []ytab{
	{Zo_iw, 1, argList{}},
	{Zo_iw, 1, argList{Yi32}},
}

// ymovq holds the 64-bit move forms. The trailing comment on each line
// names the prefix and opcode byte that line selects.
var ymovq = []ytab{
	// valid in 32-bit mode
	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
}

// ymovbe holds the two directions between Ym and Yrl; each line reserves
// three opcode bytes.
var ymovbe = []ytab{
	{Zlitm_r, 3, argList{Ym, Yrl}},
	{Zlitr_m, 3, argList{Yrl, Ym}},
}
502
// ym_rl accepts a Ym source and a Yrl destination.
var ym_rl = []ytab{
	{Zm_r, 1, argList{Ym, Yrl}},
}

// yrl_m accepts a Yrl source and a Ym destination; optab uses it for
// BOUNDL and BOUNDW.
var yrl_m = []ytab{
	{Zr_m, 1, argList{Yrl, Ym}},
}

// ymb_rl accepts a Ymb source and a Yrl destination (Zmb_r).
var ymb_rl = []ytab{
	{Zmb_r, 1, argList{Ymb, Yrl}},
}

// yml_rl accepts a Yml source and a Yrl destination; optab uses it for
// e.g. ADCX, ADOX, BSF, BSR, LAR and the CMOVcc families.
var yml_rl = []ytab{
	{Zm_r, 1, argList{Yml, Yrl}},
}

// yrl_ml accepts a Yrl source and a Yml destination; optab uses it for ARPL.
var yrl_ml = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
}

// yml_mb accepts Yrb and Ymb operands in either order.
var yml_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

// yrb_mb is the first line of yml_mb only: Yrb source, Ymb destination.
var yrb_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
}

// yxchg holds the exchange forms: the two lines involving Yax come first,
// followed by the general Yrl/Yml lines.
var yxchg = []ytab{
	{Z_rp, 1, argList{Yax, Yrl}},
	{Zrp_, 1, argList{Yrl, Yax}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

// ydivl accepts a single Yml operand; optab uses it for the L, Q and W
// variants of DIV and IDIV.
var ydivl = []ytab{
	{Zm_o, 2, argList{Yml}},
}

// ydivb accepts a single Ymb operand; optab uses it for DIVB, IDIVB and IMULB.
var ydivb = []ytab{
	{Zm_o, 2, argList{Ymb}},
}

// yimul holds the one- and two-operand IMUL forms used by IMULL, IMULQ and
// IMULW. Its zoffsets (2+1+1+2) account for the six opcode bytes of those
// optab rows.
var yimul = []ytab{
	{Zm_o, 2, argList{Yml}},
	{Zib_rr, 1, argList{Yi8, Yrl}},
	{Zil_rr, 1, argList{Yi32, Yrl}},
	{Zm_r, 2, argList{Yml, Yrl}},
}

// yimul3 holds the three-operand forms used by IMUL3W, IMUL3L and IMUL3Q:
// a Yi8 or Yi32 immediate, a Yml source and a Yrl destination.
var yimul3 = []ytab{
	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
}

// ybyte accepts a single Yi64 operand (Zbyte); optab uses it for BYTE.
var ybyte = []ytab{
	{Zbyte, 1, argList{Yi64}},
}
562
// yin accepts either a Yi32 operand or no operand; optab uses it for
// INB, INW and INL.
var yin = []ytab{
	{Zib_, 1, argList{Yi32}},
	{Zlit, 1, argList{}},
}

// yint accepts a single Yi32 operand; optab uses it for INT.
var yint = []ytab{
	{Zib_, 1, argList{Yi32}},
}

// ypushl accepts one operand: Yrl, Ym, or an immediate (Yi8 before Yi32).
var ypushl = []ytab{
	{Zrp_, 1, argList{Yrl}},
	{Zm_o, 2, argList{Ym}},
	{Zib_, 1, argList{Yi8}},
	{Zil_, 1, argList{Yi32}},
}

// ypopl accepts one operand: Yrl or Ym.
var ypopl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Ym}},
}

// ywrfsbase accepts a single Yrl operand, encoded with Zm_o.
var ywrfsbase = []ytab{
	{Zm_o, 2, argList{Yrl}},
}

// yrdrand accepts a single Yrl operand, encoded with Zo_m.
var yrdrand = []ytab{
	{Zo_m, 2, argList{Yrl}},
}

// yclflush accepts a single Ym operand; optab uses it for CLDEMOTE,
// CLFLUSH, CLFLUSHOPT and CLWB.
var yclflush = []ytab{
	{Zo_m, 2, argList{Ym}},
}

// ybswap accepts a single Yrl operand; optab uses it for BSWAPL and BSWAPQ.
var ybswap = []ytab{
	{Z_rp, 2, argList{Yrl}},
}

// yscond accepts a single Ymb operand; optab uses it for e.g. DECB and INCB.
var yscond = []ytab{
	{Zo_m, 2, argList{Ymb}},
}

// yjcond holds the conditional-jump forms used by the Jcc rows of optab:
// a Ybr target, optionally preceded by a Yi0 or Yi1 operand.
var yjcond = []ytab{
	{Zbr, 0, argList{Ybr}},
	{Zbr, 0, argList{Yi0, Ybr}},
	{Zbr, 1, argList{Yi1, Ybr}},
}

// yloop accepts a single Ybr target (Zloop); optab uses it for the JCXZ rows.
var yloop = []ytab{
	{Zloop, 1, argList{Ybr}},
}

// ycall holds the call forms used by obj.ACALL. Its zoffsets (0+2+2+0+1)
// account for the five opcode bytes of that optab row.
var ycall = []ytab{
	{Zcallindreg, 0, argList{Yml}},
	{Zcallindreg, 2, argList{Yrx, Yrx}},
	{Zcallind, 2, argList{Yindir}},
	{Zcall, 0, argList{Ybr}},
	{Zcallcon, 1, argList{Yi32}},
}

// yduff accepts a single Yi32 operand (Zcallduff).
var yduff = []ytab{
	{Zcallduff, 1, argList{Yi32}},
}

// yjmp holds the jump forms used by obj.AJMP: a Yml operand, a Ybr target,
// or a Yi32 constant.
var yjmp = []ytab{
	{Zo_m64, 2, argList{Yml}},
	{Zjmp, 0, argList{Ybr}},
	{Zjmpcon, 1, argList{Yi32}},
}
631
// The tables from yfmvd through ycompp all pair Yf0 with a Ym or Yrf
// operand; they differ only in which directions are accepted.

// yfmvd accepts Ym or Yrf paired with Yf0, in both directions.
var yfmvd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

// yfmvdp accepts only the Yf0-as-source lines of yfmvd.
var yfmvdp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

// yfmvf accepts Ym paired with Yf0, in both directions.
var yfmvf = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
}

// yfmvx accepts only a Ym source with a Yf0 destination.
var yfmvx = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
}

// yfmvp accepts only a Yf0 source with a Ym destination.
var yfmvp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
}

// yfcmv accepts only a Yrf source with a Yf0 destination.
var yfcmv = []ytab{
	{Zm_o, 2, argList{Yrf, Yf0}},
}

// yfadd accepts Ym or Yrf as source with Yf0 as destination, plus Yf0 as
// source with a Yrf destination.
var yfadd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

// yfxch accepts Yf0 and Yrf in either order.
var yfxch = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}},
	{Zm_o, 2, argList{Yrf, Yf0}},
}

// ycompp accepts a Yf0 source with a Yrf destination.
var ycompp = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
}

// ystsw accepts either a single Ym operand or a single Yax operand.
var ystsw = []ytab{
	{Zo_m, 2, argList{Ym}},
	{Zlit, 1, argList{Yax}},
}

// ysvrs_mo accepts a single Ym operand, encoded with Zm_o; optab uses it
// for FXRSTOR and FXRSTOR64.
var ysvrs_mo = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// unaryDst version of "ysvrs_mo".
// It is encoded with Zo_m; optab uses it for FXSAVE and FXSAVE64.
var ysvrs_om = []ytab{
	{Zo_m, 2, argList{Ym}},
}
689
// ymm accepts either the MMX pair (Ymm, Ymr) or the XMM pair (Yxm, Yxr).
var ymm = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
}

// yxm accepts a Yxm source and a Yxr destination; optab uses it for most
// of the packed/scalar arithmetic rows (ADDPD, ANDPS, DIVSD, ...).
var yxm = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

// yxm_q4 accepts the same operands as yxm but is encoded with Zm_r
// instead of Zm_r_xm.
var yxm_q4 = []ytab{
	{Zm_r, 1, argList{Yxm, Yxr}},
}

// yxcvm1 accepts a Yxm source with a Yxr or Ymr destination; optab uses it
// for CVTPD2PL, CVTPS2PL, CVTTPD2PL and CVTTPS2PL.
var yxcvm1 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Yxm, Ymr}},
}

// yxcvm2 accepts a Yxm or Ymm source with a Yxr destination; optab uses it
// for CVTPL2PD and CVTPL2PS.
var yxcvm2 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Ymm, Yxr}},
}

// yxr accepts two Yxr operands.
var yxr = []ytab{
	{Zm_r_xm, 1, argList{Yxr, Yxr}},
}

// yxr_ml accepts a Yxr source and a Yml destination.
var yxr_ml = []ytab{
	{Zr_m_xm, 1, argList{Yxr, Yml}},
}

// ymr accepts two Ymr operands.
var ymr = []ytab{
	{Zm_r, 1, argList{Ymr, Ymr}},
}

// ymr_ml accepts a Ymr source and a Yml destination.
var ymr_ml = []ytab{
	{Zr_m_xm, 1, argList{Ymr, Yml}},
}

// yxcmpi accepts (Yxm, Yxr, Yi8); optab uses it for CMPPD, CMPPS, CMPSD
// and CMPSS.
var yxcmpi = []ytab{
	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
}

// yxmov accepts Yxm and Yxr in either direction.
var yxmov = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
	{Zr_m_xm, 1, argList{Yxr, Yxm}},
}

// yxcvfl accepts a Yxm source and a Yrl destination; optab uses it for
// e.g. CVTSD2SL and CVTSS2SL.
var yxcvfl = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yrl}},
}

// yxcvlf accepts a Yml source and a Yxr destination; optab uses it for
// CVTSL2SD and CVTSL2SS.
var yxcvlf = []ytab{
	{Zm_r_xm, 1, argList{Yml, Yxr}},
}

// yxcvfq is yxcvfl with zoffset 2; optab uses it for the Q-suffixed
// conversions such as CVTSD2SQ, whose opBytes start with a P* byte.
var yxcvfq = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yrl}},
}

// yxcvqf is yxcvlf with zoffset 2; optab uses it for CVTSQ2SD and CVTSQ2SS.
var yxcvqf = []ytab{
	{Zm_r_xm, 2, argList{Yml, Yxr}},
}

// yps accepts, for both the MMX and the XMM register class, either a
// register/memory source or a Yi8 immediate.
var yps = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
}

// yxrrl accepts a Yxr source and a Yrl destination.
var yxrrl = []ytab{
	{Zm_r, 1, argList{Yxr, Yrl}},
}

// ymrxr accepts a Ymr or Yxm source with a Yxr destination.
var ymrxr = []ytab{
	{Zm_r, 1, argList{Ymr, Yxr}},
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

// ymshuf accepts (Yi8, Ymm, Ymr).
var ymshuf = []ytab{
	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
}

// ymshufb accepts (Yxm, Yxr), encoded with Zm2_r.
var ymshufb = []ytab{
	{Zm2_r, 2, argList{Yxm, Yxr}},
}
777
// yxshuf should never have more than 1 entry,
// because some optab entries have opcode sequences that
// are longer than 2 bytes (zoffset=2 here):
// ROUNDPD, ROUNDPS and the recently added BLENDPD,
// to name a few.
// In optab it is used by e.g. DPPD, DPPS and INSERTPS.
var yxshuf = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

// yextrw accepts a Yu8 immediate and a Yxr source, with a Yrl or Yml
// destination.
var yextrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
}

// yextr accepts (Yu8, Yxr, Ymm) and reserves three opcode bytes.
var yextr = []ytab{
	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
}

// yinsrw accepts (Yu8, Yml, Yxr).
var yinsrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
}

// yinsr accepts (Yu8, Ymm, Yxr) and reserves three opcode bytes.
var yinsr = []ytab{
	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
}

// ypsdq accepts a Yi8 immediate and a Yxr operand.
var ypsdq = []ytab{
	{Zibo_m, 2, argList{Yi8, Yxr}},
}

// ymskb accepts a Yxr or Ymr source with a Yrl destination; the XMM line
// comes first.
var ymskb = []ytab{
	{Zm_r_xm, 2, argList{Yxr, Yrl}},
	{Zm_r_xm, 1, argList{Ymr, Yrl}},
}

// ycrc32l accepts a Yml source and a Yrl destination (Zlitm_r).
var ycrc32l = []ytab{
	{Zlitm_r, 0, argList{Yml, Yrl}},
}

// ycrc32b accepts a Ymb source and a Yrl destination (Zlitm_r).
var ycrc32b = []ytab{
	{Zlitm_r, 0, argList{Ymb, Yrl}},
}

// yprefetch accepts a single Ym operand.
var yprefetch = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// yaes accepts a Yxm source and a Yxr destination (Zlitm_r).
var yaes = []ytab{
	{Zlitm_r, 2, argList{Yxm, Yxr}},
}

// yxbegin accepts a single Ybr target (Zjmp).
var yxbegin = []ytab{
	{Zjmp, 1, argList{Ybr}},
}

// yxabort accepts a single Yu8 immediate.
var yxabort = []ytab{
	{Zib_, 1, argList{Yu8}},
}

// ylddqu accepts a Ym source and a Yxr destination; optab uses it for LDDQU.
var ylddqu = []ytab{
	{Zm_r, 1, argList{Ym, Yxr}},
}

// ypalignr accepts (Yu8, Yxm, Yxr).
var ypalignr = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

// ysha256rnds2 accepts (Yxr0, Yxm, Yxr); the first operand must be X0.
var ysha256rnds2 = []ytab{
	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
}

// yblendvpd accepts (Yxr0, Yxm, Yxr); the first operand must be X0.
var yblendvpd = []ytab{
	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
}

// ymmxmm0f38 accepts the MMX pair (Ymm, Ymr), reserving three opcode bytes,
// or the XMM pair (Yxm, Yxr), reserving five.
var ymmxmm0f38 = []ytab{
	{Zlitm_r, 3, argList{Ymm, Ymr}},
	{Zlitm_r, 5, argList{Yxm, Yxr}},
}

// yextractps accepts (Yu2, Yxr, Yml); optab uses it for EXTRACTPS.
var yextractps = []ytab{
	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
}

// ysha1rnds4 accepts (Yu2, Yxm, Yxr).
var ysha1rnds4 = []ytab{
	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
}
865
866 // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
867 // ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
868 // to find the entry with the given p.As and then looks through the ytable for
// that instruction (the second field in the optab struct) for a line whose
// argList matches, entry by entry, the Ytypes of the instruction's operands
// (p.From, p.To, ...). The
871 // function oclass computes the specific Ytype of an operand and then the set
872 // of more general Ytypes that it satisfies is implied by the ycover table, set
873 // up in instinit. For example, oclass distinguishes the constants 0 and 1
874 // from the more general 8-bit constants, but instinit says
875 //
876 // ycover[Yi0*Ymax+Ys32] = 1
877 // ycover[Yi1*Ymax+Ys32] = 1
878 // ycover[Yi8*Ymax+Ys32] = 1
879 //
880 // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
881 // if that's what an instruction can handle.
882 //
883 // In parallel with the scan through the ytable for the appropriate line, there
884 // is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct. With each step past a non-matching ytable line, z
// advances by the 2nd entry in the line (the zoffset). When a matching line
// is found, that z pointer has the extra data to use in laying down the
// instruction bytes. The actual bytes laid down are a function of the 1st
// entry in the line (that is, the Ztype) and the z bytes.
890 //
891 // For example, let's look at AADDL. The optab line says:
892 // {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
893 //
894 // and yaddl says
895 // var yaddl = []ytab{
//	{Zibo_m, 2, argList{Yi8, Yml}},
//	{Zil_, 1, argList{Yi32, Yax}},
//	{Zilo_m, 2, argList{Yi32, Yml}},
//	{Zr_m, 1, argList{Yrl, Yml}},
//	{Zm_r, 1, argList{Yml, Yrl}},
901 // }
902 //
903 // so there are 5 possible types of ADDL instruction that can be laid down, and
904 // possible states used to lay them down (Ztype and z pointer, assuming z
905 // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
906 //
907 // Yi8, Yml -> Zibo_m, z (0x83, 00)
908 // Yi32, Yax -> Zil_, z+2 (0x05)
909 // Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
910 // Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
911 // Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
912 //
913 // The Pconstant in the optab line controls the prefix bytes to emit. That's
914 // relatively straightforward as this program goes.
915 //
916 // The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
917 // example, is an opcode byte (z[0]) then an asmando (which is some kind of
918 // encoded addressing mode for the Yml arg), and then a single immediate byte.
919 // Zilo_m is the same but a long (32-bit) immediate.
920 var optab =
921 // as, ytab, andproto, opcode
922 [...]Optab{
923 {obj.AXXX, nil, 0, opBytes{}},
924 {AAAA, ynone, P32, opBytes{0x37}},
925 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
926 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
927 {AAAS, ynone, P32, opBytes{0x3f}},
928 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
929 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
930 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
931 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
932 {AADCXL, yml_rl, Pq4, opBytes{0xf6}},
933 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
934 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
935 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
936 {AADDPD, yxm, Pq, opBytes{0x58}},
937 {AADDPS, yxm, Pm, opBytes{0x58}},
938 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
939 {AADDSD, yxm, Pf2, opBytes{0x58}},
940 {AADDSS, yxm, Pf3, opBytes{0x58}},
941 {AADDSUBPD, yxm, Pq, opBytes{0xd0}},
942 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
943 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
944 {AADOXL, yml_rl, Pq5, opBytes{0xf6}},
945 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
946 {AADJSP, nil, 0, opBytes{}},
947 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
948 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
949 {AANDNPD, yxm, Pq, opBytes{0x55}},
950 {AANDNPS, yxm, Pm, opBytes{0x55}},
951 {AANDPD, yxm, Pq, opBytes{0x54}},
952 {AANDPS, yxm, Pm, opBytes{0x54}},
953 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
954 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
955 {AARPL, yrl_ml, P32, opBytes{0x63}},
956 {ABOUNDL, yrl_m, P32, opBytes{0x62}},
957 {ABOUNDW, yrl_m, Pe, opBytes{0x62}},
958 {ABSFL, yml_rl, Pm, opBytes{0xbc}},
959 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
960 {ABSFW, yml_rl, Pq, opBytes{0xbc}},
961 {ABSRL, yml_rl, Pm, opBytes{0xbd}},
962 {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
963 {ABSRW, yml_rl, Pq, opBytes{0xbd}},
964 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
965 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
966 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
967 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
968 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
969 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
970 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
971 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
972 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
973 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
974 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
975 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
976 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
977 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
978 {ABYTE, ybyte, Px, opBytes{1}},
979 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
980 {ACBW, ynone, Pe, opBytes{0x98}},
981 {ACDQ, ynone, Px, opBytes{0x99}},
982 {ACDQE, ynone, Pw, opBytes{0x98}},
983 {ACLAC, ynone, Pm, opBytes{01, 0xca}},
984 {ACLC, ynone, Px, opBytes{0xf8}},
985 {ACLD, ynone, Px, opBytes{0xfc}},
986 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
987 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
988 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
989 {ACLI, ynone, Px, opBytes{0xfa}},
990 {ACLTS, ynone, Pm, opBytes{0x06}},
991 {ACLWB, yclflush, Pq, opBytes{0xae, 06}},
992 {ACMC, ynone, Px, opBytes{0xf5}},
993 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
994 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
995 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
996 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
997 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
998 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
999 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
1000 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
1001 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
1002 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
1003 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
1004 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
1005 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
1006 {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
1007 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
1008 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
1009 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
1010 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
1011 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
1012 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
1013 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
1014 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
1015 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
1016 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
1017 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
1018 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
1019 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
1020 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
1021 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
1022 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
1023 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
1024 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
1025 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
1026 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
1027 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
1028 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
1029 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
1030 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
1031 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
1032 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
1033 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
1034 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
1035 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
1036 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
1037 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
1038 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
1039 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
1040 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
1041 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
1042 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1043 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
1044 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
1045 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1046 {ACMPSB, ynone, Pb, opBytes{0xa6}},
1047 {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
1048 {ACMPSL, ynone, Px, opBytes{0xa7}},
1049 {ACMPSQ, ynone, Pw, opBytes{0xa7}},
1050 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
1051 {ACMPSW, ynone, Pe, opBytes{0xa7}},
1052 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
1053 {ACOMISD, yxm, Pe, opBytes{0x2f}},
1054 {ACOMISS, yxm, Pm, opBytes{0x2f}},
1055 {ACPUID, ynone, Pm, opBytes{0xa2}},
1056 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
1057 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
1058 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
1059 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
1060 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
1061 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
1062 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
1063 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
1064 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
1065 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
1066 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
1067 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
1068 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
1069 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
1070 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
1071 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
1072 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
1073 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
1074 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
1075 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
1076 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
1077 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
1078 {ACWD, ynone, Pe, opBytes{0x99}},
1079 {ACWDE, ynone, Px, opBytes{0x98}},
1080 {ACQO, ynone, Pw, opBytes{0x99}},
1081 {ADAA, ynone, P32, opBytes{0x27}},
1082 {ADAS, ynone, P32, opBytes{0x2f}},
1083 {ADECB, yscond, Pb, opBytes{0xfe, 01}},
1084 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
1085 {ADECQ, yincq, Pw, opBytes{0xff, 01}},
1086 {ADECW, yincq, Pe, opBytes{0xff, 01}},
1087 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
1088 {ADIVL, ydivl, Px, opBytes{0xf7, 06}},
1089 {ADIVPD, yxm, Pe, opBytes{0x5e}},
1090 {ADIVPS, yxm, Pm, opBytes{0x5e}},
1091 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
1092 {ADIVSD, yxm, Pf2, opBytes{0x5e}},
1093 {ADIVSS, yxm, Pf3, opBytes{0x5e}},
1094 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
1095 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
1096 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
1097 {AEMMS, ynone, Pm, opBytes{0x77}},
1098 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
1099 {AENTER, nil, 0, opBytes{}}, // botch
1100 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
1101 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
1102 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
1103 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
1104 {AHLT, ynone, Px, opBytes{0xf4}},
1105 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
1106 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
1107 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
1108 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
1109 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
1110 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1111 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1112 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
1113 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
1114 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
1115 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
1116 {AINB, yin, Pb, opBytes{0xe4, 0xec}},
1117 {AINW, yin, Pe, opBytes{0xe5, 0xed}},
1118 {AINL, yin, Px, opBytes{0xe5, 0xed}},
1119 {AINCB, yscond, Pb, opBytes{0xfe, 00}},
1120 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
1121 {AINCQ, yincq, Pw, opBytes{0xff, 00}},
1122 {AINCW, yincq, Pe, opBytes{0xff, 00}},
1123 {AINSB, ynone, Pb, opBytes{0x6c}},
1124 {AINSL, ynone, Px, opBytes{0x6d}},
1125 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
1126 {AINSW, ynone, Pe, opBytes{0x6d}},
1127 {AICEBP, ynone, Px, opBytes{0xf1}},
1128 {AINT, yint, Px, opBytes{0xcd}},
1129 {AINTO, ynone, P32, opBytes{0xce}},
1130 {AIRETL, ynone, Px, opBytes{0xcf}},
1131 {AIRETQ, ynone, Pw, opBytes{0xcf}},
1132 {AIRETW, ynone, Pe, opBytes{0xcf}},
1133 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
1134 {AJCS, yjcond, Px, opBytes{0x72, 0x82}},
1135 {AJCXZL, yloop, Px, opBytes{0xe3}},
1136 {AJCXZW, yloop, Px, opBytes{0xe3}},
1137 {AJCXZQ, yloop, Px, opBytes{0xe3}},
1138 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
1139 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
1140 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
1141 {AJHI, yjcond, Px, opBytes{0x77, 0x87}},
1142 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
1143 {AJLS, yjcond, Px, opBytes{0x76, 0x86}},
1144 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
1145 {AJMI, yjcond, Px, opBytes{0x78, 0x88}},
1146 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
1147 {AJNE, yjcond, Px, opBytes{0x75, 0x85}},
1148 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
1149 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
1150 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
1151 {AJPL, yjcond, Px, opBytes{0x79, 0x89}},
1152 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
1153 {AHADDPD, yxm, Pq, opBytes{0x7c}},
1154 {AHADDPS, yxm, Pf2, opBytes{0x7c}},
1155 {AHSUBPD, yxm, Pq, opBytes{0x7d}},
1156 {AHSUBPS, yxm, Pf2, opBytes{0x7d}},
1157 {ALAHF, ynone, Px, opBytes{0x9f}},
1158 {ALARL, yml_rl, Pm, opBytes{0x02}},
1159 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
1160 {ALARW, yml_rl, Pq, opBytes{0x02}},
1161 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
1162 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
1163 {ALEAL, ym_rl, Px, opBytes{0x8d}},
1164 {ALEAQ, ym_rl, Pw, opBytes{0x8d}},
1165 {ALEAVEL, ynone, P32, opBytes{0xc9}},
1166 {ALEAVEQ, ynone, Py, opBytes{0xc9}},
1167 {ALEAVEW, ynone, Pe, opBytes{0xc9}},
1168 {ALEAW, ym_rl, Pe, opBytes{0x8d}},
1169 {ALOCK, ynone, Px, opBytes{0xf0}},
1170 {ALODSB, ynone, Pb, opBytes{0xac}},
1171 {ALODSL, ynone, Px, opBytes{0xad}},
1172 {ALODSQ, ynone, Pw, opBytes{0xad}},
1173 {ALODSW, ynone, Pe, opBytes{0xad}},
1174 {ALONG, ybyte, Px, opBytes{4}},
1175 {ALOOP, yloop, Px, opBytes{0xe2}},
1176 {ALOOPEQ, yloop, Px, opBytes{0xe1}},
1177 {ALOOPNE, yloop, Px, opBytes{0xe0}},
1178 {ALTR, ydivl, Pm, opBytes{0x00, 03}},
1179 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
1180 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
1181 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
1182 {ALSLL, yml_rl, Pm, opBytes{0x03}},
1183 {ALSLW, yml_rl, Pq, opBytes{0x03}},
1184 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
1185 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
1186 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
1187 {AMAXPD, yxm, Pe, opBytes{0x5f}},
1188 {AMAXPS, yxm, Pm, opBytes{0x5f}},
1189 {AMAXSD, yxm, Pf2, opBytes{0x5f}},
1190 {AMAXSS, yxm, Pf3, opBytes{0x5f}},
1191 {AMINPD, yxm, Pe, opBytes{0x5d}},
1192 {AMINPS, yxm, Pm, opBytes{0x5d}},
1193 {AMINSD, yxm, Pf2, opBytes{0x5d}},
1194 {AMINSS, yxm, Pf3, opBytes{0x5d}},
1195 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
1196 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
1197 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
1198 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
1199 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
1200 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
1201 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
1202 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
1203 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
1204 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
1205 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
1206 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
1207 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
1208 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
1209 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
1210 {AMOVHLPS, yxr, Pm, opBytes{0x12}},
1211 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
1212 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
1213 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1214 {AMOVLHPS, yxr, Pm, opBytes{0x16}},
1215 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
1216 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
1217 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
1218 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
1219 {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
1220 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
1221 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
1222 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
1223 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
1224 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
1225 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
1226 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
1227 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
1228 {AMOVSB, ynone, Pb, opBytes{0xa4}},
1229 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
1230 {AMOVSL, ynone, Px, opBytes{0xa5}},
1231 {AMOVSQ, ynone, Pw, opBytes{0xa5}},
1232 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
1233 {AMOVSW, ynone, Pe, opBytes{0xa5}},
1234 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
1235 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
1236 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
1237 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
1238 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
1239 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
1240 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
1241 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
1242 {AMULB, ydivb, Pb, opBytes{0xf6, 04}},
1243 {AMULL, ydivl, Px, opBytes{0xf7, 04}},
1244 {AMULPD, yxm, Pe, opBytes{0x59}},
1245 {AMULPS, yxm, Ym, opBytes{0x59}},
1246 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
1247 {AMULSD, yxm, Pf2, opBytes{0x59}},
1248 {AMULSS, yxm, Pf3, opBytes{0x59}},
1249 {AMULW, ydivl, Pe, opBytes{0xf7, 04}},
1250 {ANEGB, yscond, Pb, opBytes{0xf6, 03}},
1251 {ANEGL, yscond, Px, opBytes{0xf7, 03}},
1252 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
1253 {ANEGW, yscond, Pe, opBytes{0xf7, 03}},
1254 {obj.ANOP, ynop, Px, opBytes{0, 0}},
1255 {ANOTB, yscond, Pb, opBytes{0xf6, 02}},
1256 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
1257 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
1258 {ANOTW, yscond, Pe, opBytes{0xf7, 02}},
1259 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
1260 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1261 {AORPD, yxm, Pq, opBytes{0x56}},
1262 {AORPS, yxm, Pm, opBytes{0x56}},
1263 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1264 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
1265 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
1266 {AOUTL, yin, Px, opBytes{0xe7, 0xef}},
1267 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
1268 {AOUTSB, ynone, Pb, opBytes{0x6e}},
1269 {AOUTSL, ynone, Px, opBytes{0x6f}},
1270 {AOUTSW, ynone, Pe, opBytes{0x6f}},
1271 {APABSB, yxm_q4, Pq4, opBytes{0x1c}},
1272 {APABSD, yxm_q4, Pq4, opBytes{0x1e}},
1273 {APABSW, yxm_q4, Pq4, opBytes{0x1d}},
1274 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
1275 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
1276 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
1277 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
1278 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
1279 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
1280 {APADDQ, yxm, Pe, opBytes{0xd4}},
1281 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
1282 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
1283 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
1284 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
1285 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
1286 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
1287 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
1288 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
1289 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
1290 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
1291 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
1292 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
1293 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
1294 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
1295 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
1296 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
1297 {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
1298 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
1299 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
1300 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
1301 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
1302 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
1303 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
1304 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
1305 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
1306 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
1307 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
1308 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
1309 {APHADDW, yxm_q4, Pq4, opBytes{0x01}},
1310 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
1311 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
1312 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
1313 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
1314 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
1315 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
1316 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
1317 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
1318 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
1319 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
1320 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
1321 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
1322 {APMAXSW, yxm, Pe, opBytes{0xee}},
1323 {APMAXUB, yxm, Pe, opBytes{0xde}},
1324 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
1325 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
1326 {APMINSB, yxm_q4, Pq4, opBytes{0x38}},
1327 {APMINSD, yxm_q4, Pq4, opBytes{0x39}},
1328 {APMINSW, yxm, Pe, opBytes{0xea}},
1329 {APMINUB, yxm, Pe, opBytes{0xda}},
1330 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
1331 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
1332 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
1333 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
1334 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
1335 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
1336 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
1337 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
1338 {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
1339 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
1340 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
1341 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
1342 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
1343 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
1344 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
1345 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
1346 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
1347 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
1348 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
1349 {APMULLD, yxm_q4, Pq4, opBytes{0x40}},
1350 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
1351 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
1352 {APOPAL, ynone, P32, opBytes{0x61}},
1353 {APOPAW, ynone, Pe, opBytes{0x61}},
1354 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
1355 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
1356 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
1357 {APOPFL, ynone, P32, opBytes{0x9d}},
1358 {APOPFQ, ynone, Py, opBytes{0x9d}},
1359 {APOPFW, ynone, Pe, opBytes{0x9d}},
1360 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
1361 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
1362 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
1363 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
1364 {APSADBW, yxm, Pq, opBytes{0xf6}},
1365 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
1366 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
1367 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
1368 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
1369 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
1370 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
1371 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
1372 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
1373 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
1374 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
1375 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
1376 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
1377 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
1378 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
1379 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
1380 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
1381 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
1382 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
1383 {APSUBB, yxm, Pe, opBytes{0xf8}},
1384 {APSUBL, yxm, Pe, opBytes{0xfa}},
1385 {APSUBQ, yxm, Pe, opBytes{0xfb}},
1386 {APSUBSB, yxm, Pe, opBytes{0xe8}},
1387 {APSUBSW, yxm, Pe, opBytes{0xe9}},
1388 {APSUBUSB, yxm, Pe, opBytes{0xd8}},
1389 {APSUBUSW, yxm, Pe, opBytes{0xd9}},
1390 {APSUBW, yxm, Pe, opBytes{0xf9}},
1391 {APTEST, yxm_q4, Pq4, opBytes{0x17}},
1392 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
1393 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
1394 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
1395 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
1396 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
1397 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
1398 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
1399 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
1400 {APUSHAL, ynone, P32, opBytes{0x60}},
1401 {APUSHAW, ynone, Pe, opBytes{0x60}},
1402 {APUSHFL, ynone, P32, opBytes{0x9c}},
1403 {APUSHFQ, ynone, Py, opBytes{0x9c}},
1404 {APUSHFW, ynone, Pe, opBytes{0x9c}},
1405 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1406 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1407 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
1408 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
1409 {AQUAD, ybyte, Px, opBytes{8}},
1410 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
1411 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1412 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1413 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
1414 {ARCPPS, yxm, Pm, opBytes{0x53}},
1415 {ARCPSS, yxm, Pf3, opBytes{0x53}},
1416 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
1417 {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1418 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1419 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
1420 {AREP, ynone, Px, opBytes{0xf3}},
1421 {AREPN, ynone, Px, opBytes{0xf2}},
1422 {obj.ARET, ynone, Px, opBytes{0xc3}},
1423 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
1424 {ARETFL, yret, Px, opBytes{0xcb, 0xca}},
1425 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
1426 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
1427 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1428 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1429 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
1430 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
1431 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1432 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1433 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
1434 {ARSQRTPS, yxm, Pm, opBytes{0x52}},
1435 {ARSQRTSS, yxm, Pf3, opBytes{0x52}},
1436 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
1437 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1438 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1439 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1440 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1441 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
1442 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1443 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1444 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
1445 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
1446 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1447 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1448 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
1449 {ASCASB, ynone, Pb, opBytes{0xae}},
1450 {ASCASL, ynone, Px, opBytes{0xaf}},
1451 {ASCASQ, ynone, Pw, opBytes{0xaf}},
1452 {ASCASW, ynone, Pe, opBytes{0xaf}},
1453 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
1454 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
1455 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
1456 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
1457 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
1458 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
1459 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
1460 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
1461 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
1462 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
1463 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
1464 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
1465 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
1466 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
1467 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
1468 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
1469 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
1470 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1471 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1472 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
1473 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
1474 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1475 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1476 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
1477 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
1478 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
1479 {ASQRTPD, yxm, Pe, opBytes{0x51}},
1480 {ASQRTPS, yxm, Pm, opBytes{0x51}},
1481 {ASQRTSD, yxm, Pf2, opBytes{0x51}},
1482 {ASQRTSS, yxm, Pf3, opBytes{0x51}},
1483 {ASTC, ynone, Px, opBytes{0xf9}},
1484 {ASTD, ynone, Px, opBytes{0xfd}},
1485 {ASTI, ynone, Px, opBytes{0xfb}},
1486 {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
1487 {ASTOSB, ynone, Pb, opBytes{0xaa}},
1488 {ASTOSL, ynone, Px, opBytes{0xab}},
1489 {ASTOSQ, ynone, Pw, opBytes{0xab}},
1490 {ASTOSW, ynone, Pe, opBytes{0xab}},
1491 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
1492 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1493 {ASUBPD, yxm, Pe, opBytes{0x5c}},
1494 {ASUBPS, yxm, Pm, opBytes{0x5c}},
1495 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1496 {ASUBSD, yxm, Pf2, opBytes{0x5c}},
1497 {ASUBSS, yxm, Pf3, opBytes{0x5c}},
1498 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
1499 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
1500 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
1501 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
1502 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1503 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1504 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
1505 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
1506 {obj.ATEXT, ytext, Px, opBytes{}},
1507 {AUCOMISD, yxm, Pe, opBytes{0x2e}},
1508 {AUCOMISS, yxm, Pm, opBytes{0x2e}},
1509 {AUNPCKHPD, yxm, Pe, opBytes{0x15}},
1510 {AUNPCKHPS, yxm, Pm, opBytes{0x15}},
1511 {AUNPCKLPD, yxm, Pe, opBytes{0x14}},
1512 {AUNPCKLPS, yxm, Pm, opBytes{0x14}},
1513 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
1514 {AVERR, ydivl, Pm, opBytes{0x00, 04}},
1515 {AVERW, ydivl, Pm, opBytes{0x00, 05}},
1516 {AWAIT, ynone, Px, opBytes{0x9b}},
1517 {AWORD, ybyte, Px, opBytes{2}},
1518 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
1519 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
1520 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
1521 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
1522 {AXLAT, ynone, Px, opBytes{0xd7}},
1523 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
1524 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1525 {AXORPD, yxm, Pe, opBytes{0x57}},
1526 {AXORPS, yxm, Pm, opBytes{0x57}},
1527 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1528 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
1529 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
1530 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
1531 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
1532 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
1533 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
1534 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
1535 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
1536 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
1537 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
1538 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
1539 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
1540 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
1541 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
1542 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
1543 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
1544 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
1545 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
1546 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
1547 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
1548 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
1549 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
1550 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
1551 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
1552 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
1553 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
1554 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
1555 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
1556 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
1557 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch
1558 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
1559 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
1560 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
1561 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
1562 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
1563 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
1564 {AFCOML, yfmvx, Px, opBytes{0xda, 02}},
1565 {AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
1566 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
1567 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
1568 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
1569 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
1570 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
1571 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
1572 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
1573 {AFADDDP, ycompp, Px, opBytes{0xde, 00}},
1574 {AFADDW, yfmvx, Px, opBytes{0xde, 00}},
1575 {AFADDL, yfmvx, Px, opBytes{0xda, 00}},
1576 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
1577 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
1578 {AFMULDP, ycompp, Px, opBytes{0xde, 01}},
1579 {AFMULW, yfmvx, Px, opBytes{0xde, 01}},
1580 {AFMULL, yfmvx, Px, opBytes{0xda, 01}},
1581 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
1582 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
1583 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
1584 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
1585 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
1586 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
1587 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
1588 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
1589 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
1590 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
1591 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
1592 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
1593 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
1594 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
1595 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
1596 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
1597 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
1598 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
1599 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
1600 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
1601 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
1602 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
1603 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
1604 {AFFREE, nil, 0, opBytes{}},
1605 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
1606 {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
1607 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
1608 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
1609 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
1610 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
1611 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
1612 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
1613 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
1614 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
1615 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
1616 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
1617 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
1618 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
1619 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
1620 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
1621 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
1622 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
1623 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
1624 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
1625 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
1626 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
1627 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
1628 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
1629 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
1630 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
1631 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
1632 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
1633 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
1634 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
1635 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
1636 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
1637 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
1638 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
1639 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
1640 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
1641 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
1642 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
1643 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
1644 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
1645 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
1646 {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
1647 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
1648 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
1649 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
1650 {AINVD, ynone, Pm, opBytes{0x08}},
1651 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
1652 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
1653 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
1654 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
1655 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
1656 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
1657 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
1658 {ARDMSR, ynone, Pm, opBytes{0x32}},
1659 {ARDPMC, ynone, Pm, opBytes{0x33}},
1660 {ARDTSC, ynone, Pm, opBytes{0x31}},
1661 {ARSM, ynone, Pm, opBytes{0xaa}},
1662 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
1663 {ASYSRET, ynone, Pm, opBytes{0x07}},
1664 {AWBINVD, ynone, Pm, opBytes{0x09}},
1665 {AWRMSR, ynone, Pm, opBytes{0x30}},
1666 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
1667 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
1668 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
1669 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
1670 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
1671 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
1672 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1673 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1674 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
1675 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
1676 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
1677 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
1678 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
1679 {AMOVQL, yrl_ml, Px, opBytes{0x89}},
1680 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
1681 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
1682 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
1683 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
1684 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
1685 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
1686 {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
1687 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
1688 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
1689 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
1690 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
1691 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
1692 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
1693 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
1694 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
1695 {AMOVDDUP, yxm, Pf2, opBytes{0x12}},
1696 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
1697 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
1698 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
1699 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
1700 {AUD1, ynone, Pm, opBytes{0xb9, 0}},
1701 {AUD2, ynone, Pm, opBytes{0x0b, 0}},
1702 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
1703 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
1704 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
1705 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
1706 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
1707 {ALMSW, ydivl, Pm, opBytes{0x01, 06}},
1708 {ALLDT, ydivl, Pm, opBytes{0x00, 02}},
1709 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
1710 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
1711 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
1712 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
1713 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
1714 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
1715 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
1716 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
1717 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
1718 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
1719 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
1720 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
1721 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
1722 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
1723 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
1724 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
1725 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
1726 {ASGDT, yclflush, Pm, opBytes{0x01, 00}},
1727 {ASIDT, yclflush, Pm, opBytes{0x01, 01}},
1728 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
1729 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
1730 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
1731 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
1732 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
1733 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
1734 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
1735 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
1736 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
1737 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
1738 {AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1739 {AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
1740 {AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
1741 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
1742 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
1743 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
1744 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
1745 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
1746 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
1747 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
1748 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
1749 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
1750 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
1751 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
1752 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
1753 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
1754 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
1755 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
1756 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
1757 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
1758 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
1759 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
1760 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
1761 {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
1762 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
1763 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
1764 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
1765 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
1766 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
1767 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
1768 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
1769 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
1770 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
1771 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
1772 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
1773 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
1774 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
1775 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
1776
1777 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
1778 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
1779 {AXACQUIRE, ynone, Px, opBytes{0xf2}},
1780 {AXRELEASE, ynone, Px, opBytes{0xf3}},
1781 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
1782 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
1783 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
1784 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
1785 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
1786 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
1787 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
1788 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
1789 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
1790
1791 {obj.AEND, nil, 0, opBytes{}},
1792 {0, nil, 0, opBytes{}},
1793 }
1794
// opindex maps an opcode, masked with obj.AMask, to its instruction table
// entry. It is populated by instinit from optab and avxOptab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
1796
1797 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
1798 // This happens on systems like Solaris that call .so functions instead of system calls.
1799 // It does not seem to be necessary for any other systems. This is probably working
1800 // around a Solaris-specific bug that should be fixed differently, but we don't know
1801 // what that bug is. And this does fix it.
1802 func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
1803 if ctxt.Headtype == objabi.Hsolaris {
1804 // All the Solaris dynamic imports from libc.so begin with "libc_".
1805 return strings.HasPrefix(s.Name, "libc_")
1806 }
1807 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
1808 }
1809
// nop holds single-instruction no-ops of increasing length: entry i is an
// (i+1)-byte instruction stored in the leading bytes of a 16-byte array.
// The encodings were constructed by hand and disassembled with gdb to verify.
// See http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
var nop = [][16]uint8{
	{0x90},                                                 // 1 byte
	{0x66, 0x90},                                           // 2 bytes
	{0x0f, 0x1f, 0x00},                                     // 3 bytes
	{0x0f, 0x1f, 0x40, 0x00},                               // 4 bytes
	{0x0f, 0x1f, 0x44, 0x00, 0x00},                         // 5 bytes
	{0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},                   // 6 bytes
	{0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},             // 7 bytes
	{0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},       // 8 bytes
	{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 9 bytes
	// No 10-byte entry: Native Client rejects the repeated 0x66 prefix.
	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}

// fillnop writes n bytes of no-op instructions to the start of p. It emits
// the longest available nop repeatedly and finishes with one nop that covers
// whatever is left. p must be at least n bytes long.
func fillnop(p []byte, n int) {
	for remaining := n; remaining > 0; {
		size := len(nop)
		if remaining < size {
			size = remaining
		}
		insn := nop[size-1]
		copy(p[:size], insn[:size])
		p = p[size:]
		remaining -= size
	}
}
1840
1841 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
1842 s.Grow(int64(c) + int64(pad))
1843 fillnop(s.P[c:], int(pad))
1844 return c + pad
1845 }
1846
1847 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
1848 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
1849 return l
1850 }
1851 return q
1852 }
1853
1854 // If the environment variable GOAMD64=alignedjumps the assembler will ensure that
1855 // no standalone or macro-fused jump will straddle or end on a 32 byte boundary
1856 // by inserting NOPs before the jumps
1857 func isJump(p *obj.Prog) bool {
1858 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
1859 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
1860 }
1861
1862 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
1863 // jump. Otherwise, nil is returned.
1864 func lookForJCC(p *obj.Prog) *obj.Prog {
1865 // Skip any PCDATA, FUNCDATA or NOP instructions
1866 var q *obj.Prog
1867 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
1868 }
1869
1870 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
1871 return nil
1872 }
1873
1874 switch q.As {
1875 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
1876 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
1877 default:
1878 return nil
1879 }
1880
1881 return q
1882 }
1883
1884 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
1885 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
1886 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
1887 func fusedJump(p *obj.Prog) (bool, uint8) {
1888 var fusedSize uint8
1889
1890 // The first instruction in a macro fused pair may be preceeded by the LOCK prefix,
1891 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
1892 // need to be careful to insert any padding before the locks rather than directly after them.
1893
1894 if p.As == AXRELEASE || p.As == AXACQUIRE {
1895 fusedSize += p.Isize
1896 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1897 }
1898 if p == nil {
1899 return false, 0
1900 }
1901 }
1902 if p.As == ALOCK {
1903 fusedSize += p.Isize
1904 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1905 }
1906 if p == nil {
1907 return false, 0
1908 }
1909 }
1910 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
1911
1912 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
1913 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
1914
1915 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
1916 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
1917
1918 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
1919 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
1920
1921 if !cmpAddSub && !testAnd && !incDec {
1922 return false, 0
1923 }
1924
1925 if !incDec {
1926 var argOne obj.AddrType
1927 var argTwo obj.AddrType
1928 if cmp {
1929 argOne = p.From.Type
1930 argTwo = p.To.Type
1931 } else {
1932 argOne = p.To.Type
1933 argTwo = p.From.Type
1934 }
1935 if argOne == obj.TYPE_REG {
1936 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
1937 return false, 0
1938 }
1939 } else if argOne == obj.TYPE_MEM {
1940 if argTwo != obj.TYPE_REG {
1941 return false, 0
1942 }
1943 } else {
1944 return false, 0
1945 }
1946 }
1947
1948 fusedSize += p.Isize
1949 jmp := lookForJCC(p)
1950 if jmp == nil {
1951 return false, 0
1952 }
1953
1954 fusedSize += jmp.Isize
1955
1956 if testAnd {
1957 return true, fusedSize
1958 }
1959
1960 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
1961 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
1962 return false, 0
1963 }
1964
1965 if cmpAddSub {
1966 return true, fusedSize
1967 }
1968
1969 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
1970 return false, 0
1971 }
1972
1973 return true, fusedSize
1974 }
1975
// padJumpsCtx is the alignment boundary, in bytes, that jumps must not cross
// or end on. A value of zero disables jump padding altogether.
type padJumpsCtx int32
1977
1978 func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
1979 // Disable jump padding on 32 bit builds by settting
1980 // padJumps to 0.
1981 if ctxt.Arch.Family == sys.I386 {
1982 return padJumpsCtx(0)
1983 }
1984
1985 // Disable jump padding for hand written assembly code.
1986 if ctxt.IsAsm {
1987 return padJumpsCtx(0)
1988 }
1989
1990 if objabi.GOAMD64 != "alignedjumps" {
1991 return padJumpsCtx(0)
1992
1993 }
1994
1995 return padJumpsCtx(32)
1996 }
1997
1998 // padJump detects whether the instruction being assembled is a standalone or a macro-fused
1999 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
2000 // not cross or end on a 32 byte boundary.
2001 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
2002 if pjc == 0 {
2003 return c
2004 }
2005
2006 var toPad int32
2007 fj, fjSize := fusedJump(p)
2008 mask := int32(pjc - 1)
2009 if fj {
2010 if (c&mask)+int32(fjSize) >= int32(pjc) {
2011 toPad = int32(pjc) - (c & mask)
2012 }
2013 } else if isJump(p) {
2014 if (c&mask)+int32(p.Isize) >= int32(pjc) {
2015 toPad = int32(pjc) - (c & mask)
2016 }
2017 }
2018 if toPad <= 0 {
2019 return c
2020 }
2021
2022 return noppad(ctxt, s, c, toPad)
2023 }
2024
2025 // reAssemble is called if an instruction's size changes during assembly. If
2026 // it does and the instruction is a standalone or a macro-fused jump we need to
2027 // reassemble.
2028 func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
2029 if pjc == 0 {
2030 return false
2031 }
2032
2033 fj, _ := fusedJump(p)
2034 return fj || isJump(p)
2035 }
2036
// nopPad records padding that span6 inserted into the code stream, so that a
// matching NOP instruction can later be spliced into the instruction list.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad, in bytes
}
2041
// span6 assembles the instruction list of function symbol s into s.P,
// assigning a Pc to every instruction and setting s.Size. It first rewrites
// a few pseudo-instructions (self-targeting branches, ADJSP, retpoline calls),
// then assembles the whole function repeatedly until no instruction needs to
// be re-encoded, and finally marks instruction sequences that are not safe
// for async preemption. It does nothing if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	// Code is already present for this symbol; nothing to do.
	if s.P != nil {
		return
	}

	// instinit sets ycover[0]; a zero here means it has not run.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pass 1: normalize instructions before any encoding happens.
	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch without a target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, indirect CALL/JMP through a register is
		// redirected to the runtime.retpoline<REG> symbol; indirect
		// CALL/JMP through memory is reported as an error.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	// Pass 2: count instructions and classify branches. A target that has
	// already been visited (its Back has branchShort set) lies earlier in the
	// list, so the branch is backwards and the target is a loop head.
	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current offset into s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard relocations and code from the previous pass while keeping
		// the backing storage.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func.Text; p != nil; p = p.Link {
			c0 := c // offset before any padding, used to measure the pad
			c = pjc.padJump(ctxt, s, p, c)

			// Align loop heads to loopAlign, but only when that costs at
			// most maxLoopPad bytes.
			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					// Too far for a one-byte displacement: switch q to the
					// long form and assemble again.
					if v > 127 {
						reAssemble = true
						q.Back ^= branchShort
					}

					// NOTE(review): JCXZL and XBEGIN presumably carry one
					// extra byte before the displacement; confirm in asmins.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the last 4 bytes of q hold the displacement.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Give up if the encoding has not settled after 20 passes.
		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Do not keep iterating once new errors have been reported.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging aid: hex dump of the code and its relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	// MOVQ TLS, BX
	// MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil)
	}
}
2232
// instinit initializes the package-level assembler tables: the opcode index
// (opindex), the operand class table (ycover) and the per-register encoding
// tables (reg and regrex). It is a no-op when the tables are already set up.
//
// ycover is indexed as ycover[from*Ymax+to]. NOTE(review): an entry of 1
// presumably means that an operand of class "from" is accepted where class
// "to" is required; confirm against the code that reads ycover.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Build the opcode -> table entry index, reporting any opcode that
	// appears more than once.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to its zero-opcode terminator.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// Every class covers itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// Immediate classes, from narrowest to widest.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// Byte and general purpose register classes.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register-or-memory classes.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Vector register and vector register-or-memory classes.
	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	// K register classes.
	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	// Vector-indexed memory classes.
	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Fill reg (the low 3 bits of each register's number within its group,
	// or -1 for registers not handled below) and regrex (extra flag bits for
	// registers beyond the first eight of a group, plus RxrEvex for vector
	// registers 16 and up).
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH..BH take the numbers following the first four byte registers.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// Only regrex is set for the control registers CR8..CR15.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
2466
// isAndroid records whether the target operating system (objabi.GOOS) is
// "android". prefixof consults it when choosing the TLS segment prefix.
var isAndroid = objabi.GOOS == "android"
2468
// prefixof returns the segment override prefix byte needed to encode operand
// a, or 0 when no prefix is needed. The values returned are 0x2e (CS),
// 0x3e (DS), 0x26 (ES), 0x64 (FS) and 0x65 (GS). A base or index register of
// REG_TLS is mapped to FS or GS depending on the architecture, the target
// header type and whether a shared build is requested; unsupported
// combinations abort through log.Fatalf.
func prefixof(ctxt *obj.Link, a *obj.Addr) int {
	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
		return 0
	}
	// A plain memory operand whose base register is a segment register.
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		case REG_CS:
			return 0x2e

		case REG_DS:
			return 0x3e

		case REG_ES:
			return 0x26

		case REG_FS:
			return 0x64

		case REG_GS:
			return 0x65

		case REG_TLS:
			// NOTE: Systems listed here should be only systems that
			// support direct TLS references like 8(TLS) implemented as
			// direct references from FS or GS. Systems that require
			// the initial-exec model, where you load the TLS base into
			// a register and then index from that register, do not reach
			// this code and should not be listed.
			if ctxt.Arch.Family == sys.I386 {
				switch ctxt.Headtype {
				default:
					if isAndroid {
						return 0x65 // GS
					}
					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

				case objabi.Hdarwin,
					objabi.Hdragonfly,
					objabi.Hfreebsd,
					objabi.Hnetbsd,
					objabi.Hopenbsd:
					return 0x65 // GS
				}
			}

			// Not 386 (every 386 path above returns or aborts).
			switch ctxt.Headtype {
			default:
				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

			case objabi.Hlinux:
				if isAndroid {
					return 0x64 // FS
				}

				if ctxt.Flag_shared {
					log.Fatalf("unknown TLS base register for linux with -shared")
				} else {
					return 0x64 // FS
				}

			case objabi.Hdragonfly,
				objabi.Hfreebsd,
				objabi.Hnetbsd,
				objabi.Hopenbsd,
				objabi.Hsolaris:
				return 0x64 // FS

			case objabi.Hdarwin:
				return 0x65 // GS
			}
		}
	}

	// From here on only the index register can select a prefix.
	if ctxt.Arch.Family == sys.I386 {
		if a.Index == REG_TLS && ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			// MOVL off(CX)(TLS*1), AX
			// becomes
			// mov %gs:off(%ecx), %eax
			// which assumes that the correct TLS offset has been loaded into %ecx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction it becomes
			// mov 0x0(%ecx), %eax
			// and a R_TLS_LE relocation, and so does not require a prefix.
			return 0x65 // GS
		}
		return 0
	}

	switch a.Index {
	case REG_CS:
		return 0x2e

	case REG_DS:
		return 0x3e

	case REG_ES:
		return 0x26

	case REG_TLS:
		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
			// When building for inclusion into a shared library, an instruction of the form
			// MOV off(CX)(TLS*1), AX
			// becomes
			// mov %fs:off(%rcx), %rax
			// which assumes that the correct TLS offset has been loaded into %rcx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction does not require a prefix.
			return 0x64
		}

	case REG_FS:
		return 0x64

	case REG_GS:
		return 0x65
	}

	return 0
}
2589
2590 // oclassRegList returns multisource operand class for addr.
2591 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
2592 // TODO(quasilyte): when oclass register case is refactored into
2593 // lookup table, use it here to get register kind more easily.
2594 // Helper functions like regIsXmm should go away too (they will become redundant).
2595
2596 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
2597 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
2598 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
2599
2600 reg0, reg1 := decodeRegisterRange(addr.Offset)
2601 low := regIndex(int16(reg0))
2602 high := regIndex(int16(reg1))
2603
2604 if ctxt.Arch.Family == sys.I386 {
2605 if low >= 8 || high >= 8 {
2606 return Yxxx
2607 }
2608 }
2609
2610 switch high - low {
2611 case 3:
2612 switch {
2613 case regIsXmm(reg0) && regIsXmm(reg1):
2614 return YxrEvexMulti4
2615 case regIsYmm(reg0) && regIsYmm(reg1):
2616 return YyrEvexMulti4
2617 case regIsZmm(reg0) && regIsZmm(reg1):
2618 return YzrMulti4
2619 default:
2620 return Yxxx
2621 }
2622 default:
2623 return Yxxx
2624 }
2625 }
2626
2627 // oclassVMem returns V-mem (vector memory with VSIB) operand class.
2628 // For addr that is not V-mem returns (Yxxx, false).
2629 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
2630 switch addr.Index {
2631 case REG_X0 + 0,
2632 REG_X0 + 1,
2633 REG_X0 + 2,
2634 REG_X0 + 3,
2635 REG_X0 + 4,
2636 REG_X0 + 5,
2637 REG_X0 + 6,
2638 REG_X0 + 7:
2639 return Yxvm, true
2640 case REG_X8 + 0,
2641 REG_X8 + 1,
2642 REG_X8 + 2,
2643 REG_X8 + 3,
2644 REG_X8 + 4,
2645 REG_X8 + 5,
2646 REG_X8 + 6,
2647 REG_X8 + 7:
2648 if ctxt.Arch.Family == sys.I386 {
2649 return Yxxx, true
2650 }
2651 return Yxvm, true
2652 case REG_X16 + 0,
2653 REG_X16 + 1,
2654 REG_X16 + 2,
2655 REG_X16 + 3,
2656 REG_X16 + 4,
2657 REG_X16 + 5,
2658 REG_X16 + 6,
2659 REG_X16 + 7,
2660 REG_X16 + 8,
2661 REG_X16 + 9,
2662 REG_X16 + 10,
2663 REG_X16 + 11,
2664 REG_X16 + 12,
2665 REG_X16 + 13,
2666 REG_X16 + 14,
2667 REG_X16 + 15:
2668 if ctxt.Arch.Family == sys.I386 {
2669 return Yxxx, true
2670 }
2671 return YxvmEvex, true
2672
2673 case REG_Y0 + 0,
2674 REG_Y0 + 1,
2675 REG_Y0 + 2,
2676 REG_Y0 + 3,
2677 REG_Y0 + 4,
2678 REG_Y0 + 5,
2679 REG_Y0 + 6,
2680 REG_Y0 + 7:
2681 return Yyvm, true
2682 case REG_Y8 + 0,
2683 REG_Y8 + 1,
2684 REG_Y8 + 2,
2685 REG_Y8 + 3,
2686 REG_Y8 + 4,
2687 REG_Y8 + 5,
2688 REG_Y8 + 6,
2689 REG_Y8 + 7:
2690 if ctxt.Arch.Family == sys.I386 {
2691 return Yxxx, true
2692 }
2693 return Yyvm, true
2694 case REG_Y16 + 0,
2695 REG_Y16 + 1,
2696 REG_Y16 + 2,
2697 REG_Y16 + 3,
2698 REG_Y16 + 4,
2699 REG_Y16 + 5,
2700 REG_Y16 + 6,
2701 REG_Y16 + 7,
2702 REG_Y16 + 8,
2703 REG_Y16 + 9,
2704 REG_Y16 + 10,
2705 REG_Y16 + 11,
2706 REG_Y16 + 12,
2707 REG_Y16 + 13,
2708 REG_Y16 + 14,
2709 REG_Y16 + 15:
2710 if ctxt.Arch.Family == sys.I386 {
2711 return Yxxx, true
2712 }
2713 return YyvmEvex, true
2714
2715 case REG_Z0 + 0,
2716 REG_Z0 + 1,
2717 REG_Z0 + 2,
2718 REG_Z0 + 3,
2719 REG_Z0 + 4,
2720 REG_Z0 + 5,
2721 REG_Z0 + 6,
2722 REG_Z0 + 7:
2723 return Yzvm, true
2724 case REG_Z8 + 0,
2725 REG_Z8 + 1,
2726 REG_Z8 + 2,
2727 REG_Z8 + 3,
2728 REG_Z8 + 4,
2729 REG_Z8 + 5,
2730 REG_Z8 + 6,
2731 REG_Z8 + 7,
2732 REG_Z8 + 8,
2733 REG_Z8 + 9,
2734 REG_Z8 + 10,
2735 REG_Z8 + 11,
2736 REG_Z8 + 12,
2737 REG_Z8 + 13,
2738 REG_Z8 + 14,
2739 REG_Z8 + 15,
2740 REG_Z8 + 16,
2741 REG_Z8 + 17,
2742 REG_Z8 + 18,
2743 REG_Z8 + 19,
2744 REG_Z8 + 20,
2745 REG_Z8 + 21,
2746 REG_Z8 + 22,
2747 REG_Z8 + 23:
2748 if ctxt.Arch.Family == sys.I386 {
2749 return Yxxx, true
2750 }
2751 return Yzvm, true
2752 }
2753
2754 return Yxxx, false
2755 }
2756
2757 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
2758 switch a.Type {
2759 case obj.TYPE_REGLIST:
2760 return oclassRegList(ctxt, a)
2761
2762 case obj.TYPE_NONE:
2763 return Ynone
2764
2765 case obj.TYPE_BRANCH:
2766 return Ybr
2767
2768 case obj.TYPE_INDIR:
2769 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
2770 return Yindir
2771 }
2772 return Yxxx
2773
2774 case obj.TYPE_MEM:
2775 // Pseudo registers have negative index, but SP is
2776 // not pseudo on x86, hence REG_SP check is not redundant.
2777 if a.Index == REG_SP || a.Index < 0 {
2778 // Can't use FP/SB/PC/SP as the index register.
2779 return Yxxx
2780 }
2781
2782 if vmem, ok := oclassVMem(ctxt, a); ok {
2783 return vmem
2784 }
2785
2786 if ctxt.Arch.Family == sys.AMD64 {
2787 switch a.Name {
2788 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
2789 // Global variables can't use index registers and their
2790 // base register is %rip (%rip is encoded as REG_NONE).
2791 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
2792 return Yxxx
2793 }
2794 case obj.NAME_AUTO, obj.NAME_PARAM:
2795 // These names must have a base of SP. The old compiler
2796 // uses 0 for the base register. SSA uses REG_SP.
2797 if a.Reg != REG_SP && a.Reg != 0 {
2798 return Yxxx
2799 }
2800 case obj.NAME_NONE:
2801 // everything is ok
2802 default:
2803 // unknown name
2804 return Yxxx
2805 }
2806 }
2807 return Ym
2808
2809 case obj.TYPE_ADDR:
2810 switch a.Name {
2811 case obj.NAME_GOTREF:
2812 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
2813 return Yxxx
2814
2815 case obj.NAME_EXTERN,
2816 obj.NAME_STATIC:
2817 if a.Sym != nil && useAbs(ctxt, a.Sym) {
2818 return Yi32
2819 }
2820 return Yiauto // use pc-relative addressing
2821
2822 case obj.NAME_AUTO,
2823 obj.NAME_PARAM:
2824 return Yiauto
2825 }
2826
2827 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
2828 // and got Yi32 in an earlier version of this code.
2829 // Keep doing that until we fix yduff etc.
2830 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
2831 return Yi32
2832 }
2833
2834 if a.Sym != nil || a.Name != obj.NAME_NONE {
2835 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
2836 }
2837 fallthrough
2838
2839 case obj.TYPE_CONST:
2840 if a.Sym != nil {
2841 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
2842 }
2843
2844 v := a.Offset
2845 if ctxt.Arch.Family == sys.I386 {
2846 v = int64(int32(v))
2847 }
2848 switch {
2849 case v == 0:
2850 return Yi0
2851 case v == 1:
2852 return Yi1
2853 case v >= 0 && v <= 3:
2854 return Yu2
2855 case v >= 0 && v <= 127:
2856 return Yu7
2857 case v >= 0 && v <= 255:
2858 return Yu8
2859 case v >= -128 && v <= 127:
2860 return Yi8
2861 }
2862 if ctxt.Arch.Family == sys.I386 {
2863 return Yi32
2864 }
2865 l := int32(v)
2866 if int64(l) == v {
2867 return Ys32 // can sign extend
2868 }
2869 if v>>32 == 0 {
2870 return Yi32 // unsigned
2871 }
2872 return Yi64
2873
2874 case obj.TYPE_TEXTSIZE:
2875 return Ytextsize
2876 }
2877
2878 if a.Type != obj.TYPE_REG {
2879 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
2880 return Yxxx
2881 }
2882
2883 switch a.Reg {
2884 case REG_AL:
2885 return Yal
2886
2887 case REG_AX:
2888 return Yax
2889
2890 /*
2891 case REG_SPB:
2892 */
2893 case REG_BPB,
2894 REG_SIB,
2895 REG_DIB,
2896 REG_R8B,
2897 REG_R9B,
2898 REG_R10B,
2899 REG_R11B,
2900 REG_R12B,
2901 REG_R13B,
2902 REG_R14B,
2903 REG_R15B:
2904 if ctxt.Arch.Family == sys.I386 {
2905 return Yxxx
2906 }
2907 fallthrough
2908
2909 case REG_DL,
2910 REG_BL,
2911 REG_AH,
2912 REG_CH,
2913 REG_DH,
2914 REG_BH:
2915 return Yrb
2916
2917 case REG_CL:
2918 return Ycl
2919
2920 case REG_CX:
2921 return Ycx
2922
2923 case REG_DX, REG_BX:
2924 return Yrx
2925
2926 case REG_R8, // not really Yrl
2927 REG_R9,
2928 REG_R10,
2929 REG_R11,
2930 REG_R12,
2931 REG_R13,
2932 REG_R14,
2933 REG_R15:
2934 if ctxt.Arch.Family == sys.I386 {
2935 return Yxxx
2936 }
2937 fallthrough
2938
2939 case REG_SP, REG_BP, REG_SI, REG_DI:
2940 if ctxt.Arch.Family == sys.I386 {
2941 return Yrl32
2942 }
2943 return Yrl
2944
2945 case REG_F0 + 0:
2946 return Yf0
2947
2948 case REG_F0 + 1,
2949 REG_F0 + 2,
2950 REG_F0 + 3,
2951 REG_F0 + 4,
2952 REG_F0 + 5,
2953 REG_F0 + 6,
2954 REG_F0 + 7:
2955 return Yrf
2956
2957 case REG_M0 + 0,
2958 REG_M0 + 1,
2959 REG_M0 + 2,
2960 REG_M0 + 3,
2961 REG_M0 + 4,
2962 REG_M0 + 5,
2963 REG_M0 + 6,
2964 REG_M0 + 7:
2965 return Ymr
2966
2967 case REG_X0:
2968 return Yxr0
2969
2970 case REG_X0 + 1,
2971 REG_X0 + 2,
2972 REG_X0 + 3,
2973 REG_X0 + 4,
2974 REG_X0 + 5,
2975 REG_X0 + 6,
2976 REG_X0 + 7,
2977 REG_X0 + 8,
2978 REG_X0 + 9,
2979 REG_X0 + 10,
2980 REG_X0 + 11,
2981 REG_X0 + 12,
2982 REG_X0 + 13,
2983 REG_X0 + 14,
2984 REG_X0 + 15:
2985 return Yxr
2986
2987 case REG_X0 + 16,
2988 REG_X0 + 17,
2989 REG_X0 + 18,
2990 REG_X0 + 19,
2991 REG_X0 + 20,
2992 REG_X0 + 21,
2993 REG_X0 + 22,
2994 REG_X0 + 23,
2995 REG_X0 + 24,
2996 REG_X0 + 25,
2997 REG_X0 + 26,
2998 REG_X0 + 27,
2999 REG_X0 + 28,
3000 REG_X0 + 29,
3001 REG_X0 + 30,
3002 REG_X0 + 31:
3003 return YxrEvex
3004
3005 case REG_Y0 + 0,
3006 REG_Y0 + 1,
3007 REG_Y0 + 2,
3008 REG_Y0 + 3,
3009 REG_Y0 + 4,
3010 REG_Y0 + 5,
3011 REG_Y0 + 6,
3012 REG_Y0 + 7,
3013 REG_Y0 + 8,
3014 REG_Y0 + 9,
3015 REG_Y0 + 10,
3016 REG_Y0 + 11,
3017 REG_Y0 + 12,
3018 REG_Y0 + 13,
3019 REG_Y0 + 14,
3020 REG_Y0 + 15:
3021 return Yyr
3022
3023 case REG_Y0 + 16,
3024 REG_Y0 + 17,
3025 REG_Y0 + 18,
3026 REG_Y0 + 19,
3027 REG_Y0 + 20,
3028 REG_Y0 + 21,
3029 REG_Y0 + 22,
3030 REG_Y0 + 23,
3031 REG_Y0 + 24,
3032 REG_Y0 + 25,
3033 REG_Y0 + 26,
3034 REG_Y0 + 27,
3035 REG_Y0 + 28,
3036 REG_Y0 + 29,
3037 REG_Y0 + 30,
3038 REG_Y0 + 31:
3039 return YyrEvex
3040
3041 case REG_Z0 + 0,
3042 REG_Z0 + 1,
3043 REG_Z0 + 2,
3044 REG_Z0 + 3,
3045 REG_Z0 + 4,
3046 REG_Z0 + 5,
3047 REG_Z0 + 6,
3048 REG_Z0 + 7:
3049 return Yzr
3050
3051 case REG_Z0 + 8,
3052 REG_Z0 + 9,
3053 REG_Z0 + 10,
3054 REG_Z0 + 11,
3055 REG_Z0 + 12,
3056 REG_Z0 + 13,
3057 REG_Z0 + 14,
3058 REG_Z0 + 15,
3059 REG_Z0 + 16,
3060 REG_Z0 + 17,
3061 REG_Z0 + 18,
3062 REG_Z0 + 19,
3063 REG_Z0 + 20,
3064 REG_Z0 + 21,
3065 REG_Z0 + 22,
3066 REG_Z0 + 23,
3067 REG_Z0 + 24,
3068 REG_Z0 + 25,
3069 REG_Z0 + 26,
3070 REG_Z0 + 27,
3071 REG_Z0 + 28,
3072 REG_Z0 + 29,
3073 REG_Z0 + 30,
3074 REG_Z0 + 31:
3075 if ctxt.Arch.Family == sys.I386 {
3076 return Yxxx
3077 }
3078 return Yzr
3079
3080 case REG_K0:
3081 return Yk0
3082
3083 case REG_K0 + 1,
3084 REG_K0 + 2,
3085 REG_K0 + 3,
3086 REG_K0 + 4,
3087 REG_K0 + 5,
3088 REG_K0 + 6,
3089 REG_K0 + 7:
3090 return Yknot0
3091
3092 case REG_CS:
3093 return Ycs
3094 case REG_SS:
3095 return Yss
3096 case REG_DS:
3097 return Yds
3098 case REG_ES:
3099 return Yes
3100 case REG_FS:
3101 return Yfs
3102 case REG_GS:
3103 return Ygs
3104 case REG_TLS:
3105 return Ytls
3106
3107 case REG_GDTR:
3108 return Ygdtr
3109 case REG_IDTR:
3110 return Yidtr
3111 case REG_LDTR:
3112 return Yldtr
3113 case REG_MSW:
3114 return Ymsw
3115 case REG_TASK:
3116 return Ytask
3117
3118 case REG_CR + 0:
3119 return Ycr0
3120 case REG_CR + 1:
3121 return Ycr1
3122 case REG_CR + 2:
3123 return Ycr2
3124 case REG_CR + 3:
3125 return Ycr3
3126 case REG_CR + 4:
3127 return Ycr4
3128 case REG_CR + 5:
3129 return Ycr5
3130 case REG_CR + 6:
3131 return Ycr6
3132 case REG_CR + 7:
3133 return Ycr7
3134 case REG_CR + 8:
3135 return Ycr8
3136
3137 case REG_DR + 0:
3138 return Ydr0
3139 case REG_DR + 1:
3140 return Ydr1
3141 case REG_DR + 2:
3142 return Ydr2
3143 case REG_DR + 3:
3144 return Ydr3
3145 case REG_DR + 4:
3146 return Ydr4
3147 case REG_DR + 5:
3148 return Ydr5
3149 case REG_DR + 6:
3150 return Ydr6
3151 case REG_DR + 7:
3152 return Ydr7
3153
3154 case REG_TR + 0:
3155 return Ytr0
3156 case REG_TR + 1:
3157 return Ytr1
3158 case REG_TR + 2:
3159 return Ytr2
3160 case REG_TR + 3:
3161 return Ytr3
3162 case REG_TR + 4:
3163 return Ytr4
3164 case REG_TR + 5:
3165 return Ytr5
3166 case REG_TR + 6:
3167 return Ytr6
3168 case REG_TR + 7:
3169 return Ytr7
3170 }
3171
3172 return Yxxx
3173 }
3174
3175 // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
3176 // and hold assembly state.
3177 type AsmBuf struct {
3178 buf [100]byte
3179 off int
3180 rexflag int
3181 vexflag bool // Per inst: true for VEX-encoded
3182 evexflag bool // Per inst: true for EVEX-encoded
3183 rep bool
3184 repn bool
3185 lock bool
3186
3187 evex evexBits // Initialized when evexflag is true
3188 }
3189
3190 // Put1 appends one byte to the end of the buffer.
3191 func (ab *AsmBuf) Put1(x byte) {
3192 ab.buf[ab.off] = x
3193 ab.off++
3194 }
3195
3196 // Put2 appends two bytes to the end of the buffer.
3197 func (ab *AsmBuf) Put2(x, y byte) {
3198 ab.buf[ab.off+0] = x
3199 ab.buf[ab.off+1] = y
3200 ab.off += 2
3201 }
3202
3203 // Put3 appends three bytes to the end of the buffer.
3204 func (ab *AsmBuf) Put3(x, y, z byte) {
3205 ab.buf[ab.off+0] = x
3206 ab.buf[ab.off+1] = y
3207 ab.buf[ab.off+2] = z
3208 ab.off += 3
3209 }
3210
3211 // Put4 appends four bytes to the end of the buffer.
3212 func (ab *AsmBuf) Put4(x, y, z, w byte) {
3213 ab.buf[ab.off+0] = x
3214 ab.buf[ab.off+1] = y
3215 ab.buf[ab.off+2] = z
3216 ab.buf[ab.off+3] = w
3217 ab.off += 4
3218 }
3219
3220 // PutInt16 writes v into the buffer using little-endian encoding.
3221 func (ab *AsmBuf) PutInt16(v int16) {
3222 ab.buf[ab.off+0] = byte(v)
3223 ab.buf[ab.off+1] = byte(v >> 8)
3224 ab.off += 2
3225 }
3226
3227 // PutInt32 writes v into the buffer using little-endian encoding.
3228 func (ab *AsmBuf) PutInt32(v int32) {
3229 ab.buf[ab.off+0] = byte(v)
3230 ab.buf[ab.off+1] = byte(v >> 8)
3231 ab.buf[ab.off+2] = byte(v >> 16)
3232 ab.buf[ab.off+3] = byte(v >> 24)
3233 ab.off += 4
3234 }
3235
3236 // PutInt64 writes v into the buffer using little-endian encoding.
3237 func (ab *AsmBuf) PutInt64(v int64) {
3238 ab.buf[ab.off+0] = byte(v)
3239 ab.buf[ab.off+1] = byte(v >> 8)
3240 ab.buf[ab.off+2] = byte(v >> 16)
3241 ab.buf[ab.off+3] = byte(v >> 24)
3242 ab.buf[ab.off+4] = byte(v >> 32)
3243 ab.buf[ab.off+5] = byte(v >> 40)
3244 ab.buf[ab.off+6] = byte(v >> 48)
3245 ab.buf[ab.off+7] = byte(v >> 56)
3246 ab.off += 8
3247 }
3248
3249 // Put copies b into the buffer.
3250 func (ab *AsmBuf) Put(b []byte) {
3251 copy(ab.buf[ab.off:], b)
3252 ab.off += len(b)
3253 }
3254
3255 // PutOpBytesLit writes zero terminated sequence of bytes from op,
3256 // starting at specified offset (e.g. z counter value).
3257 // Trailing 0 is not written.
3258 //
3259 // Intended to be used for literal Z cases.
3260 // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
3261 func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
3262 for int(op[offset]) != 0 {
3263 ab.Put1(byte(op[offset]))
3264 offset++
3265 }
3266 }
3267
3268 // Insert inserts b at offset i.
3269 func (ab *AsmBuf) Insert(i int, b byte) {
3270 ab.off++
3271 copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
3272 ab.buf[i] = b
3273 }
3274
3275 // Last returns the byte at the end of the buffer.
3276 func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
3277
3278 // Len returns the length of the buffer.
3279 func (ab *AsmBuf) Len() int { return ab.off }
3280
3281 // Bytes returns the contents of the buffer.
3282 func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
3283
3284 // Reset empties the buffer.
3285 func (ab *AsmBuf) Reset() { ab.off = 0 }
3286
3287 // At returns the byte at offset i.
3288 func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
3289
// asmidx emits SIB byte.
//
// The byte is assembled in i as scale<<6 | index<<3 | base, using reg[] to
// map register constants to their encoding bits, and is appended with Put1.
// scale must be 1, 2, 4 or 8. If index, scale or base is not accepted, a
// diagnostic is reported through ctxt and a zero byte is emitted instead, so
// exactly one byte is appended on every path.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		// Anything not listed below (REG_SP included) is rejected as an index.
		goto bad

	case REG_NONE:
		// No index: index field is 4 and the scale switch is skipped.
		i = 4 << 3
		goto bas

	// Registers numbered 8 and above are rejected on 386.
	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	// Scale factor goes into the top two bits as log2(scale).
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	// Base register goes into the low three bits.
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit a placeholder byte.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
3478
3479 func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
3480 var rel obj.Reloc
3481
3482 v := vaddr(ctxt, p, a, &rel)
3483 if rel.Siz != 0 {
3484 if rel.Siz != 4 {
3485 ctxt.Diag("bad reloc")
3486 }
3487 r := obj.Addrel(cursym)
3488 *r = rel
3489 r.Off = int32(p.Pc + int64(ab.Len()))
3490 }
3491
3492 ab.PutInt32(int32(v))
3493 }
3494
3495 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
3496 if r != nil {
3497 *r = obj.Reloc{}
3498 }
3499
3500 switch a.Name {
3501 case obj.NAME_STATIC,
3502 obj.NAME_GOTREF,
3503 obj.NAME_EXTERN:
3504 s := a.Sym
3505 if r == nil {
3506 ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3507 log.Fatalf("reloc")
3508 }
3509
3510 if a.Name == obj.NAME_GOTREF {
3511 r.Siz = 4
3512 r.Type = objabi.R_GOTPCREL
3513 } else if useAbs(ctxt, s) {
3514 r.Siz = 4
3515 r.Type = objabi.R_ADDR
3516 } else {
3517 r.Siz = 4
3518 r.Type = objabi.R_PCREL
3519 }
3520
3521 r.Off = -1 // caller must fill in
3522 r.Sym = s
3523 r.Add = a.Offset
3524
3525 return 0
3526 }
3527
3528 if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
3529 if r == nil {
3530 ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
3531 log.Fatalf("reloc")
3532 }
3533
3534 if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
3535 r.Type = objabi.R_TLS_LE
3536 r.Siz = 4
3537 r.Off = -1 // caller must fill in
3538 r.Add = a.Offset
3539 }
3540 return 0
3541 }
3542
3543 return a.Offset
3544 }
3545
// asmandsz appends the ModR/M byte for operand a and, where the addressing
// form needs them, the SIB byte (via asmidx) and an 8- or 32-bit
// displacement.
//
// r is placed in bits 3-5 of the ModR/M byte. rex is masked down to
// 0x40|Rxr and merged, together with the REX bits implied by the base and
// index registers, into ab.rexflag. m64 is not referenced by the body.
//
// When the operand refers to a symbol or to TLS, the relocation produced by
// vaddr is added to cursym at p.Pc plus the current buffer length, right
// before the 32-bit displacement is written. Operands that cannot be encoded
// are reported through ctxt.Diag.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// The displacement is truncated to 32 bits even after the diagnostic above.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the checks only refine the diagnostics.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, rm=register, no displacement allowed.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always encoded with a SIB byte (rm=4).
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base within this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 with a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// mod=0, no displacement. BP and R13 are excluded from this form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// mod=1, 8-bit displacement (only when no relocation is pending).
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// mod=2, 32-bit displacement.
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the REG_TLS pseudo index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No usable base register: absolute/symbolic, segment-register or TLS based.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, rm=5 followed by a 32-bit displacement.
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always emitted with a SIB byte (no index).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Plain base register, no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
			// The offset moves into an R_TLS_LE relocation addend and the
			// encoded displacement becomes zero.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// mod=0, no displacement. BP and R13 are excluded from this form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// mod=1, 8-bit displacement (only when no relocation is pending).
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		// mod=2, 32-bit displacement.
		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Record the pending relocation (if any) at the displacement's position,
	// then write the 32-bit displacement itself.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}
3758
3759 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
3760 ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
3761 }
3762
3763 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
3764 ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
3765 }
3766
3767 func bytereg(a *obj.Addr, t *uint8) {
3768 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
3769 a.Reg += REG_AL - REG_AX
3770 *t = 0
3771 }
3772 }
3773
3774 func unbytereg(a *obj.Addr, t *uint8) {
3775 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
3776 a.Reg += REG_AX - REG_AL
3777 *t = 0
3778 }
3779 }
3780
// Encoding kinds stored in the fifth column of ymovtab rows.
// All values are of type uint8 and are numbered consecutively from zero.
const (
	movLit = uint8(iota) // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)
3791
// ymovtab lists the special-case encodings for pushes/pops of segment
// registers, moves to and from segment, control, debug, test and descriptor
// table registers, double shifts, and loads of the TLS base.
//
// Each row holds, in order: the instruction, three operand classes (Ynone
// where an operand is absent), the encoding kind (one of the mov* constants)
// and up to four op bytes. The table ends with an all-zero row.
//
// NOTE(review): judging by the rows, the first class is the source operand
// and the third the destination, and for the movRegMem/movMemReg kinds the
// small numbers after the opcode look like ModR/M reg-field values; the code
// that consumes this table lies outside this section - confirm there.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	// The first operand is the shift count: an 8-bit immediate, CL or CX.
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	// All-zero row closing the table.
	{0, 0, 0, 0, 0, [4]uint8{}},
}
3935
3936 func isax(a *obj.Addr) bool {
3937 switch a.Reg {
3938 case REG_AX, REG_AL, REG_AH:
3939 return true
3940 }
3941
3942 if a.Index == REG_AX {
3943 return true
3944 }
3945 return false
3946 }
3947
3948 func subreg(p *obj.Prog, from int, to int) {
3949 if false { /* debug['Q'] */
3950 fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
3951 }
3952
3953 if int(p.From.Reg) == from {
3954 p.From.Reg = int16(to)
3955 p.Ft = 0
3956 }
3957
3958 if int(p.To.Reg) == from {
3959 p.To.Reg = int16(to)
3960 p.Tt = 0
3961 }
3962
3963 if int(p.From.Index) == from {
3964 p.From.Index = int16(to)
3965 p.Ft = 0
3966 }
3967
3968 if int(p.To.Index) == from {
3969 p.To.Index = int16(to)
3970 p.Tt = 0
3971 }
3972
3973 if false { /* debug['Q'] */
3974 fmt.Printf("%v\n", p)
3975 }
3976 }
3977
3978 func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
3979 switch op {
3980 case Pm, Pe, Pf2, Pf3:
3981 if osize != 1 {
3982 if op != Pm {
3983 ab.Put1(byte(op))
3984 }
3985 ab.Put1(Pm)
3986 z++
3987 op = int(o.op[z])
3988 break
3989 }
3990 fallthrough
3991
3992 default:
3993 if ab.Len() == 0 || ab.Last() != Pm {
3994 ab.Put1(Pm)
3995 }
3996 }
3997
3998 ab.Put1(byte(op))
3999 return z
4000 }
4001
// bpduff1 holds the machine code of two instructions, five bytes each.
var bpduff1 = []byte{
	// MOVQ BP, -16(SP)
	0x48, 0x89, 0x6c, 0x24, 0xf0,
	// LEAQ -16(SP), BP
	0x48, 0x8d, 0x6c, 0x24, 0xf0,
}
4006
// bpduff2 holds the machine code of a single four-byte instruction.
var bpduff2 = []byte{
	// MOVQ 0(BP), BP
	0x48, 0x8b, 0x6d, 0x00,
}
4010
// asmevex emits EVEX prefix and opcode byte.
// In addition to the r/m, vvvv and reg fields that asmvex takes, it also
// accepts an optional K-masking register.
//
// rm, v, r and k may each be nil when the instruction has no such operand.
// The four prefix bytes (0x62, P0, P1, P2) are followed by evex.opcode.
// Suffix options (zeroing, rounding, broadcast, SAE) are looked up from
// p.Scond; unsupported or illegal combinations are reported through ctxt
// but the bytes are still emitted.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The extension bits start out as 1 and are cleared to 0 when the
	// corresponding register needs the extension.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// With no index register, the X bit carries the EVEX extension bit of rm.Reg.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// vvvv is stored with all four bits flipped.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; at most one case applies.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// With rounding, the L'L field carries the rounding mode instead.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared by the EVEX extension bit of the index register if set,
	// otherwise by that of the v register.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
4117
4118 // Emit VEX prefix and opcode byte.
4119 // The three addresses are the r/m, vvvv, and reg fields.
4120 // The reg and rm arguments appear in the same order as the
4121 // arguments to asmand, which typically follows the call to asmvex.
4122 // The final two arguments are the VEX prefix (see encoding above)
4123 // and the opcode byte.
4124 // For details about vex prefix see:
4125 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
4126 func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
4127 ab.vexflag = true
4128 rexR := 0
4129 if r != nil {
4130 rexR = regrex[r.Reg] & Rxr
4131 }
4132 rexB := 0
4133 rexX := 0
4134 if rm != nil {
4135 rexB = regrex[rm.Reg] & Rxb
4136 rexX = regrex[rm.Index] & Rxx
4137 }
4138 vexM := (vex >> 3) & 0x7
4139 vexWLP := vex & 0x87
4140 vexV := byte(0)
4141 if v != nil {
4142 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
4143 }
4144 vexV ^= 0xF
4145 if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
4146 // Can use 2-byte encoding.
4147 ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
4148 } else {
4149 // Must use 3-byte encoding.
4150 ab.Put3(0xc4,
4151 (byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
4152 vexV<<3|vexWLP,
4153 )
4154 }
4155 ab.Put1(opcode)
4156 }
4157
4158 // regIndex returns register index that fits in 5 bits.
4159 //
4160 // R : 3 bit | legacy instructions | N/A
4161 // [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
4162 // EVEX.R : 1 bit | EVEX extension bit | RxrEvex
4163 //
4164 // Examples:
4165 // REG_Z30 => 30
4166 // REG_X15 => 15
4167 // REG_R9 => 9
4168 // REG_AX => 0
4169 //
4170 func regIndex(r int16) int {
4171 lower3bits := reg[r]
4172 high4bit := regrex[r] & Rxr << 1
4173 high5bit := regrex[r] & RxrEvex << 0
4174 return lower3bits | high4bit | high5bit
4175 }
4176
4177 // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
4178 // Reports errors via ctxt.
4179 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4180 // If any pair of the index, mask, or destination registers
4181 // are the same, illegal instruction trap (#UD) is triggered.
4182 index := regIndex(p.GetFrom3().Index)
4183 mask := regIndex(p.From.Reg)
4184 dest := regIndex(p.To.Reg)
4185 if dest == mask || dest == index || mask == index {
4186 ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
4187 return false
4188 }
4189
4190 return true
4191 }
4192
4193 // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
4194 // Reports errors via ctxt.
4195 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
4196 // Illegal instruction trap (#UD) is triggered if the destination vector
4197 // register is the same as index vector in VSIB.
4198 index := regIndex(p.From.Index)
4199 dest := regIndex(p.To.Reg)
4200 if dest == index {
4201 ctxt.Diag("index and destination registers should be distinct: %v", p)
4202 return false
4203 }
4204
4205 return true
4206 }
4207
// doasm encodes the single instruction p into ab, adding relocations to
// cursym where an encoding needs them.
//
// The work proceeds in stages:
//
//  1. Look up the optab entry for p.As; a missing entry is diagnosed.
//  2. Emit any prefix byte that prefixof reports for the From and To operands.
//  3. For gather instructions, reject register combinations that would
//     unconditionally trap.
//  4. Classify the operands (caching the classes in p.Ft and p.Tt) and walk
//     the entry's ytab. z tracks the offset into o.op that corresponds to
//     the ytab row being examined. The first matching row selects a zcase,
//     which dictates how prefix, opcode, ModRM and immediate bytes are laid
//     out; the function returns after emitting it.
//  5. If no ytab row matches, consult ymovtab for the special MOV forms.
//  6. If that fails too, control reaches the bad label: on non-AMD64 targets
//     an attempt is made to swap a register that has no byte form with a
//     byte-addressable one and re-run doasm on a rewritten copy of p;
//     otherwise "invalid instruction" is diagnosed.
//
// Errors are reported through ctxt.Diag, except for a few conditions that
// call log.Fatalf.
func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	o := opindex[p.As&obj.AMask]

	if o == nil {
		ctxt.Diag("asmins: missing op %v", p)
		return
	}

	if pre := prefixof(ctxt, &p.From); pre != 0 {
		ab.Put1(byte(pre))
	}
	if pre := prefixof(ctxt, &p.To); pre != 0 {
		ab.Put1(byte(pre))
	}

	// Checks to warn about instruction/arguments combinations that
	// will unconditionally trigger illegal instruction trap (#UD).
	switch p.As {
	case AVGATHERDPD,
		AVGATHERQPD,
		AVGATHERDPS,
		AVGATHERQPS,
		AVPGATHERDD,
		AVPGATHERQD,
		AVPGATHERDQ,
		AVPGATHERQQ:
		// AVX512 gather requires explicit K mask.
		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
			if !avx512gatherValid(ctxt, p) {
				return
			}
		} else {
			if !avx2gatherValid(ctxt, p) {
				return
			}
		}
	}

	// Operand classes are computed once and cached on p.
	if p.Ft == 0 {
		p.Ft = uint8(oclass(ctxt, p, &p.From))
	}
	if p.Tt == 0 {
		p.Tt = uint8(oclass(ctxt, p, &p.To))
	}

	// Classes are pre-multiplied by Ymax; ft, f3t and tt are later used
	// as row offsets when indexing ycover.
	ft := int(p.Ft) * Ymax
	var f3t int
	tt := int(p.Tt) * Ymax

	// xo is 1 when the opcode table starts with the 0x0f escape byte,
	// in which case every skipped ytab row consumes one extra o.op slot.
	xo := obj.Bool2int(o.op[0] == 0x0f)
	z := 0
	var a *obj.Addr
	var l int
	var op int
	var q *obj.Prog
	var r *obj.Reloc
	var rel obj.Reloc
	var v int64

	// Build the operand class list in order: From, RestArgs..., To.
	// Operands of class Ynone are left out for From and To.
	args := make([]int, 0, argListMax)
	if ft != Ynone*Ymax {
		args = append(args, ft)
	}
	for i := range p.RestArgs {
		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
	}
	if tt != Ynone*Ymax {
		args = append(args, tt)
	}

	for _, yt := range o.ytab {
		// ytab matching is purely args-based,
		// but AVX512 suffixes like "Z" or "RU_SAE" will
		// add EVEX-only filter that will reject non-EVEX matches.
		//
		// Consider "VADDPD.BCST 2032(DX), X0, X0".
		// Without this rule, operands will lead to VEX-encoded form
		// and produce "c5b15813" encoding.
		if !yt.match(args) {
			// "xo" is always zero for VEX/EVEX encoded insts.
			z += int(yt.zoffset) + xo
		} else {
			if p.Scond != 0 && !evexZcase(yt.zcase) {
				// Do not signal error and continue to search
				// for matching EVEX-encoded form.
				z += int(yt.zoffset)
				continue
			}

			// Emit (or record in ab.rexflag) whatever the optab
			// prefix kind requires before the opcode itself.
			switch o.prefix {
			case Px1: // first option valid only in 32-bit mode
				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
					z += int(yt.zoffset) + xo
					continue
				}
			case Pq: // 16 bit escape and opcode escape
				ab.Put2(Pe, Pm)

			case Pq3: // 16 bit escape and opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pe, Pm)

			case Pq4: // 66 0F 38
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq4w: // 66 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq5: // F3 0F 38
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pq5w: // F3 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pf2, // xmm opcode escape
				Pf3:
				ab.Put2(o.prefix, Pm)

			case Pef3:
				ab.Put3(Pe, Pf3, Pm)

			case Pfw: // xmm opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pf3, Pm)

			case Pm: // opcode escape
				ab.Put1(Pm)

			case Pe: // 16 bit escape
				ab.Put1(Pe)

			case Pw: // 64-bit escape
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal 64: %v", p)
				}
				ab.rexflag |= Pw

			case Pw8: // 64-bit escape if z >= 8
				if z >= 8 {
					if ctxt.Arch.Family != sys.AMD64 {
						ctxt.Diag("asmins: illegal 64: %v", p)
					}
					ab.rexflag |= Pw
				}

			case Pb: // botch
				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
					goto bad
				}
				// NOTE(rsc): This is probably safe to do always,
				// but when enabled it chooses different encodings
				// than the old cmd/internal/obj/i386 code did,
				// which breaks our "same bits out" checks.
				// In particular, CMPB AX, $0 encodes as 80 f8 00
				// in the original obj/i386, and it would encode
				// (using a valid, shorter form) as 3c 00 if we enabled
				// the call to bytereg here.
				if ctxt.Arch.Family == sys.AMD64 {
					bytereg(&p.From, &p.Ft)
					bytereg(&p.To, &p.Tt)
				}

			case P32: // 32 bit but illegal if 64-bit mode
				if ctxt.Arch.Family == sys.AMD64 {
					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
				}

			case Py: // 64-bit only, no prefix
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py1: // 64-bit only if z < 1, no prefix
				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py3: // 64-bit only if z < 3, no prefix
				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			}

			if z >= len(o.op) {
				log.Fatalf("asmins bad table %v", p)
			}
			// A leading 0x0f in the opcode table is an escape byte:
			// emit it and step to the byte that follows.
			op = int(o.op[z])
			if op == 0x0f {
				ab.Put1(byte(op))
				z++
				op = int(o.op[z])
			}

			// The zcase of the matched row selects the encoding layout.
			switch yt.zcase {
			default:
				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
				return

			case Zpseudo:
				break

			case Zlit:
				ab.PutOpBytesLit(z, &o.op)

			case Zlitr_m:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zlitm_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zlit_m_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zmb_r:
				bytereg(&p.From, &p.Ft)
				fallthrough

			case Zm_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Z_m_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zm2_r:
				ab.Put2(byte(op), o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_i_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
				ab.Put1(byte(p.To.Offset))

			case Zibm_r, Zibr_m:
				ab.PutOpBytesLit(z, &o.op)
				if yt.zcase == Zibr_m {
					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				} else {
					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				}
				// Immediate width: 1 byte by default, 2 bytes for a
				// Yi32 operand under the Pe prefix, 4 bytes for Yi32 otherwise.
				switch {
				default:
					ab.Put1(byte(p.From.Offset))
				case yt.args[0] == Yi32 && o.prefix == Pe:
					ab.PutInt16(int16(p.From.Offset))
				case yt.args[0] == Yi32:
					ab.PutInt32(int32(p.From.Offset))
				}

			case Zaut_r:
				ab.Put1(0x8d) // leal
				if p.From.Type != obj.TYPE_ADDR {
					ctxt.Diag("asmins: Zaut sb type ADDR")
				}
				// Temporarily present From as a memory operand to asmand,
				// then restore its type.
				p.From.Type = obj.TYPE_MEM
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
				p.From.Type = obj.TYPE_ADDR

			case Zm_o:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))

			case Zr_m:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])

			case Zvex_rm_v_r:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zvex_rm_v_ro:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_rm_vo:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_r_v:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				// Low three bits of the X or Y register number are
				// OR-ed into the byte taken from o.op[z+2].
				regnum := byte(0x7)
				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
					regnum &= byte(p.GetFrom3().Reg - REG_X0)
				} else {
					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
				}
				ab.Put1(o.op[z+2] | regnum)
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zvex_i_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zvex_v_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zvex_r_v_rm:
				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex_rm_r_vo:
				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_r_rm:
				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zvex_hr_rm_v_r:
				hr, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				// The extra register operand travels in the high
				// nibble of a trailing byte.
				ab.Put1(byte(regIndex(hr.Reg) << 4))

			case Zevex_k_rmo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))

			case Zevex_i_rm_vo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_vo:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, to, nil, kmask)
				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
				ab.Put1(byte(imm.Offset))

			case Zevex_i_r_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_r_k_rm:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, nil, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_r:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, nil, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, nil)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_k_r:
				imm, from, from3, kmask, to := unpackOps5(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_r_v_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_rm_k_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_r_k_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_k_r:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)

			case Zevex_r_v_k_rm:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, from3, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)

			case Zr_m_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zr_m_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))

			case Zcallindreg:
				// Record a zero-sized R_CALLIND relocation at the
				// instruction, then encode like Zo_m64.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc)
				r.Type = objabi.R_CALLIND
				r.Siz = 0
				fallthrough

			case Zo_m64:
				ab.Put1(byte(op))
				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)

			case Zm_ibo:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))

			case Zibo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Zibo_m_xm:
				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_ib, Zib_:
				if yt.zcase == Zib_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if p.As == AXABORT {
					ab.Put1(o.op[z+1])
				}
				ab.Put1(byte(vaddr(ctxt, p, a, nil)))

			case Zib_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))

			case Zil_rp:
				ab.rexflag |= regrex[p.To.Reg] & Rxb
				ab.Put1(byte(op + reg[p.To.Reg]))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Zo_iw:
				ab.Put1(byte(op))
				if p.From.Type != obj.TYPE_NONE {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				}

			case Ziq_rp:
				// Pick the shortest form that can hold the constant:
				// a 32-bit immediate (0xb8+reg), a sign-extended
				// 32-bit immediate (0xc7), or the full 64-bit immediate.
				v = vaddr(ctxt, p, &p.From, &rel)
				l = int(v >> 32)
				if l == 0 && rel.Siz != 8 {
					ab.rexflag &^= (0x40 | Rxw)

					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt32(int32(v))
				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
					ab.Put1(0xc7)
					ab.asmando(ctxt, cursym, p, &p.To, 0)

					ab.PutInt32(int32(v)) // need all 8
				} else {
					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(op + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt64(v)
				}

			case Zib_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_il, Zil_:
				if yt.zcase == Zil_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zm_ilo, Zilo_m:
				ab.Put1(byte(op))
				if yt.zcase == Zilo_m {
					a = &p.From
					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				} else {
					a = &p.To
					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				}

				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zil_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Z_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.To.Reg]))

			case Zrp_:
				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.From.Reg]))

			case Zcallcon, Zjmpcon:
				if yt.zcase == Zcallcon {
					ab.Put1(byte(op))
				} else {
					ab.Put1(o.op[z+1])
				}
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Type = objabi.R_PCREL
				r.Siz = 4
				r.Add = p.To.Offset
				ab.PutInt32(0)

			case Zcallind:
				ab.Put2(byte(op), o.op[z+1])
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				if ctxt.Arch.Family == sys.AMD64 {
					r.Type = objabi.R_PCREL
				} else {
					r.Type = objabi.R_ADDR
				}
				r.Siz = 4
				r.Add = p.To.Offset
				r.Sym = p.To.Sym
				ab.PutInt32(0)

			case Zcall, Zcallduff:
				if p.To.Sym == nil {
					ctxt.Diag("call without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
					ctxt.Diag("directly calling duff when dynamically linking Go")
				}

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Maintain BP around call, since duffcopy/duffzero can't do it
					// (the call jumps into the middle of the function).
					// This makes it possible to see call sites for duffcopy/duffzero in
					// BP-based profiling tools like Linux perf (which is the
					// whole point of maintaining frame pointers in Go).
					// MOVQ BP, -16(SP)
					// LEAQ -16(SP), BP
					ab.Put(bpduff1)
				}
				ab.Put1(byte(op))
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Sym = p.To.Sym
				r.Add = p.To.Offset
				r.Type = objabi.R_CALL
				r.Siz = 4
				ab.PutInt32(0)

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Pop BP pushed above.
					// MOVQ 0(BP), BP
					ab.Put(bpduff2)
				}

			// TODO: jump across functions needs reloc
			case Zbr, Zjmp, Zloop:
				if p.As == AXBEGIN {
					ab.Put1(byte(op))
				}
				if p.To.Sym != nil {
					if yt.zcase != Zjmp {
						ctxt.Diag("branch to ATEXT")
						ctxt.DiagFlush()
						log.Fatalf("bad code")
					}

					ab.Put1(o.op[z+1])
					r = obj.Addrel(cursym)
					r.Off = int32(p.Pc + int64(ab.Len()))
					r.Sym = p.To.Sym
					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
					// it can point to a trampoline instead of the destination itself.
					r.Type = objabi.R_CALL
					r.Siz = 4
					ab.PutInt32(0)
					break
				}

				// Assumes q is in this function.
				// TODO: Check in input, preserve in brchain.

				// Fill in backward jump now.
				q = p.To.Target()

				if q == nil {
					ctxt.Diag("jmp/branch/loop without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if p.Back&branchBackwards != 0 {
					// Displacement is measured from the end of the
					// 2-byte short form first.
					v = q.Pc - (p.Pc + 2)
					if v >= -128 && p.As != AXBEGIN {
						if p.As == AJCXZL {
							ab.Put1(0x67)
						}
						ab.Put2(byte(op), byte(v))
					} else if yt.zcase == Zloop {
						ctxt.Diag("loop too far: %v", p)
					} else {
						// Re-base the displacement for the longer
						// 32-bit form, accounting for each extra byte emitted.
						v -= 5 - 2
						if p.As == AXBEGIN {
							v--
						}
						if yt.zcase == Zbr {
							ab.Put1(0x0f)
							v--
						}

						ab.Put1(o.op[z+1])
						ab.PutInt32(int32(v))
					}

					break
				}

				// Annotate target; will fill in later.
				p.Forwd = q.Rel

				q.Rel = p
				if p.Back&branchShort != 0 && p.As != AXBEGIN {
					if p.As == AJCXZL {
						ab.Put1(0x67)
					}
					ab.Put2(byte(op), 0)
				} else if yt.zcase == Zloop {
					ctxt.Diag("loop too far: %v", p)
				} else {
					if yt.zcase == Zbr {
						ab.Put1(0x0f)
					}
					ab.Put1(o.op[z+1])
					ab.PutInt32(0)
				}

			case Zbyte:
				// Here op is the number of data bytes to emit;
				// v is written out least significant byte first.
				v = vaddr(ctxt, p, &p.From, &rel)
				if rel.Siz != 0 {
					rel.Siz = uint8(op)
					r = obj.Addrel(cursym)
					*r = rel
					r.Off = int32(p.Pc + int64(ab.Len()))
				}

				ab.Put1(byte(v))
				if op > 1 {
					ab.Put1(byte(v >> 8))
					if op > 2 {
						ab.PutInt16(int16(v >> 16))
						if op > 4 {
							ab.PutInt32(int32(v >> 32))
						}
					}
				}
			}

			return
		}
	}
	// No ytab row matched: try the special MOV forms listed in ymovtab.
	// The table is scanned until an entry with a zero "as" field.
	f3t = Ynone * Ymax
	if p.GetFrom3() != nil {
		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
	}
	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
		var pp obj.Prog
		var t []byte
		if p.As == mo[0].as {
			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
				t = mo[0].op[:]
				switch mo[0].code {
				default:
					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)

				case movLit:
					// Emit the literal bytes up to the first zero byte.
					for z = 0; t[z] != 0; z++ {
						ab.Put1(t[z])
					}

				case movRegMem:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))

				case movMemReg:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))

				case movRegMem2op: // r,m - 2op
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)

				case movMemReg2op:
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)

				case movFullPtr:
					if t[0] != 0 {
						ab.Put1(t[0])
					}
					// The segment register named in To.Index selects the opcode.
					switch p.To.Index {
					default:
						goto bad

					case REG_DS:
						ab.Put1(0xc5)

					case REG_SS:
						ab.Put2(0x0f, 0xb2)

					case REG_ES:
						ab.Put1(0xc4)

					case REG_FS:
						ab.Put2(0x0f, 0xb4)

					case REG_GS:
						ab.Put2(0x0f, 0xb5)
					}

					ab.asmand(ctxt, cursym, p, &p.From, &p.To)

				case movDoubleShift:
					// An optional leading Pw or Pe entry in the table
					// is consumed before the opcode bytes.
					if t[0] == Pw {
						if ctxt.Arch.Family != sys.AMD64 {
							ctxt.Diag("asmins: illegal 64: %v", p)
						}
						ab.rexflag |= Pw
						t = t[1:]
					} else if t[0] == Pe {
						ab.Put1(Pe)
						t = t[1:]
					}

					switch p.From.Type {
					default:
						goto bad

					case obj.TYPE_CONST:
						ab.Put2(0x0f, t[0])
						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						ab.Put1(byte(p.From.Offset))

					case obj.TYPE_REG:
						switch p.From.Reg {
						default:
							goto bad

						case REG_CL, REG_CX:
							ab.Put2(0x0f, t[1])
							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						}
					}

				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
				// where you load the TLS base register into a register and then index off that
				// register to access the actual TLS variables. Systems that allow direct TLS access
				// are handled in prefixof above and should not be listed here.
				case movTLSReg:
					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
						ctxt.Diag("invalid load of TLS: %v", p)
					}

					if ctxt.Arch.Family == sys.I386 {
						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
						// where you load the TLS base register into a register and then index off that
						// register to access the actual TLS variables. Systems that allow direct TLS access
						// are handled in prefixof above and should not be listed here.
						switch ctxt.Headtype {
						default:
							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

						case objabi.Hlinux, objabi.Hfreebsd:
							if ctxt.Flag_shared {
								// Note that this is not generating the same insns as the other cases.
								//     MOV TLS, dst
								// becomes
								//     call __x86.get_pc_thunk.dst
								//     movl (gotpc + g@gotntpoff)(dst), dst
								// which is encoded as
								//     call __x86.get_pc_thunk.dst
								//     movq 0(dst), dst
								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
								// is g, which we can't check here, but will when we assemble the second
								// instruction.
								dst := p.To.Reg
								ab.Put1(0xe8)
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_CALL
								r.Siz = 4
								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
								ab.PutInt32(0)

								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_TLS_IE
								r.Siz = 4
								r.Add = 2
								ab.PutInt32(0)
							} else {
								// ELF TLS base is 0(GS).
								pp.From = p.From

								pp.From.Type = obj.TYPE_MEM
								pp.From.Reg = REG_GS
								pp.From.Offset = 0
								pp.From.Index = REG_NONE
								pp.From.Scale = 0
								ab.Put2(0x65, // GS
									0x8B)
								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
							}
						case objabi.Hplan9:
							pp.From = obj.Addr{}
							pp.From.Type = obj.TYPE_MEM
							pp.From.Name = obj.NAME_EXTERN
							pp.From.Sym = plan9privates
							pp.From.Offset = 0
							pp.From.Index = REG_NONE
							ab.Put1(0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

						case objabi.Hwindows:
							// Windows TLS base is always 0x14(FS).
							pp.From = p.From

							pp.From.Type = obj.TYPE_MEM
							pp.From.Reg = REG_FS
							pp.From.Offset = 0x14
							pp.From.Index = REG_NONE
							pp.From.Scale = 0
							ab.Put2(0x64, // FS
								0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
						}
						break
					}

					// Not I386: the remaining cases set REX.W in ab.rexflag.
					switch ctxt.Headtype {
					default:
						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

					case objabi.Hlinux, objabi.Hfreebsd:
						if !ctxt.Flag_shared {
							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
						}
						// Note that this is not generating the same insn as the other cases.
						//     MOV TLS, R_to
						// becomes
						//     movq g@gottpoff(%rip), R_to
						// which is encoded as
						//     movq 0(%rip), R_to
						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
						// is g, which we can't check here, but will when we assemble the second
						// instruction.
						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)

						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
						r = obj.Addrel(cursym)
						r.Off = int32(p.Pc + int64(ab.Len()))
						r.Type = objabi.R_TLS_IE
						r.Siz = 4
						r.Add = -4
						ab.PutInt32(0)

					case objabi.Hplan9:
						pp.From = obj.Addr{}
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_EXTERN
						pp.From.Sym = plan9privates
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						ab.rexflag |= Pw
						ab.Put1(0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
						// TLS base is 0(FS).
						pp.From = p.From

						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_NONE
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x64, // FS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

					case objabi.Hwindows:
						// Windows TLS base is always 0x28(GS).
						pp.From = p.From

						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_GS
						pp.From.Offset = 0x28
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x65, // GS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					}
				}
				return
			}
		}
	}
	goto bad

bad:
	if ctxt.Arch.Family != sys.AMD64 {
		// here, the assembly has failed.
		// if it's a byte instruction that has
		// unaddressable registers, try to
		// exchange registers and reissue the
		// instruction with the operands renamed.
		pp := *p

		unbytereg(&pp.From, &pp.Ft)
		unbytereg(&pp.To, &pp.Tt)

		// Each branch below brackets the reissued instruction with a
		// pair of identical XCHG encodings: one before and one after.
		z := int(p.From.Reg)
		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.To)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
				}
				return
			}

			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
				// We certainly don't want to exchange
				// with AX if the op is MUL or DIV.
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
			}
			return
		}

		z = int(p.To.Reg)
		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.From)
				if breg != REG_AX {
					ab.Put1(0x87) //xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
				}
				return
			}

			if isax(&p.From) {
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
			}
			return
		}
	}

	ctxt.Diag("invalid instruction: %v", p)
}
5315
5316 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
5317 // which is not referenced in a.
5318 // If a is empty, it returns BX to account for MULB-like instructions
5319 // that might use DX and AX.
5320 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
5321 cana, canb, canc, cand := true, true, true, true
5322 if a.Type == obj.TYPE_NONE {
5323 cana, cand = false, false
5324 }
5325
5326 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
5327 switch a.Reg {
5328 case REG_NONE:
5329 cana, cand = false, false
5330 case REG_AX, REG_AL, REG_AH:
5331 cana = false
5332 case REG_BX, REG_BL, REG_BH:
5333 canb = false
5334 case REG_CX, REG_CL, REG_CH:
5335 canc = false
5336 case REG_DX, REG_DL, REG_DH:
5337 cand = false
5338 }
5339 }
5340
5341 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
5342 switch a.Index {
5343 case REG_AX:
5344 cana = false
5345 case REG_BX:
5346 canb = false
5347 case REG_CX:
5348 canc = false
5349 case REG_DX:
5350 cand = false
5351 }
5352 }
5353
5354 switch {
5355 case cana:
5356 return REG_AX
5357 case canb:
5358 return REG_BX
5359 case canc:
5360 return REG_CX
5361 case cand:
5362 return REG_DX
5363 default:
5364 ctxt.Diag("impossible byte register")
5365 ctxt.DiagFlush()
5366 log.Fatalf("bad code")
5367 return 0
5368 }
5369 }
5370
5371 func isbadbyte(a *obj.Addr) bool {
5372 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
5373 }
5374
5375 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
5376 ab.Reset()
5377
5378 ab.rexflag = 0
5379 ab.vexflag = false
5380 ab.evexflag = false
5381 mark := ab.Len()
5382 ab.doasm(ctxt, cursym, p)
5383 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5384 // as befits the whole approach of the architecture,
5385 // the rex prefix must appear before the first opcode byte
5386 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
5387 // before the 0f opcode escape!), or it might be ignored.
5388 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
5389 if ctxt.Arch.Family != sys.AMD64 {
5390 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
5391 }
5392 n := ab.Len()
5393 var np int
5394 for np = mark; np < n; np++ {
5395 c := ab.At(np)
5396 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
5397 break
5398 }
5399 }
5400 ab.Insert(np, byte(0x40|ab.rexflag))
5401 }
5402
5403 n := ab.Len()
5404 for i := len(cursym.R) - 1; i >= 0; i-- {
5405 r := &cursym.R[i]
5406 if int64(r.Off) < p.Pc {
5407 break
5408 }
5409 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
5410 r.Off++
5411 }
5412 if r.Type == objabi.R_PCREL {
5413 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
5414 // PC-relative addressing is relative to the end of the instruction,
5415 // but the relocations applied by the linker are relative to the end
5416 // of the relocation. Because immediate instruction
5417 // arguments can follow the PC-relative memory reference in the
5418 // instruction encoding, the two may not coincide. In this case,
5419 // adjust addend so that linker can keep relocating relative to the
5420 // end of the relocation.
5421 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
5422 } else if ctxt.Arch.Family == sys.I386 {
5423 // On 386 PC-relative addressing (for non-call/jmp instructions)
5424 // assumes that the previous instruction loaded the PC of the end
5425 // of that instruction into CX, so the adjustment is relative to
5426 // that.
5427 r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5428 }
5429 }
5430 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
5431 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
5432 r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
5433 }
5434
5435 }
5436 }
5437
5438 // unpackOps4 extracts 4 operands from p.
5439 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
5440 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
5441 }
5442
5443 // unpackOps5 extracts 5 operands from p.
5444 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
5445 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To
5446 }
5447