obj.go raw

   1  // Copyright © 2015 The Go Authors.  All rights reserved.
   2  //
   3  // Permission is hereby granted, free of charge, to any person obtaining a copy
   4  // of this software and associated documentation files (the "Software"), to deal
   5  // in the Software without restriction, including without limitation the rights
   6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   7  // copies of the Software, and to permit persons to whom the Software is
   8  // furnished to do so, subject to the following conditions:
   9  //
  10  // The above copyright notice and this permission notice shall be included in
  11  // all copies or substantial portions of the Software.
  12  //
  13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19  // THE SOFTWARE.
  20  
  21  package riscv
  22  
  23  import (
  24  	"github.com/twitchyliquid64/golang-asm/obj"
  25  	"github.com/twitchyliquid64/golang-asm/objabi"
  26  	"github.com/twitchyliquid64/golang-asm/sys"
  27  	"fmt"
  28  )
  29  
// buildop is intentionally a no-op: it takes the link context and does nothing.
// NOTE(review): presumably kept so this package offers the same hook as other
// architecture back ends - confirm against where it is registered.
func buildop(ctxt *obj.Link) {}
  31  
  32  // jalrToSym replaces p with a set of Progs needed to jump to the Sym in p.
  33  // lr is the link register to use for the JALR.
  34  // p must be a CALL, JMP or RET.
  35  func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog {
  36  	if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET {
  37  		ctxt.Diag("unexpected Prog in jalrToSym: %v", p)
  38  		return p
  39  	}
  40  
  41  	// TODO(jsing): Consider using a single JAL instruction and teaching
  42  	// the linker to provide trampolines for the case where the destination
  43  	// offset is too large. This would potentially reduce instructions for
  44  	// the common case, but would require three instructions to go via the
  45  	// trampoline.
  46  
  47  	to := p.To
  48  
  49  	p.As = AAUIPC
  50  	p.Mark |= NEED_PCREL_ITYPE_RELOC
  51  	p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}}
  52  	p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
  53  	p.Reg = 0
  54  	p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
  55  	p = obj.Appendp(p, newprog)
  56  
  57  	// Leave Sym only for the CALL reloc in assemble.
  58  	p.As = AJALR
  59  	p.From.Type = obj.TYPE_REG
  60  	p.From.Reg = lr
  61  	p.Reg = 0
  62  	p.To.Type = obj.TYPE_REG
  63  	p.To.Reg = REG_TMP
  64  	p.To.Sym = to.Sym
  65  
  66  	return p
  67  }
  68  
  69  // progedit is called individually for each *obj.Prog. It normalizes instruction
  70  // formats and eliminates as many pseudo-instructions as possible.
  71  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
  72  
  73  	// Expand binary instructions to ternary ones.
  74  	if p.Reg == 0 {
  75  		switch p.As {
  76  		case AADDI, ASLTI, ASLTIU, AANDI, AORI, AXORI, ASLLI, ASRLI, ASRAI,
  77  			AADD, AAND, AOR, AXOR, ASLL, ASRL, ASUB, ASRA,
  78  			AMUL, AMULH, AMULHU, AMULHSU, AMULW, ADIV, ADIVU, ADIVW, ADIVUW,
  79  			AREM, AREMU, AREMW, AREMUW:
  80  			p.Reg = p.To.Reg
  81  		}
  82  	}
  83  
  84  	// Rewrite instructions with constant operands to refer to the immediate
  85  	// form of the instruction.
  86  	if p.From.Type == obj.TYPE_CONST {
  87  		switch p.As {
  88  		case AADD:
  89  			p.As = AADDI
  90  		case ASLT:
  91  			p.As = ASLTI
  92  		case ASLTU:
  93  			p.As = ASLTIU
  94  		case AAND:
  95  			p.As = AANDI
  96  		case AOR:
  97  			p.As = AORI
  98  		case AXOR:
  99  			p.As = AXORI
 100  		case ASLL:
 101  			p.As = ASLLI
 102  		case ASRL:
 103  			p.As = ASRLI
 104  		case ASRA:
 105  			p.As = ASRAI
 106  		}
 107  	}
 108  
 109  	switch p.As {
 110  	case obj.AJMP:
 111  		// Turn JMP into JAL ZERO or JALR ZERO.
 112  		p.From.Type = obj.TYPE_REG
 113  		p.From.Reg = REG_ZERO
 114  
 115  		switch p.To.Type {
 116  		case obj.TYPE_BRANCH:
 117  			p.As = AJAL
 118  		case obj.TYPE_MEM:
 119  			switch p.To.Name {
 120  			case obj.NAME_NONE:
 121  				p.As = AJALR
 122  			case obj.NAME_EXTERN:
 123  				// Handled in preprocess.
 124  			default:
 125  				ctxt.Diag("unsupported name %d for %v", p.To.Name, p)
 126  			}
 127  		default:
 128  			panic(fmt.Sprintf("unhandled type %+v", p.To.Type))
 129  		}
 130  
 131  	case obj.ACALL:
 132  		switch p.To.Type {
 133  		case obj.TYPE_MEM:
 134  			// Handled in preprocess.
 135  		case obj.TYPE_REG:
 136  			p.As = AJALR
 137  			p.From.Type = obj.TYPE_REG
 138  			p.From.Reg = REG_LR
 139  		default:
 140  			ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p)
 141  		}
 142  
 143  	case obj.AUNDEF:
 144  		p.As = AEBREAK
 145  
 146  	case ASCALL:
 147  		// SCALL is the old name for ECALL.
 148  		p.As = AECALL
 149  
 150  	case ASBREAK:
 151  		// SBREAK is the old name for EBREAK.
 152  		p.As = AEBREAK
 153  	}
 154  }
 155  
 156  // addrToReg extracts the register from an Addr, handling special Addr.Names.
 157  func addrToReg(a obj.Addr) int16 {
 158  	switch a.Name {
 159  	case obj.NAME_PARAM, obj.NAME_AUTO:
 160  		return REG_SP
 161  	}
 162  	return a.Reg
 163  }
 164  
 165  // movToLoad converts a MOV mnemonic into the corresponding load instruction.
 166  func movToLoad(mnemonic obj.As) obj.As {
 167  	switch mnemonic {
 168  	case AMOV:
 169  		return ALD
 170  	case AMOVB:
 171  		return ALB
 172  	case AMOVH:
 173  		return ALH
 174  	case AMOVW:
 175  		return ALW
 176  	case AMOVBU:
 177  		return ALBU
 178  	case AMOVHU:
 179  		return ALHU
 180  	case AMOVWU:
 181  		return ALWU
 182  	case AMOVF:
 183  		return AFLW
 184  	case AMOVD:
 185  		return AFLD
 186  	default:
 187  		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
 188  	}
 189  }
 190  
 191  // movToStore converts a MOV mnemonic into the corresponding store instruction.
 192  func movToStore(mnemonic obj.As) obj.As {
 193  	switch mnemonic {
 194  	case AMOV:
 195  		return ASD
 196  	case AMOVB:
 197  		return ASB
 198  	case AMOVH:
 199  		return ASH
 200  	case AMOVW:
 201  		return ASW
 202  	case AMOVF:
 203  		return AFSW
 204  	case AMOVD:
 205  		return AFSD
 206  	default:
 207  		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
 208  	}
 209  }
 210  
 211  // rewriteMOV rewrites MOV pseudo-instructions.
 212  func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) {
 213  	switch p.As {
 214  	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
 215  	default:
 216  		panic(fmt.Sprintf("%+v is not a MOV pseudo-instruction", p.As))
 217  	}
 218  
 219  	switch p.From.Type {
 220  	case obj.TYPE_MEM: // MOV c(Rs), Rd -> L $c, Rs, Rd
 221  		switch p.From.Name {
 222  		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
 223  			if p.To.Type != obj.TYPE_REG {
 224  				ctxt.Diag("unsupported load at %v", p)
 225  			}
 226  			p.As = movToLoad(p.As)
 227  			p.From.Reg = addrToReg(p.From)
 228  
 229  		case obj.NAME_EXTERN, obj.NAME_STATIC:
 230  			// AUIPC $off_hi, R
 231  			// L $off_lo, R
 232  			as := p.As
 233  			to := p.To
 234  
 235  			p.As = AAUIPC
 236  			p.Mark |= NEED_PCREL_ITYPE_RELOC
 237  			p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}}
 238  			p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
 239  			p.Reg = 0
 240  			p.To = obj.Addr{Type: obj.TYPE_REG, Reg: to.Reg}
 241  			p = obj.Appendp(p, newprog)
 242  
 243  			p.As = movToLoad(as)
 244  			p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: to.Reg, Offset: 0}
 245  			p.To = to
 246  
 247  		default:
 248  			ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
 249  		}
 250  
 251  	case obj.TYPE_REG:
 252  		switch p.To.Type {
 253  		case obj.TYPE_REG:
 254  			switch p.As {
 255  			case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb
 256  				p.As = AADDI
 257  				p.Reg = p.From.Reg
 258  				p.From = obj.Addr{Type: obj.TYPE_CONST}
 259  
 260  			case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb
 261  				p.As = AFSGNJS
 262  				p.Reg = p.From.Reg
 263  
 264  			case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb
 265  				p.As = AFSGNJD
 266  				p.Reg = p.From.Reg
 267  
 268  			default:
 269  				ctxt.Diag("unsupported register-register move at %v", p)
 270  			}
 271  
 272  		case obj.TYPE_MEM: // MOV Rs, c(Rd) -> S $c, Rs, Rd
 273  			switch p.As {
 274  			case AMOVBU, AMOVHU, AMOVWU:
 275  				ctxt.Diag("unsupported unsigned store at %v", p)
 276  			}
 277  			switch p.To.Name {
 278  			case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
 279  				p.As = movToStore(p.As)
 280  				p.To.Reg = addrToReg(p.To)
 281  
 282  			case obj.NAME_EXTERN:
 283  				// AUIPC $off_hi, TMP
 284  				// S $off_lo, TMP, R
 285  				as := p.As
 286  				from := p.From
 287  
 288  				p.As = AAUIPC
 289  				p.Mark |= NEED_PCREL_STYPE_RELOC
 290  				p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}}
 291  				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
 292  				p.Reg = 0
 293  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 294  				p = obj.Appendp(p, newprog)
 295  
 296  				p.As = movToStore(as)
 297  				p.From = from
 298  				p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_TMP, Offset: 0}
 299  
 300  			default:
 301  				ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
 302  			}
 303  
 304  		default:
 305  			ctxt.Diag("unsupported MOV at %v", p)
 306  		}
 307  
 308  	case obj.TYPE_CONST:
 309  		// MOV $c, R
 310  		// If c is small enough, convert to:
 311  		//   ADD $c, ZERO, R
 312  		// If not, convert to:
 313  		//   LUI top20bits(c), R
 314  		//   ADD bottom12bits(c), R, R
 315  		if p.As != AMOV {
 316  			ctxt.Diag("unsupported constant load at %v", p)
 317  		}
 318  		off := p.From.Offset
 319  		to := p.To
 320  
 321  		low, high, err := Split32BitImmediate(off)
 322  		if err != nil {
 323  			ctxt.Diag("%v: constant %d too large: %v", p, off, err)
 324  		}
 325  
 326  		// LUI is only necessary if the offset doesn't fit in 12-bits.
 327  		needLUI := high != 0
 328  		if needLUI {
 329  			p.As = ALUI
 330  			p.To = to
 331  			// Pass top 20 bits to LUI.
 332  			p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high}
 333  			p = obj.Appendp(p, newprog)
 334  		}
 335  		p.As = AADDIW
 336  		p.To = to
 337  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: low}
 338  		p.Reg = REG_ZERO
 339  		if needLUI {
 340  			p.Reg = to.Reg
 341  		}
 342  
 343  	case obj.TYPE_ADDR: // MOV $sym+off(SP/SB), R
 344  		if p.To.Type != obj.TYPE_REG || p.As != AMOV {
 345  			ctxt.Diag("unsupported addr MOV at %v", p)
 346  		}
 347  		switch p.From.Name {
 348  		case obj.NAME_EXTERN, obj.NAME_STATIC:
 349  			// AUIPC $off_hi, R
 350  			// ADDI $off_lo, R
 351  			to := p.To
 352  
 353  			p.As = AAUIPC
 354  			p.Mark |= NEED_PCREL_ITYPE_RELOC
 355  			p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}}
 356  			p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
 357  			p.Reg = 0
 358  			p.To = to
 359  			p = obj.Appendp(p, newprog)
 360  
 361  			p.As = AADDI
 362  			p.From = obj.Addr{Type: obj.TYPE_CONST}
 363  			p.Reg = to.Reg
 364  			p.To = to
 365  
 366  		case obj.NAME_PARAM, obj.NAME_AUTO:
 367  			p.As = AADDI
 368  			p.Reg = REG_SP
 369  			p.From.Type = obj.TYPE_CONST
 370  
 371  		case obj.NAME_NONE:
 372  			p.As = AADDI
 373  			p.Reg = p.From.Reg
 374  			p.From.Type = obj.TYPE_CONST
 375  			p.From.Reg = 0
 376  
 377  		default:
 378  			ctxt.Diag("bad addr MOV from name %v at %v", p.From.Name, p)
 379  		}
 380  
 381  	default:
 382  		ctxt.Diag("unsupported MOV at %v", p)
 383  	}
 384  }
 385  
 386  // InvertBranch inverts the condition of a conditional branch.
 387  func InvertBranch(as obj.As) obj.As {
 388  	switch as {
 389  	case ABEQ:
 390  		return ABNE
 391  	case ABEQZ:
 392  		return ABNEZ
 393  	case ABGE:
 394  		return ABLT
 395  	case ABGEU:
 396  		return ABLTU
 397  	case ABGEZ:
 398  		return ABLTZ
 399  	case ABGT:
 400  		return ABLE
 401  	case ABGTU:
 402  		return ABLEU
 403  	case ABGTZ:
 404  		return ABLEZ
 405  	case ABLE:
 406  		return ABGT
 407  	case ABLEU:
 408  		return ABGTU
 409  	case ABLEZ:
 410  		return ABGTZ
 411  	case ABLT:
 412  		return ABGE
 413  	case ABLTU:
 414  		return ABGEU
 415  	case ABLTZ:
 416  		return ABGEZ
 417  	case ABNE:
 418  		return ABEQ
 419  	case ABNEZ:
 420  		return ABEQZ
 421  	default:
 422  		panic("InvertBranch: not a branch")
 423  	}
 424  }
 425  
 426  // containsCall reports whether the symbol contains a CALL (or equivalent)
 427  // instruction. Must be called after progedit.
 428  func containsCall(sym *obj.LSym) bool {
 429  	// CALLs are CALL or JAL(R) with link register LR.
 430  	for p := sym.Func.Text; p != nil; p = p.Link {
 431  		switch p.As {
 432  		case obj.ACALL:
 433  			return true
 434  		case AJAL, AJALR:
 435  			if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
 436  				return true
 437  			}
 438  		}
 439  	}
 440  
 441  	return false
 442  }
 443  
 444  // setPCs sets the Pc field in all instructions reachable from p.
 445  // It uses pc as the initial value.
 446  func setPCs(p *obj.Prog, pc int64) {
 447  	for ; p != nil; p = p.Link {
 448  		p.Pc = pc
 449  		for _, ins := range instructionsForProg(p) {
 450  			pc += int64(ins.length())
 451  		}
 452  	}
 453  }
 454  
 455  // stackOffset updates Addr offsets based on the current stack size.
 456  //
 457  // The stack looks like:
 458  // -------------------
 459  // |                 |
 460  // |      PARAMs     |
 461  // |                 |
 462  // |                 |
 463  // -------------------
 464  // |    Parent RA    |   SP on function entry
 465  // -------------------
 466  // |                 |
 467  // |                 |
 468  // |       AUTOs     |
 469  // |                 |
 470  // |                 |
 471  // -------------------
 472  // |        RA       |   SP during function execution
 473  // -------------------
 474  //
 475  // FixedFrameSize makes other packages aware of the space allocated for RA.
 476  //
 477  // A nicer version of this diagram can be found on slide 21 of the presentation
 478  // attached to:
 479  //
 480  //   https://golang.org/issue/16922#issuecomment-243748180
 481  //
 482  func stackOffset(a *obj.Addr, stacksize int64) {
 483  	switch a.Name {
 484  	case obj.NAME_AUTO:
 485  		// Adjust to the top of AUTOs.
 486  		a.Offset += stacksize
 487  	case obj.NAME_PARAM:
 488  		// Adjust to the bottom of PARAMs.
 489  		a.Offset += stacksize + 8
 490  	}
 491  }
 492  
 493  // preprocess generates prologue and epilogue code, computes PC-relative branch
 494  // and jump offsets, and resolves pseudo-registers.
 495  //
 496  // preprocess is called once per linker symbol.
 497  //
 498  // When preprocess finishes, all instructions in the symbol are either
 499  // concrete, real RISC-V instructions or directive pseudo-ops like TEXT,
 500  // PCDATA, and FUNCDATA.
 501  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 502  	if cursym.Func.Text == nil || cursym.Func.Text.Link == nil {
 503  		return
 504  	}
 505  
 506  	// Generate the prologue.
 507  	text := cursym.Func.Text
 508  	if text.As != obj.ATEXT {
 509  		ctxt.Diag("preprocess: found symbol that does not start with TEXT directive")
 510  		return
 511  	}
 512  
 513  	stacksize := text.To.Offset
 514  	if stacksize == -8 {
 515  		// Historical way to mark NOFRAME.
 516  		text.From.Sym.Set(obj.AttrNoFrame, true)
 517  		stacksize = 0
 518  	}
 519  	if stacksize < 0 {
 520  		ctxt.Diag("negative frame size %d - did you mean NOFRAME?", stacksize)
 521  	}
 522  	if text.From.Sym.NoFrame() {
 523  		if stacksize != 0 {
 524  			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", stacksize)
 525  		}
 526  	}
 527  
 528  	if !containsCall(cursym) {
 529  		text.From.Sym.Set(obj.AttrLeaf, true)
 530  		if stacksize == 0 {
 531  			// A leaf function with no locals has no frame.
 532  			text.From.Sym.Set(obj.AttrNoFrame, true)
 533  		}
 534  	}
 535  
 536  	// Save LR unless there is no frame.
 537  	if !text.From.Sym.NoFrame() {
 538  		stacksize += ctxt.FixedFrameSize()
 539  	}
 540  
 541  	cursym.Func.Args = text.To.Val.(int32)
 542  	cursym.Func.Locals = int32(stacksize)
 543  
 544  	prologue := text
 545  
 546  	if !cursym.Func.Text.From.Sym.NoSplit() {
 547  		prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check
 548  	}
 549  
 550  	if stacksize != 0 {
 551  		prologue = ctxt.StartUnsafePoint(prologue, newprog)
 552  
 553  		// Actually save LR.
 554  		prologue = obj.Appendp(prologue, newprog)
 555  		prologue.As = AMOV
 556  		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
 557  		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize}
 558  
 559  		// Insert stack adjustment.
 560  		prologue = obj.Appendp(prologue, newprog)
 561  		prologue.As = AADDI
 562  		prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize}
 563  		prologue.Reg = REG_SP
 564  		prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
 565  		prologue.Spadj = int32(stacksize)
 566  
 567  		prologue = ctxt.EndUnsafePoint(prologue, newprog, -1)
 568  	}
 569  
 570  	if cursym.Func.Text.From.Sym.Wrapper() {
 571  		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
 572  		//
 573  		//   MOV g_panic(g), X11
 574  		//   BNE X11, ZERO, adjust
 575  		// end:
 576  		//   NOP
 577  		// ...rest of function..
 578  		// adjust:
 579  		//   MOV panic_argp(X11), X12
 580  		//   ADD $(autosize+FIXED_FRAME), SP, X13
 581  		//   BNE X12, X13, end
 582  		//   ADD $FIXED_FRAME, SP, X12
 583  		//   MOV X12, panic_argp(X11)
 584  		//   JMP end
 585  		//
 586  		// The NOP is needed to give the jumps somewhere to land.
 587  
 588  		ldpanic := obj.Appendp(prologue, newprog)
 589  
 590  		ldpanic.As = AMOV
 591  		ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic
 592  		ldpanic.Reg = 0
 593  		ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X11}
 594  
 595  		bneadj := obj.Appendp(ldpanic, newprog)
 596  		bneadj.As = ABNE
 597  		bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X11}
 598  		bneadj.Reg = REG_ZERO
 599  		bneadj.To.Type = obj.TYPE_BRANCH
 600  
 601  		endadj := obj.Appendp(bneadj, newprog)
 602  		endadj.As = obj.ANOP
 603  
 604  		last := endadj
 605  		for last.Link != nil {
 606  			last = last.Link
 607  		}
 608  
 609  		getargp := obj.Appendp(last, newprog)
 610  		getargp.As = AMOV
 611  		getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X11, Offset: 0} // Panic.argp
 612  		getargp.Reg = 0
 613  		getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
 614  
 615  		bneadj.To.SetTarget(getargp)
 616  
 617  		calcargp := obj.Appendp(getargp, newprog)
 618  		calcargp.As = AADDI
 619  		calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.FixedFrameSize()}
 620  		calcargp.Reg = REG_SP
 621  		calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X13}
 622  
 623  		testargp := obj.Appendp(calcargp, newprog)
 624  		testargp.As = ABNE
 625  		testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
 626  		testargp.Reg = REG_X13
 627  		testargp.To.Type = obj.TYPE_BRANCH
 628  		testargp.To.SetTarget(endadj)
 629  
 630  		adjargp := obj.Appendp(testargp, newprog)
 631  		adjargp.As = AADDI
 632  		adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)}
 633  		adjargp.Reg = REG_SP
 634  		adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
 635  
 636  		setargp := obj.Appendp(adjargp, newprog)
 637  		setargp.As = AMOV
 638  		setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
 639  		setargp.Reg = 0
 640  		setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X11, Offset: 0} // Panic.argp
 641  
 642  		godone := obj.Appendp(setargp, newprog)
 643  		godone.As = AJAL
 644  		godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
 645  		godone.To.Type = obj.TYPE_BRANCH
 646  		godone.To.SetTarget(endadj)
 647  	}
 648  
 649  	// Update stack-based offsets.
 650  	for p := cursym.Func.Text; p != nil; p = p.Link {
 651  		stackOffset(&p.From, stacksize)
 652  		stackOffset(&p.To, stacksize)
 653  	}
 654  
 655  	// Additional instruction rewriting.
 656  	for p := cursym.Func.Text; p != nil; p = p.Link {
 657  		switch p.As {
 658  		case obj.AGETCALLERPC:
 659  			if cursym.Leaf() {
 660  				// MOV LR, Rd
 661  				p.As = AMOV
 662  				p.From.Type = obj.TYPE_REG
 663  				p.From.Reg = REG_LR
 664  			} else {
 665  				// MOV (RSP), Rd
 666  				p.As = AMOV
 667  				p.From.Type = obj.TYPE_MEM
 668  				p.From.Reg = REG_SP
 669  			}
 670  
 671  		case obj.ACALL:
 672  			switch p.To.Type {
 673  			case obj.TYPE_MEM:
 674  				jalrToSym(ctxt, p, newprog, REG_LR)
 675  			}
 676  
 677  		case obj.AJMP:
 678  			switch p.To.Type {
 679  			case obj.TYPE_MEM:
 680  				switch p.To.Name {
 681  				case obj.NAME_EXTERN:
 682  					// JMP to symbol.
 683  					jalrToSym(ctxt, p, newprog, REG_ZERO)
 684  				}
 685  			}
 686  
 687  		case obj.ARET:
 688  			// Replace RET with epilogue.
 689  			retJMP := p.To.Sym
 690  
 691  			if stacksize != 0 {
 692  				// Restore LR.
 693  				p.As = AMOV
 694  				p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
 695  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
 696  				p = obj.Appendp(p, newprog)
 697  
 698  				p.As = AADDI
 699  				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize}
 700  				p.Reg = REG_SP
 701  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
 702  				p.Spadj = int32(-stacksize)
 703  				p = obj.Appendp(p, newprog)
 704  			}
 705  
 706  			if retJMP != nil {
 707  				p.As = obj.ARET
 708  				p.To.Sym = retJMP
 709  				p = jalrToSym(ctxt, p, newprog, REG_ZERO)
 710  			} else {
 711  				p.As = AJALR
 712  				p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
 713  				p.Reg = 0
 714  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
 715  			}
 716  
 717  			// "Add back" the stack removed in the previous instruction.
 718  			//
 719  			// This is to avoid confusing pctospadj, which sums
 720  			// Spadj from function entry to each PC, and shouldn't
 721  			// count adjustments from earlier epilogues, since they
 722  			// won't affect later PCs.
 723  			p.Spadj = int32(stacksize)
 724  
 725  		case AADDI:
 726  			// Refine Spadjs account for adjustment via ADDI instruction.
 727  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.From.Type == obj.TYPE_CONST {
 728  				p.Spadj = int32(-p.From.Offset)
 729  			}
 730  		}
 731  	}
 732  
 733  	// Rewrite MOV pseudo-instructions. This cannot be done in
 734  	// progedit, as SP offsets need to be applied before we split
 735  	// up some of the Addrs.
 736  	for p := cursym.Func.Text; p != nil; p = p.Link {
 737  		switch p.As {
 738  		case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
 739  			rewriteMOV(ctxt, newprog, p)
 740  		}
 741  	}
 742  
 743  	// Split immediates larger than 12-bits.
 744  	for p := cursym.Func.Text; p != nil; p = p.Link {
 745  		switch p.As {
 746  		// <opi> $imm, REG, TO
 747  		case AADDI, AANDI, AORI, AXORI:
 748  			// LUI $high, TMP
 749  			// ADDI $low, TMP, TMP
 750  			// <op> TMP, REG, TO
 751  			q := *p
 752  			low, high, err := Split32BitImmediate(p.From.Offset)
 753  			if err != nil {
 754  				ctxt.Diag("%v: constant %d too large", p, p.From.Offset, err)
 755  			}
 756  			if high == 0 {
 757  				break // no need to split
 758  			}
 759  
 760  			p.As = ALUI
 761  			p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high}
 762  			p.Reg = 0
 763  			p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 764  			p.Spadj = 0 // needed if TO is SP
 765  			p = obj.Appendp(p, newprog)
 766  
 767  			p.As = AADDIW
 768  			p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: low}
 769  			p.Reg = REG_TMP
 770  			p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 771  			p = obj.Appendp(p, newprog)
 772  
 773  			switch q.As {
 774  			case AADDI:
 775  				p.As = AADD
 776  			case AANDI:
 777  				p.As = AAND
 778  			case AORI:
 779  				p.As = AOR
 780  			case AXORI:
 781  				p.As = AXOR
 782  			default:
 783  				ctxt.Diag("unsupported instruction %v for splitting", q)
 784  			}
 785  			p.Spadj = q.Spadj
 786  			p.To = q.To
 787  			p.Reg = q.Reg
 788  			p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 789  
 790  		// <load> $imm, REG, TO (load $imm+(REG), TO)
 791  		case ALD, ALB, ALH, ALW, ALBU, ALHU, ALWU, AFLW, AFLD:
 792  			low, high, err := Split32BitImmediate(p.From.Offset)
 793  			if err != nil {
 794  				ctxt.Diag("%v: constant %d too large", p, p.From.Offset)
 795  			}
 796  			if high == 0 {
 797  				break // no need to split
 798  			}
 799  			q := *p
 800  
 801  			// LUI $high, TMP
 802  			// ADD TMP, REG, TMP
 803  			// <load> $low, TMP, TO
 804  			p.As = ALUI
 805  			p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high}
 806  			p.Reg = 0
 807  			p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 808  			p.Spadj = 0 // needed if TO is SP
 809  			p = obj.Appendp(p, newprog)
 810  
 811  			p.As = AADD
 812  			p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 813  			p.Reg = q.From.Reg
 814  			p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 815  			p = obj.Appendp(p, newprog)
 816  
 817  			p.As = q.As
 818  			p.To = q.To
 819  			p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_TMP, Offset: low}
 820  			p.Reg = obj.REG_NONE
 821  
 822  		// <store> $imm, REG, TO (store $imm+(TO), REG)
 823  		case ASD, ASB, ASH, ASW, AFSW, AFSD:
 824  			low, high, err := Split32BitImmediate(p.To.Offset)
 825  			if err != nil {
 826  				ctxt.Diag("%v: constant %d too large", p, p.To.Offset)
 827  			}
 828  			if high == 0 {
 829  				break // no need to split
 830  			}
 831  			q := *p
 832  
 833  			// LUI $high, TMP
 834  			// ADD TMP, TO, TMP
 835  			// <store> $low, REG, TMP
 836  			p.As = ALUI
 837  			p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high}
 838  			p.Reg = 0
 839  			p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 840  			p.Spadj = 0 // needed if TO is SP
 841  			p = obj.Appendp(p, newprog)
 842  
 843  			p.As = AADD
 844  			p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 845  			p.Reg = q.To.Reg
 846  			p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 847  			p = obj.Appendp(p, newprog)
 848  
 849  			p.As = q.As
 850  			p.From = obj.Addr{Type: obj.TYPE_REG, Reg: q.From.Reg, Offset: 0}
 851  			p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_TMP, Offset: low}
 852  		}
 853  	}
 854  
 855  	// Compute instruction addresses.  Once we do that, we need to check for
 856  	// overextended jumps and branches.  Within each iteration, Pc differences
 857  	// are always lower bounds (since the program gets monotonically longer,
 858  	// a fixed point will be reached).  No attempt to handle functions > 2GiB.
 859  	for {
 860  		rescan := false
 861  		setPCs(cursym.Func.Text, 0)
 862  
 863  		for p := cursym.Func.Text; p != nil; p = p.Link {
 864  			switch p.As {
 865  			case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
 866  				if p.To.Type != obj.TYPE_BRANCH {
 867  					panic("assemble: instruction with branch-like opcode lacks destination")
 868  				}
 869  				offset := p.To.Target().Pc - p.Pc
 870  				if offset < -4096 || 4096 <= offset {
 871  					// Branch is long.  Replace it with a jump.
 872  					jmp := obj.Appendp(p, newprog)
 873  					jmp.As = AJAL
 874  					jmp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
 875  					jmp.To = obj.Addr{Type: obj.TYPE_BRANCH}
 876  					jmp.To.SetTarget(p.To.Target())
 877  
 878  					p.As = InvertBranch(p.As)
 879  					p.To.SetTarget(jmp.Link)
 880  
 881  					// We may have made previous branches too long,
 882  					// so recheck them.
 883  					rescan = true
 884  				}
 885  			case AJAL:
 886  				if p.To.Target() == nil {
 887  					panic("intersymbol jumps should be expressed as AUIPC+JALR")
 888  				}
 889  				offset := p.To.Target().Pc - p.Pc
 890  				if offset < -(1<<20) || (1<<20) <= offset {
 891  					// Replace with 2-instruction sequence. This assumes
 892  					// that TMP is not live across J instructions, since
 893  					// it is reserved by SSA.
 894  					jmp := obj.Appendp(p, newprog)
 895  					jmp.As = AJALR
 896  					jmp.From = p.From
 897  					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 898  
 899  					// p.From is not generally valid, however will be
 900  					// fixed up in the next loop.
 901  					p.As = AAUIPC
 902  					p.From = obj.Addr{Type: obj.TYPE_BRANCH, Sym: p.From.Sym}
 903  					p.From.SetTarget(p.To.Target())
 904  					p.Reg = 0
 905  					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
 906  
 907  					rescan = true
 908  				}
 909  			}
 910  		}
 911  
 912  		if !rescan {
 913  			break
 914  		}
 915  	}
 916  
 917  	// Now that there are no long branches, resolve branch and jump targets.
 918  	// At this point, instruction rewriting which changes the number of
 919  	// instructions will break everything--don't do it!
 920  	for p := cursym.Func.Text; p != nil; p = p.Link {
 921  		switch p.As {
 922  		case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL:
 923  			switch p.To.Type {
 924  			case obj.TYPE_BRANCH:
 925  				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
 926  			case obj.TYPE_MEM:
 927  				panic("unhandled type")
 928  			}
 929  
 930  		case AAUIPC:
 931  			if p.From.Type == obj.TYPE_BRANCH {
 932  				low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc)
 933  				if err != nil {
 934  					ctxt.Diag("%v: jump displacement %d too large", p, p.To.Target().Pc-p.Pc)
 935  				}
 936  				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high, Sym: cursym}
 937  				p.Link.From.Offset = low
 938  			}
 939  		}
 940  	}
 941  
 942  	// Validate all instructions - this provides nice error messages.
 943  	for p := cursym.Func.Text; p != nil; p = p.Link {
 944  		for _, ins := range instructionsForProg(p) {
 945  			ins.validate(ctxt)
 946  		}
 947  	}
 948  }
 949  
// stacksplit appends the stack-overflow check for cursym after p and returns
// the last Prog it appended.
//
// The emitted sequence loads the stack guard into X10, compares it against SP
// (adjusted by framesize for larger frames) and, when the check passes,
// branches over a call to one of the runtime.morestack* functions. After that
// call a JAL jumps back to cursym.Func.Text.Link. The returned Prog is a
// zero-width NOP that is the target of the "check passed" branch.
//
// If framesize is zero nothing is emitted and p is returned unchanged.
func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog {
	// Leaf function with no frame is effectively NOSPLIT.
	if framesize == 0 {
		return p
	}

	// MOV	g_stackguard(g), X10
	p = obj.Appendp(p, newprog)
	p.As = AMOV
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REGG
	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
	if cursym.CFunc() {
		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
	}
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_X10

	// to_done is the branch that skips the morestack call; its target is
	// filled in at the end of this function. to_more is only set on the
	// huge-frame path and is pointed directly at the morestack call.
	var to_done, to_more *obj.Prog

	if framesize <= objabi.StackSmall {
		// small stack: SP < stackguard
		//	BLTU	SP, stackguard, done
		p = obj.Appendp(p, newprog)
		p.As = ABLTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_X10
		p.Reg = REG_SP
		p.To.Type = obj.TYPE_BRANCH
		to_done = p
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize < stackguard-StackSmall
		//	ADD	$-(framesize-StackSmall), SP, X11
		//	BLTU	X11, stackguard, done
		p = obj.Appendp(p, newprog)
		// TODO(sorear): logic inconsistent with comment, but both match all non-x86 arches
		p.As = AADDI
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
		p.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_X11

		p = obj.Appendp(p, newprog)
		p.As = ABLTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_X10
		p.Reg = REG_X11
		p.To.Type = obj.TYPE_BRANCH
		to_done = p
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	// stackguard is X10
		//	MOV	$StackPreempt, X11
		//	BEQ	X10, X11, more
		//	ADD	$StackGuard, SP, X11
		//	SUB	X10, X11
		//	MOV	$(framesize+(StackGuard-StackSmall)), X10
		//	BGTU	X11, X10, done
		p = obj.Appendp(p, newprog)
		p.As = AMOV
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = objabi.StackPreempt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_X11

		// Branch straight to the morestack call when the guard equals
		// StackPreempt; the target is set once the call has been appended.
		p = obj.Appendp(p, newprog)
		to_more = p
		p.As = ABEQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_X10
		p.Reg = REG_X11
		p.To.Type = obj.TYPE_BRANCH

		p = obj.Appendp(p, newprog)
		p.As = AADDI
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(objabi.StackGuard)
		p.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_X11

		p = obj.Appendp(p, newprog)
		p.As = ASUB
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_X10
		p.Reg = REG_X11
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_X11

		p = obj.Appendp(p, newprog)
		p.As = AMOV
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) + int64(objabi.StackGuard) - objabi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_X10

		// The BGTU X11, X10 of the sketch above is emitted as a BLTU with
		// X10 in From and X11 in Reg.
		p = obj.Appendp(p, newprog)
		p.As = ABLTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_X10
		p.Reg = REG_X11
		p.To.Type = obj.TYPE_BRANCH
		to_done = p
	}

	// NOTE(review): EmitEntryLiveness is defined outside this file; presumably
	// it records the liveness information at function entry - confirm.
	p = ctxt.EmitEntryLiveness(cursym, p, newprog)

	// CALL runtime.morestack(SB)
	p = obj.Appendp(p, newprog)
	p.As = obj.ACALL
	p.To.Type = obj.TYPE_BRANCH
	// Pick the morestack variant: morestackc for C functions,
	// morestack_noctxt when the function does not need a context, and
	// plain morestack otherwise.
	if cursym.CFunc() {
		p.To.Sym = ctxt.Lookup("runtime.morestackc")
	} else if !cursym.Func.Text.From.Sym.NeedCtxt() {
		p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt")
	} else {
		p.To.Sym = ctxt.Lookup("runtime.morestack")
	}
	if to_more != nil {
		to_more.To.SetTarget(p)
	}
	// Expand the CALL into the Progs needed to jump to the symbol, using X5
	// as the link register.
	p = jalrToSym(ctxt, p, newprog, REG_X5)

	// JMP start
	// A JAL whose link register (From) is REG_ZERO, targeting the Prog that
	// follows the function's TEXT Prog.
	p = obj.Appendp(p, newprog)
	p.As = AJAL
	p.To = obj.Addr{Type: obj.TYPE_BRANCH}
	p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
	p.To.SetTarget(cursym.Func.Text.Link)

	// placeholder for to_done's jump target
	p = obj.Appendp(p, newprog)
	p.As = obj.ANOP // zero-width place holder
	to_done.To.SetTarget(p)

	return p
}
1095  
// signExtend interprets the low bit bits of val as a two's-complement number
// and returns that number widened to 64 bits. Bits of val above position
// bit-1 are discarded.
func signExtend(val int64, bit uint) int64 {
	// Push the field's sign bit up to bit 63, then let the arithmetic right
	// shift replicate it on the way back down.
	shift := 64 - bit
	widened := val << shift
	return widened >> shift
}
1100  
1101  // Split32BitImmediate splits a signed 32-bit immediate into a signed 20-bit
1102  // upper immediate and a signed 12-bit lower immediate to be added to the upper
1103  // result. For example, high may be used in LUI and low in a following ADDI to
1104  // generate a full 32-bit constant.
1105  func Split32BitImmediate(imm int64) (low, high int64, err error) {
1106  	if !immIFits(imm, 32) {
1107  		return 0, 0, fmt.Errorf("immediate does not fit in 32-bits: %d", imm)
1108  	}
1109  
1110  	// Nothing special needs to be done if the immediate fits in 12-bits.
1111  	if immIFits(imm, 12) {
1112  		return imm, 0, nil
1113  	}
1114  
1115  	high = imm >> 12
1116  
1117  	// The bottom 12 bits will be treated as signed.
1118  	//
1119  	// If that will result in a negative 12 bit number, add 1 to
1120  	// our upper bits to adjust for the borrow.
1121  	//
1122  	// It is not possible for this increment to overflow. To
1123  	// overflow, the 20 top bits would be 1, and the sign bit for
1124  	// the low 12 bits would be set, in which case the entire 32
1125  	// bit pattern fits in a 12 bit signed value.
1126  	if imm&(1<<11) != 0 {
1127  		high++
1128  	}
1129  
1130  	low = signExtend(imm, 12)
1131  	high = signExtend(high, 20)
1132  
1133  	return low, high, nil
1134  }
1135  
// regVal converts register number r into its zero-based index within the
// register class spanning min through max. Both bounds are inclusive.
//
// It panics if r lies outside [min, max].
func regVal(r, min, max uint32) uint32 {
	if r < min || r > max {
		// The bounds are inclusive, so the message uses <= to describe the
		// range that would have been accepted.
		panic(fmt.Sprintf("register out of range, want %d <= %d <= %d", min, r, max))
	}
	return r - min
}
1142  
// regI returns the zero-based index of integer register r, i.e. r relative
// to REG_X0. It panics (via regVal) if r is not in the range REG_X0..REG_X31.
func regI(r uint32) uint32 {
	return regVal(r, REG_X0, REG_X31)
}
1147  
// regF returns the zero-based index of float register r, i.e. r relative to
// REG_F0. It panics (via regVal) if r is not in the range REG_F0..REG_F31.
func regF(r uint32) uint32 {
	return regVal(r, REG_F0, REG_F31)
}
1152  
1153  // regAddr extracts a register from an Addr.
1154  func regAddr(a obj.Addr, min, max uint32) uint32 {
1155  	if a.Type != obj.TYPE_REG {
1156  		panic(fmt.Sprintf("ill typed: %+v", a))
1157  	}
1158  	return regVal(uint32(a.Reg), min, max)
1159  }
1160  
// regIAddr extracts the integer register from an Addr, returned as an index
// relative to REG_X0. It panics (via regAddr) if a is not a register operand
// or its register is not in the range REG_X0..REG_X31.
func regIAddr(a obj.Addr) uint32 {
	return regAddr(a, REG_X0, REG_X31)
}
1165  
// regFAddr extracts the float register from an Addr, returned as an index
// relative to REG_F0. It panics (via regAddr) if a is not a register operand
// or its register is not in the range REG_F0..REG_F31.
func regFAddr(a obj.Addr) uint32 {
	return regAddr(a, REG_F0, REG_F31)
}
1170  
// immIFits reports whether x is representable as an nbits-wide signed
// (two's-complement) integer, that is, whether it lies in the range
// -2^(nbits-1) through 2^(nbits-1)-1 inclusive.
func immIFits(x int64, nbits uint) bool {
	// One bit is reserved for the sign; the rest carry magnitude.
	width := nbits - 1
	lo := int64(-1) << width
	hi := int64(1)<<width - 1
	return x >= lo && x <= hi
}
1179  
1180  // immI extracts the signed integer of the specified size from an immediate.
1181  func immI(as obj.As, imm int64, nbits uint) uint32 {
1182  	if !immIFits(imm, nbits) {
1183  		panic(fmt.Sprintf("%v\tsigned immediate %d cannot fit in %d bits", as, imm, nbits))
1184  	}
1185  	return uint32(imm)
1186  }
1187  
1188  func wantImmI(ctxt *obj.Link, as obj.As, imm int64, nbits uint) {
1189  	if !immIFits(imm, nbits) {
1190  		ctxt.Diag("%v\tsigned immediate cannot be larger than %d bits but got %d", as, nbits, imm)
1191  	}
1192  }
1193  
1194  func wantReg(ctxt *obj.Link, as obj.As, pos string, descr string, r, min, max uint32) {
1195  	if r < min || r > max {
1196  		var suffix string
1197  		if r != obj.REG_NONE {
1198  			suffix = fmt.Sprintf(" but got non-%s register %s", descr, RegName(int(r)))
1199  		}
1200  		ctxt.Diag("%v\texpected %s register in %s position%s", as, descr, pos, suffix)
1201  	}
1202  }
1203  
1204  func wantNoneReg(ctxt *obj.Link, as obj.As, pos string, r uint32) {
1205  	if r != obj.REG_NONE {
1206  		ctxt.Diag("%v\texpected no register in %s but got register %s", as, pos, RegName(int(r)))
1207  	}
1208  }
1209  
// wantIntReg checks that r is an integer register (REG_X0..REG_X31) and
// reports a diagnostic through ctxt if it is not. pos names the operand
// position used in the message.
func wantIntReg(ctxt *obj.Link, as obj.As, pos string, r uint32) {
	wantReg(ctxt, as, pos, "integer", r, REG_X0, REG_X31)
}
1214  
// wantFloatReg checks that r is a floating-point register (REG_F0..REG_F31)
// and reports a diagnostic through ctxt if it is not. pos names the operand
// position used in the message.
func wantFloatReg(ctxt *obj.Link, as obj.As, pos string, r uint32) {
	wantReg(ctxt, as, pos, "float", r, REG_F0, REG_F31)
}
1219  
1220  // wantEvenOffset checks that the offset is a multiple of two.
1221  func wantEvenOffset(ctxt *obj.Link, as obj.As, offset int64) {
1222  	if offset%1 != 0 {
1223  		ctxt.Diag("%v\tjump offset %v must be even", as, offset)
1224  	}
1225  }
1226  
// validateRIII checks the operands of an R-type instruction whose rd, rs1
// and rs2 must all be integer registers. Each operand that is not an integer
// register is reported through ctxt.
func validateRIII(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantIntReg(ctxt, ins.as, "rs1", ins.rs1)
	wantIntReg(ctxt, ins.as, "rs2", ins.rs2)
}
1232  
// validateRFFF checks the operands of an R-type instruction whose rd, rs1
// and rs2 must all be float registers. Each operand that is not a float
// register is reported through ctxt.
func validateRFFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins.as, "rd", ins.rd)
	wantFloatReg(ctxt, ins.as, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins.as, "rs2", ins.rs2)
}
1238  
// validateRFFI checks the operands of an R-type instruction with two float
// register inputs (rs1, rs2) and an integer register output (rd). Each
// operand of the wrong class is reported through ctxt.
func validateRFFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantFloatReg(ctxt, ins.as, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins.as, "rs2", ins.rs2)
}
1244  
// validateRFI checks the operands of an R-type instruction with a single
// float register input, held in rs2, and an integer register output (rd).
// rs1 must be unset. Violations are reported through ctxt.
func validateRFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantNoneReg(ctxt, ins.as, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins.as, "rs2", ins.rs2)
}
1250  
// validateRIF checks the operands of an R-type instruction with a single
// integer register input, held in rs2, and a float register output (rd).
// rs1 must be unset. Violations are reported through ctxt.
func validateRIF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins.as, "rd", ins.rd)
	wantNoneReg(ctxt, ins.as, "rs1", ins.rs1)
	wantIntReg(ctxt, ins.as, "rs2", ins.rs2)
}
1256  
// validateRFF checks the operands of an R-type instruction with a single
// float register input, held in rs2, and a float register output (rd).
// rs1 must be unset. Violations are reported through ctxt.
func validateRFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins.as, "rd", ins.rd)
	wantNoneReg(ctxt, ins.as, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins.as, "rs2", ins.rs2)
}
1262  
// validateII checks the operands of an I-type instruction with integer
// registers: the immediate must fit in 12 signed bits and both rd and rs1
// must be integer registers. Violations are reported through ctxt.
func validateII(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins.as, ins.imm, 12)
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantIntReg(ctxt, ins.as, "rs1", ins.rs1)
}
1268  
// validateIF checks the operands of an I-type instruction with a float
// destination: the immediate must fit in 12 signed bits, rd must be a float
// register and rs1 an integer register. Violations are reported through ctxt.
func validateIF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins.as, ins.imm, 12)
	wantFloatReg(ctxt, ins.as, "rd", ins.rd)
	wantIntReg(ctxt, ins.as, "rs1", ins.rs1)
}
1274  
// validateSI checks the operands of an S-type instruction with integer
// registers: the immediate must fit in 12 signed bits and both rd and rs1
// must be integer registers. rs2 is not examined. Violations are reported
// through ctxt.
func validateSI(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins.as, ins.imm, 12)
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantIntReg(ctxt, ins.as, "rs1", ins.rs1)
}
1280  
// validateSF checks the operands of an S-type instruction that uses a float
// register: the immediate must fit in 12 signed bits, rd must be an integer
// register and rs1 a float register. rs2 is not examined. Violations are
// reported through ctxt.
func validateSF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins.as, ins.imm, 12)
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantFloatReg(ctxt, ins.as, "rs1", ins.rs1)
}
1286  
// validateB checks the operands of a B-type (conditional branch)
// instruction: the offset is passed to the even-offset check and must fit in
// 13 signed bits, rd must be unset, and rs1 and rs2 must be integer
// registers. Violations are reported through ctxt.
func validateB(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 13 bit immediates for the
	// 12 bit slot. We implicitly drop the least significant bit in encodeB.
	wantEvenOffset(ctxt, ins.as, ins.imm)
	wantImmI(ctxt, ins.as, ins.imm, 13)
	wantNoneReg(ctxt, ins.as, "rd", ins.rd)
	wantIntReg(ctxt, ins.as, "rs1", ins.rs1)
	wantIntReg(ctxt, ins.as, "rs2", ins.rs2)
}
1296  
// validateU checks the operands of a U-type instruction: the immediate must
// fit in 20 signed bits, rd must be an integer register, and rs1 and rs2
// must be unset. Violations are reported through ctxt.
func validateU(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins.as, ins.imm, 20)
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantNoneReg(ctxt, ins.as, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins.as, "rs2", ins.rs2)
}
1303  
// validateJ checks the operands of a J-type (jump) instruction: the offset
// is passed to the even-offset check and must fit in 21 signed bits, rd must
// be an integer register, and rs1 and rs2 must be unset. Violations are
// reported through ctxt.
func validateJ(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 21 bit immediates for the
	// 20 bit slot. We implicitly drop the least significant bit in encodeJ.
	wantEvenOffset(ctxt, ins.as, ins.imm)
	wantImmI(ctxt, ins.as, ins.imm, 21)
	wantIntReg(ctxt, ins.as, "rd", ins.rd)
	wantNoneReg(ctxt, ins.as, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins.as, "rs2", ins.rs2)
}
1313  
1314  func validateRaw(ctxt *obj.Link, ins *instruction) {
1315  	// Treat the raw value specially as a 32-bit unsigned integer.
1316  	// Nobody wants to enter negative machine code.
1317  	if ins.imm < 0 || 1<<32 <= ins.imm {
1318  		ctxt.Diag("%v\timmediate in raw position cannot be larger than 32 bits but got %d", ins.as, ins.imm)
1319  	}
1320  }
1321  
1322  // encodeR encodes an R-type RISC-V instruction.
1323  func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 {
1324  	enc := encode(as)
1325  	if enc == nil {
1326  		panic("encodeR: could not encode instruction")
1327  	}
1328  	if enc.rs2 != 0 && rs2 != 0 {
1329  		panic("encodeR: instruction uses rs2, but rs2 was nonzero")
1330  	}
1331  	return funct7<<25 | enc.funct7<<25 | enc.rs2<<20 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode
1332  }
1333  
// encodeRIII encodes an R-type instruction whose rs1, rs2 and rd are all
// integer registers. It panics (via regI) if any of them is not.
func encodeRIII(ins *instruction) uint32 {
	return encodeR(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
}
1337  
// encodeRFFF encodes an R-type instruction whose rs1, rs2 and rd are all
// float registers. It panics (via regF) if any of them is not.
func encodeRFFF(ins *instruction) uint32 {
	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7)
}
1341  
// encodeRFFI encodes an R-type instruction with float registers in rs1 and
// rs2 and an integer register in rd. It panics (via regF/regI) if an operand
// is of the wrong register class.
func encodeRFFI(ins *instruction) uint32 {
	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
}
1345  
// encodeRFI encodes an R-type instruction with one float register input and
// an integer register output. The input is read from ins.rs2 but is passed
// to encodeR as its rs1 argument; encodeR's rs2 argument is zero.
func encodeRFI(ins *instruction) uint32 {
	return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7)
}
1349  
// encodeRIF encodes an R-type instruction with one integer register input
// and a float register output. The input is read from ins.rs2 but is passed
// to encodeR as its rs1 argument; encodeR's rs2 argument is zero.
func encodeRIF(ins *instruction) uint32 {
	return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
}
1353  
// encodeRFF encodes an R-type instruction with one float register input and
// a float register output. The input is read from ins.rs2 but is passed to
// encodeR as its rs1 argument; encodeR's rs2 argument is zero.
func encodeRFF(ins *instruction) uint32 {
	return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
}
1357  
1358  // encodeI encodes an I-type RISC-V instruction.
1359  func encodeI(as obj.As, rs1, rd, imm uint32) uint32 {
1360  	enc := encode(as)
1361  	if enc == nil {
1362  		panic("encodeI: could not encode instruction")
1363  	}
1364  	imm |= uint32(enc.csr)
1365  	return imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode
1366  }
1367  
// encodeII encodes an I-type instruction whose rs1 and rd are integer
// registers. It panics (via regI) if either is not. The immediate is
// truncated to uint32 without a range check here.
func encodeII(ins *instruction) uint32 {
	return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm))
}
1371  
// encodeIF encodes an I-type instruction with an integer register in rs1 and
// a float register in rd. It panics (via regI/regF) if an operand is of the
// wrong register class. The immediate is truncated to uint32 without a range
// check here.
func encodeIF(ins *instruction) uint32 {
	return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm))
}
1375  
1376  // encodeS encodes an S-type RISC-V instruction.
1377  func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 {
1378  	enc := encode(as)
1379  	if enc == nil {
1380  		panic("encodeS: could not encode instruction")
1381  	}
1382  	return (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode
1383  }
1384  
// encodeSI encodes an S-type instruction with integer registers. ins.rd is
// passed to encodeS as its rs1 argument and ins.rs1 as its rs2 argument; both
// must be integer registers or regI panics.
func encodeSI(ins *instruction) uint32 {
	return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm))
}
1388  
// encodeSF encodes an S-type instruction that uses a float register. ins.rd
// (an integer register) is passed to encodeS as its rs1 argument and ins.rs1
// (a float register) as its rs2 argument; regI/regF panic on a register of
// the wrong class.
func encodeSF(ins *instruction) uint32 {
	return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm))
}
1392  
1393  // encodeB encodes a B-type RISC-V instruction.
1394  func encodeB(ins *instruction) uint32 {
1395  	imm := immI(ins.as, ins.imm, 13)
1396  	rs2 := regI(ins.rs1)
1397  	rs1 := regI(ins.rs2)
1398  	enc := encode(ins.as)
1399  	if enc == nil {
1400  		panic("encodeB: could not encode instruction")
1401  	}
1402  	return (imm>>12)<<31 | ((imm>>5)&0x3f)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | ((imm>>1)&0xf)<<8 | ((imm>>11)&0x1)<<7 | enc.opcode
1403  }
1404  
1405  // encodeU encodes a U-type RISC-V instruction.
1406  func encodeU(ins *instruction) uint32 {
1407  	// The immediates for encodeU are the upper 20 bits of a 32 bit value.
1408  	// Rather than have the user/compiler generate a 32 bit constant, the
1409  	// bottommost bits of which must all be zero, instead accept just the
1410  	// top bits.
1411  	imm := immI(ins.as, ins.imm, 20)
1412  	rd := regI(ins.rd)
1413  	enc := encode(ins.as)
1414  	if enc == nil {
1415  		panic("encodeU: could not encode instruction")
1416  	}
1417  	return imm<<12 | rd<<7 | enc.opcode
1418  }
1419  
1420  // encodeJ encodes a J-type RISC-V instruction.
1421  func encodeJ(ins *instruction) uint32 {
1422  	imm := immI(ins.as, ins.imm, 21)
1423  	rd := regI(ins.rd)
1424  	enc := encode(ins.as)
1425  	if enc == nil {
1426  		panic("encodeJ: could not encode instruction")
1427  	}
1428  	return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12 | rd<<7 | enc.opcode
1429  }
1430  
1431  func encodeRawIns(ins *instruction) uint32 {
1432  	// Treat the raw value specially as a 32-bit unsigned integer.
1433  	// Nobody wants to enter negative machine code.
1434  	if ins.imm < 0 || 1<<32 <= ins.imm {
1435  		panic(fmt.Sprintf("immediate %d cannot fit in 32 bits", ins.imm))
1436  	}
1437  	return uint32(ins.imm)
1438  }
1439  
1440  func EncodeIImmediate(imm int64) (int64, error) {
1441  	if !immIFits(imm, 12) {
1442  		return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm)
1443  	}
1444  	return imm << 20, nil
1445  }
1446  
1447  func EncodeSImmediate(imm int64) (int64, error) {
1448  	if !immIFits(imm, 12) {
1449  		return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm)
1450  	}
1451  	return ((imm >> 5) << 25) | ((imm & 0x1f) << 7), nil
1452  }
1453  
1454  func EncodeUImmediate(imm int64) (int64, error) {
1455  	if !immIFits(imm, 20) {
1456  		return 0, fmt.Errorf("immediate %#x does not fit in 20 bits", imm)
1457  	}
1458  	return imm << 12, nil
1459  }
1460  
// encoding describes how one class of instruction is validated and turned
// into machine code. The zero value (nil validate) marks an opcode that has
// no encoding; see encodingForAs.
type encoding struct {
	encode   func(*instruction) uint32     // encode returns the machine code for an instruction
	validate func(*obj.Link, *instruction) // validate validates an instruction
	length   int                           // length of encoded instruction; 0 for pseudo-ops, 4 otherwise
}
1466  
// The shared encoding values referenced by the encodings table. Each pairs
// an encode function with the matching validate function.
var (
	// Encodings have the following naming convention:
	//
	//  1. the instruction encoding (R/I/S/B/U/J), in lowercase
	//  2. zero or more register operand identifiers (I = integer
	//     register, F = float register), in uppercase
	//  3. the word "Encoding"
	//
	// For example, rIIIEncoding indicates an R-type instruction with two
	// integer register inputs and an integer register output; sFEncoding
	// indicates an S-type instruction with rs2 being a float register.

	rIIIEncoding = encoding{encode: encodeRIII, validate: validateRIII, length: 4}
	rFFFEncoding = encoding{encode: encodeRFFF, validate: validateRFFF, length: 4}
	rFFIEncoding = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4}
	rFIEncoding  = encoding{encode: encodeRFI, validate: validateRFI, length: 4}
	rIFEncoding  = encoding{encode: encodeRIF, validate: validateRIF, length: 4}
	rFFEncoding  = encoding{encode: encodeRFF, validate: validateRFF, length: 4}

	iIEncoding = encoding{encode: encodeII, validate: validateII, length: 4}
	iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4}

	sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4}
	sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4}

	bEncoding = encoding{encode: encodeB, validate: validateB, length: 4}
	uEncoding = encoding{encode: encodeU, validate: validateU, length: 4}
	jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4}

	// rawEncoding encodes a raw instruction byte sequence.
	rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4}

	// pseudoOpEncoding is used for pseudo-ops: it has no encode function,
	// does no validation and has zero length. Calling its nil encode
	// function directly would panic; instruction.encode returns an error
	// for zero-length encodings instead of calling it.
	pseudoOpEncoding = encoding{encode: nil, validate: func(*obj.Link, *instruction) {}, length: 0}

	// badEncoding is used when an invalid op is encountered.
	// An error has already been generated, so let anything else through.
	badEncoding = encoding{encode: func(*instruction) uint32 { return 0 }, validate: func(*obj.Link, *instruction) {}, length: 0}
)
1506  
// encodings contains the encodings for RISC-V instructions.
// Instructions are masked with obj.AMask to keep indices small.
//
// Opcodes that are not listed keep the zero encoding value, whose nil
// validate function is how encodingForAs recognises "no encoding". The
// section numbers in the comments below are kept from the original source
// and presumably refer to chapters of the RISC-V ISA manuals.
var encodings = [ALAST & obj.AMask]encoding{

	// Unprivileged ISA

	// 2.4: Integer Computational Instructions
	AADDI & obj.AMask:  iIEncoding,
	ASLTI & obj.AMask:  iIEncoding,
	ASLTIU & obj.AMask: iIEncoding,
	AANDI & obj.AMask:  iIEncoding,
	AORI & obj.AMask:   iIEncoding,
	AXORI & obj.AMask:  iIEncoding,
	ASLLI & obj.AMask:  iIEncoding,
	ASRLI & obj.AMask:  iIEncoding,
	ASRAI & obj.AMask:  iIEncoding,
	ALUI & obj.AMask:   uEncoding,
	AAUIPC & obj.AMask: uEncoding,
	AADD & obj.AMask:   rIIIEncoding,
	ASLT & obj.AMask:   rIIIEncoding,
	ASLTU & obj.AMask:  rIIIEncoding,
	AAND & obj.AMask:   rIIIEncoding,
	AOR & obj.AMask:    rIIIEncoding,
	AXOR & obj.AMask:   rIIIEncoding,
	ASLL & obj.AMask:   rIIIEncoding,
	ASRL & obj.AMask:   rIIIEncoding,
	ASUB & obj.AMask:   rIIIEncoding,
	ASRA & obj.AMask:   rIIIEncoding,

	// 2.5: Control Transfer Instructions
	AJAL & obj.AMask:  jEncoding,
	AJALR & obj.AMask: iIEncoding,
	ABEQ & obj.AMask:  bEncoding,
	ABNE & obj.AMask:  bEncoding,
	ABLT & obj.AMask:  bEncoding,
	ABLTU & obj.AMask: bEncoding,
	ABGE & obj.AMask:  bEncoding,
	ABGEU & obj.AMask: bEncoding,

	// 2.6: Load and Store Instructions
	ALW & obj.AMask:  iIEncoding,
	ALWU & obj.AMask: iIEncoding,
	ALH & obj.AMask:  iIEncoding,
	ALHU & obj.AMask: iIEncoding,
	ALB & obj.AMask:  iIEncoding,
	ALBU & obj.AMask: iIEncoding,
	ASW & obj.AMask:  sIEncoding,
	ASH & obj.AMask:  sIEncoding,
	ASB & obj.AMask:  sIEncoding,

	// 2.7: Memory Ordering
	AFENCE & obj.AMask: iIEncoding,

	// 5.2: Integer Computational Instructions (RV64I)
	AADDIW & obj.AMask: iIEncoding,
	ASLLIW & obj.AMask: iIEncoding,
	ASRLIW & obj.AMask: iIEncoding,
	ASRAIW & obj.AMask: iIEncoding,
	AADDW & obj.AMask:  rIIIEncoding,
	ASLLW & obj.AMask:  rIIIEncoding,
	ASRLW & obj.AMask:  rIIIEncoding,
	ASUBW & obj.AMask:  rIIIEncoding,
	ASRAW & obj.AMask:  rIIIEncoding,

	// 5.3: Load and Store Instructions (RV64I)
	ALD & obj.AMask: iIEncoding,
	ASD & obj.AMask: sIEncoding,

	// 7.1: Multiplication Operations
	AMUL & obj.AMask:    rIIIEncoding,
	AMULH & obj.AMask:   rIIIEncoding,
	AMULHU & obj.AMask:  rIIIEncoding,
	AMULHSU & obj.AMask: rIIIEncoding,
	AMULW & obj.AMask:   rIIIEncoding,
	ADIV & obj.AMask:    rIIIEncoding,
	ADIVU & obj.AMask:   rIIIEncoding,
	AREM & obj.AMask:    rIIIEncoding,
	AREMU & obj.AMask:   rIIIEncoding,
	ADIVW & obj.AMask:   rIIIEncoding,
	ADIVUW & obj.AMask:  rIIIEncoding,
	AREMW & obj.AMask:   rIIIEncoding,
	AREMUW & obj.AMask:  rIIIEncoding,

	// 8.2: Load-Reserved/Store-Conditional
	ALRW & obj.AMask: rIIIEncoding,
	ALRD & obj.AMask: rIIIEncoding,
	ASCW & obj.AMask: rIIIEncoding,
	ASCD & obj.AMask: rIIIEncoding,

	// 8.3: Atomic Memory Operations
	AAMOSWAPW & obj.AMask: rIIIEncoding,
	AAMOSWAPD & obj.AMask: rIIIEncoding,
	AAMOADDW & obj.AMask:  rIIIEncoding,
	AAMOADDD & obj.AMask:  rIIIEncoding,
	AAMOANDW & obj.AMask:  rIIIEncoding,
	AAMOANDD & obj.AMask:  rIIIEncoding,
	AAMOORW & obj.AMask:   rIIIEncoding,
	AAMOORD & obj.AMask:   rIIIEncoding,
	AAMOXORW & obj.AMask:  rIIIEncoding,
	AAMOXORD & obj.AMask:  rIIIEncoding,
	AAMOMAXW & obj.AMask:  rIIIEncoding,
	AAMOMAXD & obj.AMask:  rIIIEncoding,
	AAMOMAXUW & obj.AMask: rIIIEncoding,
	AAMOMAXUD & obj.AMask: rIIIEncoding,
	AAMOMINW & obj.AMask:  rIIIEncoding,
	AAMOMIND & obj.AMask:  rIIIEncoding,
	AAMOMINUW & obj.AMask: rIIIEncoding,
	AAMOMINUD & obj.AMask: rIIIEncoding,

	// 10.1: Base Counters and Timers
	ARDCYCLE & obj.AMask:   iIEncoding,
	ARDTIME & obj.AMask:    iIEncoding,
	ARDINSTRET & obj.AMask: iIEncoding,

	// 11.5: Single-Precision Load and Store Instructions
	AFLW & obj.AMask: iFEncoding,
	AFSW & obj.AMask: sFEncoding,

	// 11.6: Single-Precision Floating-Point Computational Instructions
	AFADDS & obj.AMask:  rFFFEncoding,
	AFSUBS & obj.AMask:  rFFFEncoding,
	AFMULS & obj.AMask:  rFFFEncoding,
	AFDIVS & obj.AMask:  rFFFEncoding,
	AFMINS & obj.AMask:  rFFFEncoding,
	AFMAXS & obj.AMask:  rFFFEncoding,
	AFSQRTS & obj.AMask: rFFFEncoding,

	// 11.7: Single-Precision Floating-Point Conversion and Move Instructions
	AFCVTWS & obj.AMask:  rFIEncoding,
	AFCVTLS & obj.AMask:  rFIEncoding,
	AFCVTSW & obj.AMask:  rIFEncoding,
	AFCVTSL & obj.AMask:  rIFEncoding,
	AFCVTWUS & obj.AMask: rFIEncoding,
	AFCVTLUS & obj.AMask: rFIEncoding,
	AFCVTSWU & obj.AMask: rIFEncoding,
	AFCVTSLU & obj.AMask: rIFEncoding,
	AFSGNJS & obj.AMask:  rFFFEncoding,
	AFSGNJNS & obj.AMask: rFFFEncoding,
	AFSGNJXS & obj.AMask: rFFFEncoding,
	AFMVXS & obj.AMask:   rFIEncoding,
	AFMVSX & obj.AMask:   rIFEncoding,
	AFMVXW & obj.AMask:   rFIEncoding,
	AFMVWX & obj.AMask:   rIFEncoding,

	// 11.8: Single-Precision Floating-Point Compare Instructions
	AFEQS & obj.AMask: rFFIEncoding,
	AFLTS & obj.AMask: rFFIEncoding,
	AFLES & obj.AMask: rFFIEncoding,

	// 11.9: Single-Precision Floating-Point Classify Instruction
	AFCLASSS & obj.AMask: rFIEncoding,

	// 12.3: Double-Precision Load and Store Instructions
	AFLD & obj.AMask: iFEncoding,
	AFSD & obj.AMask: sFEncoding,

	// 12.4: Double-Precision Floating-Point Computational Instructions
	AFADDD & obj.AMask:  rFFFEncoding,
	AFSUBD & obj.AMask:  rFFFEncoding,
	AFMULD & obj.AMask:  rFFFEncoding,
	AFDIVD & obj.AMask:  rFFFEncoding,
	AFMIND & obj.AMask:  rFFFEncoding,
	AFMAXD & obj.AMask:  rFFFEncoding,
	AFSQRTD & obj.AMask: rFFFEncoding,

	// 12.5: Double-Precision Floating-Point Conversion and Move Instructions
	AFCVTWD & obj.AMask:  rFIEncoding,
	AFCVTLD & obj.AMask:  rFIEncoding,
	AFCVTDW & obj.AMask:  rIFEncoding,
	AFCVTDL & obj.AMask:  rIFEncoding,
	AFCVTWUD & obj.AMask: rFIEncoding,
	AFCVTLUD & obj.AMask: rFIEncoding,
	AFCVTDWU & obj.AMask: rIFEncoding,
	AFCVTDLU & obj.AMask: rIFEncoding,
	AFCVTSD & obj.AMask:  rFFEncoding,
	AFCVTDS & obj.AMask:  rFFEncoding,
	AFSGNJD & obj.AMask:  rFFFEncoding,
	AFSGNJND & obj.AMask: rFFFEncoding,
	AFSGNJXD & obj.AMask: rFFFEncoding,
	AFMVXD & obj.AMask:   rFIEncoding,
	AFMVDX & obj.AMask:   rIFEncoding,

	// 12.6: Double-Precision Floating-Point Compare Instructions
	AFEQD & obj.AMask: rFFIEncoding,
	AFLTD & obj.AMask: rFFIEncoding,
	AFLED & obj.AMask: rFFIEncoding,

	// 12.7: Double-Precision Floating-Point Classify Instruction
	AFCLASSD & obj.AMask: rFIEncoding,

	// Privileged ISA

	// 3.2.1: Environment Call and Breakpoint
	AECALL & obj.AMask:  iIEncoding,
	AEBREAK & obj.AMask: iIEncoding,

	// Escape hatch
	AWORD & obj.AMask: rawEncoding,

	// Pseudo-operations
	// These architecture-independent opcodes are used as indices without
	// masking.
	obj.AFUNCDATA: pseudoOpEncoding,
	obj.APCDATA:   pseudoOpEncoding,
	obj.ATEXT:     pseudoOpEncoding,
	obj.ANOP:      pseudoOpEncoding,
}
1712  
1713  // encodingForAs returns the encoding for an obj.As.
1714  func encodingForAs(as obj.As) (encoding, error) {
1715  	if base := as &^ obj.AMask; base != obj.ABaseRISCV && base != 0 {
1716  		return badEncoding, fmt.Errorf("encodingForAs: not a RISC-V instruction %s", as)
1717  	}
1718  	asi := as & obj.AMask
1719  	if int(asi) >= len(encodings) {
1720  		return badEncoding, fmt.Errorf("encodingForAs: bad RISC-V instruction %s", as)
1721  	}
1722  	enc := encodings[asi]
1723  	if enc.validate == nil {
1724  		return badEncoding, fmt.Errorf("encodingForAs: no encoding for instruction %s", as)
1725  	}
1726  	return enc, nil
1727  }
1728  
// instruction is a single machine instruction in the form consumed by the
// validate and encode functions of an encoding. Register fields hold
// assembler register numbers (e.g. REG_X0-based), not the zero-based indices
// that end up in the instruction word; the encoders convert them with
// regI/regF.
type instruction struct {
	as     obj.As // Assembler opcode
	rd     uint32 // Destination register
	rs1    uint32 // Source register 1
	rs2    uint32 // Source register 2
	imm    int64  // Immediate
	funct3 uint32 // Function 3
	funct7 uint32 // Function 7
}
1738  
1739  func (ins *instruction) encode() (uint32, error) {
1740  	enc, err := encodingForAs(ins.as)
1741  	if err != nil {
1742  		return 0, err
1743  	}
1744  	if enc.length > 0 {
1745  		return enc.encode(ins), nil
1746  	}
1747  	return 0, fmt.Errorf("fixme")
1748  }
1749  
1750  func (ins *instruction) length() int {
1751  	enc, err := encodingForAs(ins.as)
1752  	if err != nil {
1753  		return 0
1754  	}
1755  	return enc.length
1756  }
1757  
1758  func (ins *instruction) validate(ctxt *obj.Link) {
1759  	enc, err := encodingForAs(ins.as)
1760  	if err != nil {
1761  		ctxt.Diag(err.Error())
1762  		return
1763  	}
1764  	enc.validate(ctxt, ins)
1765  }
1766  
1767  // instructionsForProg returns the machine instructions for an *obj.Prog.
1768  func instructionsForProg(p *obj.Prog) []*instruction {
1769  	ins := &instruction{
1770  		as:  p.As,
1771  		rd:  uint32(p.To.Reg),
1772  		rs1: uint32(p.Reg),
1773  		rs2: uint32(p.From.Reg),
1774  		imm: p.From.Offset,
1775  	}
1776  
1777  	inss := []*instruction{ins}
1778  	switch ins.as {
1779  	case AJAL, AJALR:
1780  		ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE
1781  		ins.imm = p.To.Offset
1782  
1783  	case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
1784  		switch ins.as {
1785  		case ABEQZ:
1786  			ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg)
1787  		case ABGEZ:
1788  			ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg)
1789  		case ABGT:
1790  			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.Reg), uint32(p.From.Reg)
1791  		case ABGTU:
1792  			ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.Reg), uint32(p.From.Reg)
1793  		case ABGTZ:
1794  			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO
1795  		case ABLE:
1796  			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.Reg), uint32(p.From.Reg)
1797  		case ABLEU:
1798  			ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.Reg), uint32(p.From.Reg)
1799  		case ABLEZ:
1800  			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO
1801  		case ABLTZ:
1802  			ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg)
1803  		case ABNEZ:
1804  			ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg)
1805  		}
1806  		ins.imm = p.To.Offset
1807  
1808  	case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD:
1809  		if p.From.Type != obj.TYPE_MEM {
1810  			p.Ctxt.Diag("%v requires memory for source", p)
1811  			return nil
1812  		}
1813  		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
1814  		ins.imm = p.From.Offset
1815  
1816  	case ASW, ASH, ASB, ASD, AFSW, AFSD:
1817  		if p.To.Type != obj.TYPE_MEM {
1818  			p.Ctxt.Diag("%v requires memory for destination", p)
1819  			return nil
1820  		}
1821  		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
1822  		ins.imm = p.To.Offset
1823  
1824  	case ALRW, ALRD:
1825  		// Set aq to use acquire access ordering, which matches Go's memory requirements.
1826  		ins.funct7 = 2
1827  		ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO
1828  
1829  	case ASCW, ASCD, AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD,
1830  		AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD:
1831  		// Set aq to use acquire access ordering, which matches Go's memory requirements.
1832  		ins.funct7 = 2
1833  		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)
1834  
1835  	case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET:
1836  		insEnc := encode(p.As)
1837  		if p.To.Type == obj.TYPE_NONE {
1838  			ins.rd = REG_ZERO
1839  		}
1840  		ins.rs1 = REG_ZERO
1841  		ins.imm = insEnc.csr
1842  
1843  	case AFENCE:
1844  		ins.rd, ins.rs1, ins.rs2 = REG_ZERO, REG_ZERO, obj.REG_NONE
1845  		ins.imm = 0x0ff
1846  
1847  	case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
1848  		// Set the rounding mode in funct3 to round to zero.
1849  		ins.funct3 = 1
1850  
1851  	case AFNES, AFNED:
1852  		// Replace FNE[SD] with FEQ[SD] and NOT.
1853  		if p.To.Type != obj.TYPE_REG {
1854  			p.Ctxt.Diag("%v needs an integer register output", ins.as)
1855  			return nil
1856  		}
1857  		if ins.as == AFNES {
1858  			ins.as = AFEQS
1859  		} else {
1860  			ins.as = AFEQD
1861  		}
1862  		ins = &instruction{
1863  			as:  AXORI, // [bit] xor 1 = not [bit]
1864  			rd:  ins.rd,
1865  			rs1: ins.rd,
1866  			imm: 1,
1867  		}
1868  		inss = append(inss, ins)
1869  
1870  	case AFSQRTS, AFSQRTD:
1871  		// These instructions expect a zero (i.e. float register 0)
1872  		// to be the second input operand.
1873  		ins.rs1 = uint32(p.From.Reg)
1874  		ins.rs2 = REG_F0
1875  
1876  	case ANEG, ANEGW:
1877  		// NEG rs, rd -> SUB rs, X0, rd
1878  		ins.as = ASUB
1879  		if p.As == ANEGW {
1880  			ins.as = ASUBW
1881  		}
1882  		ins.rs1 = REG_ZERO
1883  		if ins.rd == obj.REG_NONE {
1884  			ins.rd = ins.rs2
1885  		}
1886  
1887  	case ANOT:
1888  		// NOT rs, rd -> XORI $-1, rs, rd
1889  		ins.as = AXORI
1890  		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
1891  		if ins.rd == obj.REG_NONE {
1892  			ins.rd = ins.rs1
1893  		}
1894  		ins.imm = -1
1895  
1896  	case ASEQZ:
1897  		// SEQZ rs, rd -> SLTIU $1, rs, rd
1898  		ins.as = ASLTIU
1899  		ins.rs1 = uint32(p.From.Reg)
1900  		ins.imm = 1
1901  
1902  	case ASNEZ:
1903  		// SNEZ rs, rd -> SLTU rs, x0, rd
1904  		ins.as = ASLTU
1905  		ins.rs1 = REG_ZERO
1906  
1907  	case AFNEGS:
1908  		// FNEGS rs, rd -> FSGNJNS rs, rs, rd
1909  		ins.as = AFSGNJNS
1910  		ins.rs1 = uint32(p.From.Reg)
1911  
1912  	case AFNEGD:
1913  		// FNEGD rs, rd -> FSGNJND rs, rs, rd
1914  		ins.as = AFSGNJND
1915  		ins.rs1 = uint32(p.From.Reg)
1916  	}
1917  	return inss
1918  }
1919  
1920  // assemble emits machine code.
1921  // It is called at the very end of the assembly process.
1922  func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
1923  	if ctxt.Retpoline {
1924  		ctxt.Diag("-spectre=ret not supported on riscv")
1925  		ctxt.Retpoline = false // don't keep printing
1926  	}
1927  
1928  	var symcode []uint32
1929  	for p := cursym.Func.Text; p != nil; p = p.Link {
1930  		switch p.As {
1931  		case AJALR:
1932  			if p.To.Sym != nil {
1933  				// This is a CALL/JMP. We add a relocation only
1934  				// for linker stack checking. No actual
1935  				// relocation is needed.
1936  				rel := obj.Addrel(cursym)
1937  				rel.Off = int32(p.Pc)
1938  				rel.Siz = 4
1939  				rel.Sym = p.To.Sym
1940  				rel.Add = p.To.Offset
1941  				rel.Type = objabi.R_CALLRISCV
1942  			}
1943  		case AAUIPC:
1944  			var rt objabi.RelocType
1945  			if p.Mark&NEED_PCREL_ITYPE_RELOC == NEED_PCREL_ITYPE_RELOC {
1946  				rt = objabi.R_RISCV_PCREL_ITYPE
1947  			} else if p.Mark&NEED_PCREL_STYPE_RELOC == NEED_PCREL_STYPE_RELOC {
1948  				rt = objabi.R_RISCV_PCREL_STYPE
1949  			} else {
1950  				break
1951  			}
1952  			if p.Link == nil {
1953  				ctxt.Diag("AUIPC needing PC-relative reloc missing following instruction")
1954  				break
1955  			}
1956  			addr := p.RestArgs[0]
1957  			if addr.Sym == nil {
1958  				ctxt.Diag("AUIPC needing PC-relative reloc missing symbol")
1959  				break
1960  			}
1961  
1962  			rel := obj.Addrel(cursym)
1963  			rel.Off = int32(p.Pc)
1964  			rel.Siz = 8
1965  			rel.Sym = addr.Sym
1966  			rel.Add = addr.Offset
1967  			rel.Type = rt
1968  		}
1969  
1970  		for _, ins := range instructionsForProg(p) {
1971  			ic, err := ins.encode()
1972  			if err == nil {
1973  				symcode = append(symcode, ic)
1974  			}
1975  		}
1976  	}
1977  	cursym.Size = int64(4 * len(symcode))
1978  
1979  	cursym.Grow(cursym.Size)
1980  	for p, i := cursym.P, 0; i < len(symcode); p, i = p[4:], i+1 {
1981  		ctxt.Arch.ByteOrder.PutUint32(p, symcode[i])
1982  	}
1983  
1984  	obj.MarkUnsafePoints(ctxt, cursym.Func.Text, newprog, isUnsafePoint, nil)
1985  }
1986  
1987  func isUnsafePoint(p *obj.Prog) bool {
1988  	return p.From.Reg == REG_TMP || p.To.Reg == REG_TMP || p.Reg == REG_TMP
1989  }
1990  
1991  var LinkRISCV64 = obj.LinkArch{
1992  	Arch:           sys.ArchRISCV64,
1993  	Init:           buildop,
1994  	Preprocess:     preprocess,
1995  	Assemble:       assemble,
1996  	Progedit:       progedit,
1997  	UnaryDst:       unaryDst,
1998  	DWARFRegisters: RISCV64DWARFRegisters,
1999  }
2000