wasmobj.go raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package wasm
   6  
   7  import (
   8  	"bytes"
   9  	"github.com/twitchyliquid64/golang-asm/obj"
  10  	"github.com/twitchyliquid64/golang-asm/objabi"
  11  	"github.com/twitchyliquid64/golang-asm/sys"
  12  	"encoding/binary"
  13  	"fmt"
  14  	"io"
  15  	"math"
  16  )
  17  
  // Register maps each register name accepted by the wasm assembler to its
  // register number. init inverts this table into registerNames, which rconv
  // uses to turn a register number back into its name.
  18  var Register = map[string]int16{
  19  	"SP":    REG_SP,
  20  	"CTXT":  REG_CTXT,
  21  	"g":     REG_g,
  22  	"RET0":  REG_RET0,
  23  	"RET1":  REG_RET1,
  24  	"RET2":  REG_RET2,
  25  	"RET3":  REG_RET3,
  26  	"PAUSE": REG_PAUSE,
  27  
  28  	"R0":  REG_R0,
  29  	"R1":  REG_R1,
  30  	"R2":  REG_R2,
  31  	"R3":  REG_R3,
  32  	"R4":  REG_R4,
  33  	"R5":  REG_R5,
  34  	"R6":  REG_R6,
  35  	"R7":  REG_R7,
  36  	"R8":  REG_R8,
  37  	"R9":  REG_R9,
  38  	"R10": REG_R10,
  39  	"R11": REG_R11,
  40  	"R12": REG_R12,
  41  	"R13": REG_R13,
  42  	"R14": REG_R14,
  43  	"R15": REG_R15,
  44  
  45  	"F0":  REG_F0,
  46  	"F1":  REG_F1,
  47  	"F2":  REG_F2,
  48  	"F3":  REG_F3,
  49  	"F4":  REG_F4,
  50  	"F5":  REG_F5,
  51  	"F6":  REG_F6,
  52  	"F7":  REG_F7,
  53  	"F8":  REG_F8,
  54  	"F9":  REG_F9,
  55  	"F10": REG_F10,
  56  	"F11": REG_F11,
  57  	"F12": REG_F12,
  58  	"F13": REG_F13,
  59  	"F14": REG_F14,
  60  	"F15": REG_F15,
  61  
  62  	"F16": REG_F16,
  63  	"F17": REG_F17,
  64  	"F18": REG_F18,
  65  	"F19": REG_F19,
  66  	"F20": REG_F20,
  67  	"F21": REG_F21,
  68  	"F22": REG_F22,
  69  	"F23": REG_F23,
  70  	"F24": REG_F24,
  71  	"F25": REG_F25,
  72  	"F26": REG_F26,
  73  	"F27": REG_F27,
  74  	"F28": REG_F28,
  75  	"F29": REG_F29,
  76  	"F30": REG_F30,
  77  	"F31": REG_F31,
  78  
  79  	"PC_B": REG_PC_B,
  80  }
  81  
  82  var registerNames []string
  83  
  84  func init() {
  85  	obj.RegisterRegister(MINREG, MAXREG, rconv)
  86  	obj.RegisterOpcode(obj.ABaseWasm, Anames)
  87  
  88  	registerNames = make([]string, MAXREG-MINREG)
  89  	for name, reg := range Register {
  90  		registerNames[reg-MINREG] = name
  91  	}
  92  }
  93  
  94  func rconv(r int) string {
  95  	return registerNames[r-MINREG]
  96  }
  97  
  // unaryDst lists the instructions whose single operand is a destination.
  // The appendp helper in preprocess consults it to decide whether a lone
  // argument is stored in p.To instead of p.From, and the table is also
  // exposed to the assembler through Linkwasm.UnaryDst.
  98  var unaryDst = map[obj.As]bool{
  99  	ASet:          true,
 100  	ATee:          true,
 101  	ACall:         true,
 102  	ACallIndirect: true,
 103  	ACallImport:   true,
 104  	ABr:           true,
 105  	ABrIf:         true,
 106  	ABrTable:      true,
 107  	AI32Store:     true,
 108  	AI64Store:     true,
 109  	AF32Store:     true,
 110  	AF64Store:     true,
 111  	AI32Store8:    true,
 112  	AI32Store16:   true,
 113  	AI64Store8:    true,
 114  	AI64Store16:   true,
 115  	AI64Store32:   true,
 116  	ACALLNORESUME: true,
 117  }
 118  
 // Linkwasm is the obj.LinkArch descriptor for the WebAssembly target
 // (sys.ArchWasm). It plugs this file's instinit, preprocess and assemble
 // functions, together with the unaryDst table, into the assembler.
 119  var Linkwasm = obj.LinkArch{
 120  	Arch:       sys.ArchWasm,
 121  	Init:       instinit,
 122  	Preprocess: preprocess,
 123  	Assemble:   assemble,
 124  	UnaryDst:   unaryDst,
 125  }
 126  
 // Runtime symbols that preprocess either emits calls to (morestack,
 // morestackNoCtxt) or compares call targets against in order to special-case
 // them (gcWriteBarrier, sigpanic, sigpanic0, deferreturn, jmpdefer).
 // They are nil until instinit has run.
 127  var (
 128  	morestack       *obj.LSym
 129  	morestackNoCtxt *obj.LSym
 130  	gcWriteBarrier  *obj.LSym
 131  	sigpanic        *obj.LSym
 132  	sigpanic0       *obj.LSym
 133  	deferreturn     *obj.LSym
 134  	jmpdefer        *obj.LSym
 135  )
 136  
 137  const (
 138  	/* mark flags */
      	// WasmImport is set in Prog.Mark on the Call that preprocess emits for a
      	// CallImport pseudo-instruction. assemble tests it to emit an
      	// R_WASMIMPORT relocation for that call instead of R_CALL.
 139  	WasmImport = 1 << 0
 140  )
 141  
 142  func instinit(ctxt *obj.Link) {
 143  	morestack = ctxt.Lookup("runtime.morestack")
 144  	morestackNoCtxt = ctxt.Lookup("runtime.morestack_noctxt")
 145  	gcWriteBarrier = ctxt.Lookup("runtime.gcWriteBarrier")
 146  	sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal)
 147  	sigpanic0 = ctxt.LookupABI("runtime.sigpanic", 0) // sigpanic called from assembly, which has ABI0
 148  	deferreturn = ctxt.LookupABI("runtime.deferreturn", obj.ABIInternal)
 149  	// jmpdefer is defined in assembly as ABI0, but what we're
 150  	// looking for is the *call* to jmpdefer from the Go function
 151  	// deferreturn, so we're looking for the ABIInternal version
 152  	// of jmpdefer that's called by Go.
 153  	jmpdefer = ctxt.LookupABI(`"".jmpdefer`, obj.ABIInternal)
 154  }
 155  
 // preprocess lowers the instruction list of function symbol s, in place, from
 // Go assembler pseudo-instructions to plain WebAssembly instructions. It is
 // installed as Linkwasm.Preprocess. New instructions are allocated with
 // newprog.
 //
 // The work is done in several passes over the list starting at s.Func.Text:
 //   - record the frame size and emit the prologue (wrapper panic.argp fixup
 //     and SP adjustment);
 //   - assign Pc values and turn CALL/RESUMEPOINT into resume points, building
 //     the index table for the BrTable dispatch;
 //   - emit the stack-growth check unless the function is NoSplit;
 //   - expand JMP, CALL, CALLNORESUME, RET and RETUNWIND;
 //   - rebase NAME_AUTO/NAME_PARAM operands and expand Get-of-address, loads
 //     from memory operands, MOVB/MOVH/MOVW/MOVD and CallImport;
 //   - wrap the body in the unwindExit block, the entryPointLoop and the
 //     resume-point blocks;
 //   - convert TYPE_BRANCH targets of Br/BrIf into relative block depths.
 //
 // It panics on input it cannot lower (negative frame size, unbalanced End,
 // CALL or RESUMEPOINT inside a block, unsupported operand kinds).
 156  func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
      	// appendp emits instruction as with the given operands. If p is a NOP it
      	// is overwritten in place; otherwise a new Prog is obtained from
      	// obj.Appendp(p, newprog) and inherits p's Pc. A single operand goes to
      	// To for instructions listed in unaryDst and to From otherwise. The
      	// instruction that was written is returned.
 157  	appendp := func(p *obj.Prog, as obj.As, args ...obj.Addr) *obj.Prog {
 158  		if p.As != obj.ANOP {
 159  			p2 := obj.Appendp(p, newprog)
 160  			p2.Pc = p.Pc
 161  			p = p2
 162  		}
 163  		p.As = as
 164  		switch len(args) {
 165  		case 0:
 166  			p.From = obj.Addr{}
 167  			p.To = obj.Addr{}
 168  		case 1:
 169  			if unaryDst[as] {
 170  				p.From = obj.Addr{}
 171  				p.To = args[0]
 172  			} else {
 173  				p.From = args[0]
 174  				p.To = obj.Addr{}
 175  			}
 176  		case 2:
 177  			p.From = args[0]
 178  			p.To = args[1]
 179  		default:
 180  			panic("bad args")
 181  		}
 182  		return p
 183  	}
 184  
      	// The TEXT instruction carries the frame size in To.Offset and the
      	// argument size in To.Val.
 185  	framesize := s.Func.Text.To.Offset
 186  	if framesize < 0 {
 187  		panic("bad framesize")
 188  	}
 189  	s.Func.Args = s.Func.Text.To.Val.(int32)
 190  	s.Func.Locals = int32(framesize)
 191  
 192  	if s.Func.Text.From.Sym.Wrapper() {
 193  		// if g._panic != nil && g._panic.argp == FP {
 194  		//   g._panic.argp = bottom-of-frame
 195  		// }
 196  		//
 197  		// MOVD g_panic(g), R0
 198  		// Get R0
 199  		// I64Eqz
 200  		// Not
 201  		// If
 202  		//   Get SP
 203  		//   I64ExtendI32U
 204  		//   I64Const $framesize+8
 205  		//   I64Add
 206  		//   I64Load panic_argp(R0)
 207  		//   I64Eq
 208  		//   If
 209  		//     MOVD SP, panic_argp(R0)
 210  		//   End
 211  		// End
 212  
 213  		gpanic := obj.Addr{
 214  			Type:   obj.TYPE_MEM,
 215  			Reg:    REGG,
 216  			Offset: 4 * 8, // g_panic
 217  		}
 218  
 219  		panicargp := obj.Addr{
 220  			Type:   obj.TYPE_MEM,
 221  			Reg:    REG_R0,
 222  			Offset: 0, // panic.argp
 223  		}
 224  
 225  		p := s.Func.Text
 226  		p = appendp(p, AMOVD, gpanic, regAddr(REG_R0))
 227  
 228  		p = appendp(p, AGet, regAddr(REG_R0))
 229  		p = appendp(p, AI64Eqz)
 230  		p = appendp(p, ANot)
 231  		p = appendp(p, AIf)
 232  
 233  		p = appendp(p, AGet, regAddr(REG_SP))
 234  		p = appendp(p, AI64ExtendI32U)
 235  		p = appendp(p, AI64Const, constAddr(framesize+8))
 236  		p = appendp(p, AI64Add)
 237  		p = appendp(p, AI64Load, panicargp)
 238  
 239  		p = appendp(p, AI64Eq)
 240  		p = appendp(p, AIf)
 241  		p = appendp(p, AMOVD, regAddr(REG_SP), panicargp)
 242  		p = appendp(p, AEnd)
 243  
 244  		p = appendp(p, AEnd)
 245  	}
 246  
      	// Allocate the frame: SP -= framesize. The Set is tagged with Spadj so
      	// the Pc pass below gives it its own pc value.
 247  	if framesize > 0 {
 248  		p := s.Func.Text
 249  		p = appendp(p, AGet, regAddr(REG_SP))
 250  		p = appendp(p, AI32Const, constAddr(framesize))
 251  		p = appendp(p, AI32Sub)
 252  		p = appendp(p, ASet, regAddr(REG_SP))
 253  		p.Spadj = int32(framesize)
 254  	}
 255  
 256  	// Introduce resume points for CALL instructions
 257  	// and collect other explicit resume points.
 258  	numResumePoints := 0
 259  	explicitBlockDepth := 0
 260  	pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction
 261  	var tableIdxs []uint64
 262  	tablePC := int64(0)
 263  	base := ctxt.PosTable.Pos(s.Func.Text.Pos).Base()
 264  	for p := s.Func.Text; p != nil; p = p.Link {
 265  		prevBase := base
 266  		base = ctxt.PosTable.Pos(p.Pos).Base()
 267  		switch p.As {
 268  		case ABlock, ALoop, AIf:
 269  			explicitBlockDepth++
 270  
 271  		case AEnd:
 272  			if explicitBlockDepth == 0 {
 273  				panic("End without block")
 274  			}
 275  			explicitBlockDepth--
 276  
 277  		case ARESUMEPOINT:
 278  			if explicitBlockDepth != 0 {
 279  				panic("RESUME can only be used on toplevel")
 280  			}
      			// A resume point becomes the End of one of the dispatch blocks.
      			// Every pc value up to and including the current one is mapped
      			// to this resume point's index in the BrTable.
 281  			p.As = AEnd
 282  			for tablePC <= pc {
 283  				tableIdxs = append(tableIdxs, uint64(numResumePoints))
 284  				tablePC++
 285  			}
 286  			numResumePoints++
 287  			pc++
 288  
 289  		case obj.ACALL:
 290  			if explicitBlockDepth != 0 {
 291  				panic("CALL can only be used on toplevel, try CALLNORESUME instead")
 292  			}
      			// The appended RESUMEPOINT is visited by a later iteration of
      			// this same loop.
 293  			appendp(p, ARESUMEPOINT)
 294  		}
 295  
 296  		p.Pc = pc
 297  
 298  		// Increase pc whenever some pc-value table needs a new entry. Don't increase it
 299  		// more often to avoid bloat of the BrTable instruction.
 300  		// The "base != prevBase" condition detects inlined instructions. They are an
 301  		// implicit call, so entering and leaving this section affects the stack trace.
 302  		if p.As == ACALLNORESUME || p.As == obj.ANOP || p.As == ANop || p.Spadj != 0 || base != prevBase {
 303  			pc++
 304  			if p.To.Sym == sigpanic {
 305  				// The panic stack trace expects the PC at the call of sigpanic,
 306  				// not the next one. However, runtime.Caller subtracts 1 from the
 307  				// PC. To make both PC and PC-1 work (have the same line number),
 308  				// we advance the PC by 2 at sigpanic.
 309  				pc++
 310  			}
 311  		}
 312  	}
 313  	tableIdxs = append(tableIdxs, uint64(numResumePoints))
 314  	s.Size = pc + 1
 315  
 316  	if !s.Func.Text.From.Sym.NoSplit() {
 317  		p := s.Func.Text
 318  
 319  		if framesize <= objabi.StackSmall {
 320  			// small stack: SP <= stackguard
 321  			// Get SP
 322  			// Get g
 323  			// I32WrapI64
 324  			// I32Load $stackguard0
 325  			// I32LeU
 326  
 327  			p = appendp(p, AGet, regAddr(REG_SP))
 328  			p = appendp(p, AGet, regAddr(REGG))
 329  			p = appendp(p, AI32WrapI64)
 330  			p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
 331  			p = appendp(p, AI32LeU)
 332  		} else {
 333  			// large stack: SP-framesize <= stackguard-StackSmall
 334  			//              SP <= stackguard+(framesize-StackSmall)
 335  			// Get SP
 336  			// Get g
 337  			// I32WrapI64
 338  			// I32Load $stackguard0
 339  			// I32Const $(framesize-StackSmall)
 340  			// I32Add
 341  			// I32LeU
 342  
 343  			p = appendp(p, AGet, regAddr(REG_SP))
 344  			p = appendp(p, AGet, regAddr(REGG))
 345  			p = appendp(p, AI32WrapI64)
 346  			p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
 347  			p = appendp(p, AI32Const, constAddr(int64(framesize)-objabi.StackSmall))
 348  			p = appendp(p, AI32Add)
 349  			p = appendp(p, AI32LeU)
 350  		}
 351  		// TODO(neelance): handle wraparound case
 352  
      		// If the check succeeds, call morestack (or morestack_noctxt when the
      		// function does not need the context register). The constAddr(0)
      		// operand is a placeholder that is overwritten just below.
 353  		p = appendp(p, AIf)
 354  		p = appendp(p, obj.ACALL, constAddr(0))
 355  		if s.Func.Text.From.Sym.NeedCtxt() {
 356  			p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack}
 357  		} else {
 358  			p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt}
 359  		}
 360  		p = appendp(p, AEnd)
 361  	}
 362  
 363  	// record the branches targeting the entry loop and the unwind exit,
 364  	// their targets will be filled in later
 365  	var entryPointLoopBranches []*obj.Prog
 366  	var unwindExitBranches []*obj.Prog
      	// NOTE(review): currentDepth is updated in the loop below but is not read
      	// there; it is reset to 0 and only used by the final depth-resolution pass.
 367  	currentDepth := 0
 368  	for p := s.Func.Text; p != nil; p = p.Link {
 369  		switch p.As {
 370  		case ABlock, ALoop, AIf:
 371  			currentDepth++
 372  		case AEnd:
 373  			currentDepth--
 374  		}
 375  
 376  		switch p.As {
 377  		case obj.AJMP:
 378  			jmp := *p
 379  			p.As = obj.ANOP
 380  
 381  			if jmp.To.Type == obj.TYPE_BRANCH {
 382  				// jump to basic block
 383  				p = appendp(p, AI32Const, constAddr(jmp.To.Val.(*obj.Prog).Pc))
 384  				p = appendp(p, ASet, regAddr(REG_PC_B)) // write next basic block to PC_B
 385  				p = appendp(p, ABr)                     // jump to beginning of entryPointLoop
 386  				entryPointLoopBranches = append(entryPointLoopBranches, p)
 387  				break
 388  			}
 389  
 390  			// low-level WebAssembly call to function
 391  			switch jmp.To.Type {
 392  			case obj.TYPE_MEM:
 393  				if !notUsePC_B[jmp.To.Sym.Name] {
 394  					// Set PC_B parameter to function entry.
 395  					p = appendp(p, AI32Const, constAddr(0))
 396  				}
 397  				p = appendp(p, ACall, jmp.To)
 398  
 399  			case obj.TYPE_NONE:
 400  				// (target PC is on stack)
 401  				p = appendp(p, AI32WrapI64)
 402  				p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
 403  				p = appendp(p, AI32ShrU)
 404  
 405  				// Set PC_B parameter to function entry.
 406  				// We need to push this before pushing the target PC_F,
 407  				// so temporarily pop PC_F, using our REG_PC_B as a
 408  				// scratch register, and push it back after pushing 0.
 409  				p = appendp(p, ASet, regAddr(REG_PC_B))
 410  				p = appendp(p, AI32Const, constAddr(0))
 411  				p = appendp(p, AGet, regAddr(REG_PC_B))
 412  
 413  				p = appendp(p, ACallIndirect)
 414  
 415  			default:
 416  				panic("bad target for JMP")
 417  			}
 418  
 419  			p = appendp(p, AReturn)
 420  
 421  		case obj.ACALL, ACALLNORESUME:
 422  			call := *p
 423  			p.As = obj.ANOP
 424  
      			// call is a copy taken before anything is appended, so call.Link
      			// is still the instruction that originally followed the call.
 425  			pcAfterCall := call.Link.Pc
 426  			if call.To.Sym == sigpanic {
 427  				pcAfterCall-- // sigpanic expects to be called without advancing the pc
 428  			}
 429  
 430  			// jmpdefer manipulates the return address on the stack so deferreturn gets called repeatedly.
 431  			// Model this in WebAssembly with a loop.
 432  			if call.To.Sym == deferreturn {
 433  				p = appendp(p, ALoop)
 434  			}
 435  
 436  			// SP -= 8
 437  			p = appendp(p, AGet, regAddr(REG_SP))
 438  			p = appendp(p, AI32Const, constAddr(8))
 439  			p = appendp(p, AI32Sub)
 440  			p = appendp(p, ASet, regAddr(REG_SP))
 441  
 442  			// write return address to Go stack
 443  			p = appendp(p, AGet, regAddr(REG_SP))
 444  			p = appendp(p, AI64Const, obj.Addr{
 445  				Type:   obj.TYPE_ADDR,
 446  				Name:   obj.NAME_EXTERN,
 447  				Sym:    s,           // PC_F
 448  				Offset: pcAfterCall, // PC_B
 449  			})
 450  			p = appendp(p, AI64Store, constAddr(0))
 451  
 452  			// low-level WebAssembly call to function
 453  			switch call.To.Type {
 454  			case obj.TYPE_MEM:
 455  				if !notUsePC_B[call.To.Sym.Name] {
 456  					// Set PC_B parameter to function entry.
 457  					p = appendp(p, AI32Const, constAddr(0))
 458  				}
 459  				p = appendp(p, ACall, call.To)
 460  
 461  			case obj.TYPE_NONE:
 462  				// (target PC is on stack)
 463  				p = appendp(p, AI32WrapI64)
 464  				p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
 465  				p = appendp(p, AI32ShrU)
 466  
 467  				// Set PC_B parameter to function entry.
 468  				// We need to push this before pushing the target PC_F,
 469  				// so temporarily pop PC_F, using our PC_B as a
 470  				// scratch register, and push it back after pushing 0.
 471  				p = appendp(p, ASet, regAddr(REG_PC_B))
 472  				p = appendp(p, AI32Const, constAddr(0))
 473  				p = appendp(p, AGet, regAddr(REG_PC_B))
 474  
 475  				p = appendp(p, ACallIndirect)
 476  
 477  			default:
 478  				panic("bad target for CALL")
 479  			}
 480  
 481  			// gcWriteBarrier has no return value, it never unwinds the stack
 482  			if call.To.Sym == gcWriteBarrier {
 483  				break
 484  			}
 485  
 486  			// jmpdefer removes the frame of deferreturn from the Go stack.
 487  			// However, its WebAssembly function still returns normally,
 488  			// so we need to return from deferreturn without removing its
 489  			// stack frame (no RET), because the frame is already gone.
 490  			if call.To.Sym == jmpdefer {
 491  				p = appendp(p, AReturn)
 492  				break
 493  			}
 494  
 495  			// return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack
 496  			if call.As == ACALLNORESUME && call.To.Sym != sigpanic && call.To.Sym != sigpanic0 { // sigpanic unwinds the stack, but it never resumes
 497  				// trying to unwind WebAssembly stack but call has no resume point, terminate with error
 498  				p = appendp(p, AIf)
 499  				p = appendp(p, obj.AUNDEF)
 500  				p = appendp(p, AEnd)
 501  			} else {
 502  				// unwinding WebAssembly stack to switch goroutine, return 1
 503  				p = appendp(p, ABrIf)
 504  				unwindExitBranches = append(unwindExitBranches, p)
 505  			}
 506  
 507  			// jump to before the call if jmpdefer has reset the return address to the call's PC
 508  			if call.To.Sym == deferreturn {
 509  				// get PC_B from -8(SP)
 510  				p = appendp(p, AGet, regAddr(REG_SP))
 511  				p = appendp(p, AI32Const, constAddr(8))
 512  				p = appendp(p, AI32Sub)
 513  				p = appendp(p, AI32Load16U, constAddr(0))
 514  				p = appendp(p, ATee, regAddr(REG_PC_B))
 515  
 516  				p = appendp(p, AI32Const, constAddr(call.Pc))
 517  				p = appendp(p, AI32Eq)
 518  				p = appendp(p, ABrIf, constAddr(0))
 519  				p = appendp(p, AEnd) // end of Loop
 520  			}
 521  
 522  		case obj.ARET, ARETUNWIND:
 523  			ret := *p
 524  			p.As = obj.ANOP
 525  
 526  			if framesize > 0 {
 527  				// SP += framesize
 528  				p = appendp(p, AGet, regAddr(REG_SP))
 529  				p = appendp(p, AI32Const, constAddr(framesize))
 530  				p = appendp(p, AI32Add)
 531  				p = appendp(p, ASet, regAddr(REG_SP))
 532  				// TODO(neelance): This should theoretically set Spadj, but it only works without.
 533  				// p.Spadj = int32(-framesize)
 534  			}
 535  
      			// RET with a memory operand: call the named function and return
      			// right after it, without popping the return address here.
 536  			if ret.To.Type == obj.TYPE_MEM {
 537  				// Set PC_B parameter to function entry.
 538  				p = appendp(p, AI32Const, constAddr(0))
 539  
 540  				// low-level WebAssembly call to function
 541  				p = appendp(p, ACall, ret.To)
 542  				p = appendp(p, AReturn)
 543  				break
 544  			}
 545  
 546  			// SP += 8
 547  			p = appendp(p, AGet, regAddr(REG_SP))
 548  			p = appendp(p, AI32Const, constAddr(8))
 549  			p = appendp(p, AI32Add)
 550  			p = appendp(p, ASet, regAddr(REG_SP))
 551  
 552  			if ret.As == ARETUNWIND {
 553  				// function needs to unwind the WebAssembly stack, return 1
 554  				p = appendp(p, AI32Const, constAddr(1))
 555  				p = appendp(p, AReturn)
 556  				break
 557  			}
 558  
 559  			// not unwinding the WebAssembly stack, return 0
 560  			p = appendp(p, AI32Const, constAddr(0))
 561  			p = appendp(p, AReturn)
 562  		}
 563  	}
 564  
      	// Rebase stack-relative operands onto the adjusted SP and expand the
      	// remaining pseudo-instructions (Get of an address, loads with a memory
      	// operand, MOVB/MOVH/MOVW/MOVD and CallImport).
 565  	for p := s.Func.Text; p != nil; p = p.Link {
 566  		switch p.From.Name {
 567  		case obj.NAME_AUTO:
 568  			p.From.Offset += int64(framesize)
 569  		case obj.NAME_PARAM:
 570  			p.From.Reg = REG_SP
 571  			p.From.Offset += int64(framesize) + 8 // parameters are after the frame and the 8-byte return address
 572  		}
 573  
 574  		switch p.To.Name {
 575  		case obj.NAME_AUTO:
 576  			p.To.Offset += int64(framesize)
 577  		case obj.NAME_PARAM:
 578  			p.To.Reg = REG_SP
 579  			p.To.Offset += int64(framesize) + 8 // parameters are after the frame and the 8-byte return address
 580  		}
 581  
 582  		switch p.As {
 583  		case AGet:
 584  			if p.From.Type == obj.TYPE_ADDR {
 585  				get := *p
 586  				p.As = obj.ANOP
 587  
 588  				switch get.From.Name {
 589  				case obj.NAME_EXTERN:
 590  					p = appendp(p, AI64Const, get.From)
 591  				case obj.NAME_AUTO, obj.NAME_PARAM:
 592  					p = appendp(p, AGet, regAddr(get.From.Reg))
 593  					if get.From.Reg == REG_SP {
 594  						p = appendp(p, AI64ExtendI32U)
 595  					}
 596  					if get.From.Offset != 0 {
 597  						p = appendp(p, AI64Const, constAddr(get.From.Offset))
 598  						p = appendp(p, AI64Add)
 599  					}
 600  				default:
 601  					panic("bad Get: invalid name")
 602  				}
 603  			}
 604  
 605  		case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U:
 606  			if p.From.Type == obj.TYPE_MEM {
 607  				as := p.As
 608  				from := p.From
 609  
      				// Turn "Load off(reg)" into "Get reg; [I32WrapI64;] Load $off".
      				// The wrap is skipped only when the base register is SP.
 610  				p.As = AGet
 611  				p.From = regAddr(from.Reg)
 612  
 613  				if from.Reg != REG_SP {
 614  					p = appendp(p, AI32WrapI64)
 615  				}
 616  
 617  				p = appendp(p, as, constAddr(from.Offset))
 618  			}
 619  
 620  		case AMOVB, AMOVH, AMOVW, AMOVD:
 621  			mov := *p
 622  			p.As = obj.ANOP
 623  
      			// Pick the load/store opcode pair matching the width of the move.
 624  			var loadAs obj.As
 625  			var storeAs obj.As
 626  			switch mov.As {
 627  			case AMOVB:
 628  				loadAs = AI64Load8U
 629  				storeAs = AI64Store8
 630  			case AMOVH:
 631  				loadAs = AI64Load16U
 632  				storeAs = AI64Store16
 633  			case AMOVW:
 634  				loadAs = AI64Load32U
 635  				storeAs = AI64Store32
 636  			case AMOVD:
 637  				loadAs = AI64Load
 638  				storeAs = AI64Store
 639  			}
 640  
      			// appendValue emits the instructions that push the source operand
      			// of the move onto the WebAssembly stack.
 641  			appendValue := func() {
 642  				switch mov.From.Type {
 643  				case obj.TYPE_CONST:
 644  					p = appendp(p, AI64Const, constAddr(mov.From.Offset))
 645  
 646  				case obj.TYPE_ADDR:
 647  					switch mov.From.Name {
 648  					case obj.NAME_NONE, obj.NAME_PARAM, obj.NAME_AUTO:
 649  						p = appendp(p, AGet, regAddr(mov.From.Reg))
 650  						if mov.From.Reg == REG_SP {
 651  							p = appendp(p, AI64ExtendI32U)
 652  						}
 653  						p = appendp(p, AI64Const, constAddr(mov.From.Offset))
 654  						p = appendp(p, AI64Add)
 655  					case obj.NAME_EXTERN:
 656  						p = appendp(p, AI64Const, mov.From)
 657  					default:
 658  						panic("bad name for MOV")
 659  					}
 660  
 661  				case obj.TYPE_REG:
 662  					p = appendp(p, AGet, mov.From)
 663  					if mov.From.Reg == REG_SP {
 664  						p = appendp(p, AI64ExtendI32U)
 665  					}
 666  
 667  				case obj.TYPE_MEM:
 668  					p = appendp(p, AGet, regAddr(mov.From.Reg))
 669  					if mov.From.Reg != REG_SP {
 670  						p = appendp(p, AI32WrapI64)
 671  					}
 672  					p = appendp(p, loadAs, constAddr(mov.From.Offset))
 673  
 674  				default:
 675  					panic("bad MOV type")
 676  				}
 677  			}
 678  
 679  			switch mov.To.Type {
 680  			case obj.TYPE_REG:
 681  				appendValue()
 682  				if mov.To.Reg == REG_SP {
 683  					p = appendp(p, AI32WrapI64)
 684  				}
 685  				p = appendp(p, ASet, mov.To)
 686  
 687  			case obj.TYPE_MEM:
      				// A store needs the address pushed first, then the value.
 688  				switch mov.To.Name {
 689  				case obj.NAME_NONE, obj.NAME_PARAM:
 690  					p = appendp(p, AGet, regAddr(mov.To.Reg))
 691  					if mov.To.Reg != REG_SP {
 692  						p = appendp(p, AI32WrapI64)
 693  					}
 694  				case obj.NAME_EXTERN:
 695  					p = appendp(p, AI32Const, obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: mov.To.Sym})
 696  				default:
 697  					panic("bad MOV name")
 698  				}
 699  				appendValue()
 700  				p = appendp(p, storeAs, constAddr(mov.To.Offset))
 701  
 702  			default:
 703  				panic("bad MOV type")
 704  			}
 705  
 706  		case ACallImport:
      			// Push SP and call s itself; the WasmImport mark makes assemble
      			// emit an import relocation for this call.
 707  			p.As = obj.ANOP
 708  			p = appendp(p, AGet, regAddr(REG_SP))
 709  			p = appendp(p, ACall, obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: s})
 710  			p.Mark = WasmImport
 711  		}
 712  	}
 713  
      	// Wrap the function body in the dispatch scaffolding. The opening
      	// instructions are inserted right after Text and the closing ones are
      	// appended at the very end of the list.
 714  	{
 715  		p := s.Func.Text
 716  		if len(unwindExitBranches) > 0 {
 717  			p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack
 718  			for _, b := range unwindExitBranches {
 719  				b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
 720  			}
 721  		}
 722  		if len(entryPointLoopBranches) > 0 {
 723  			p = appendp(p, ALoop) // entryPointLoop, used to jump between basic blocks
 724  			for _, b := range entryPointLoopBranches {
 725  				b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
 726  			}
 727  		}
 728  		if numResumePoints > 0 {
 729  			// Add Block instructions for resume points and BrTable to jump to selected resume point.
 730  			for i := 0; i < numResumePoints+1; i++ {
 731  				p = appendp(p, ABlock)
 732  			}
 733  			p = appendp(p, AGet, regAddr(REG_PC_B)) // read next basic block from PC_B
 734  			p = appendp(p, ABrTable, obj.Addr{Val: tableIdxs})
 735  			p = appendp(p, AEnd) // end of Block
 736  		}
 737  		for p.Link != nil {
 738  			p = p.Link // function instructions
 739  		}
 740  		if len(entryPointLoopBranches) > 0 {
 741  			p = appendp(p, AEnd) // end of entryPointLoop
 742  		}
 743  		p = appendp(p, obj.AUNDEF)
 744  		if len(unwindExitBranches) > 0 {
 745  			p = appendp(p, AEnd) // end of unwindExit
 746  			p = appendp(p, AI32Const, constAddr(1))
 747  		}
 748  	}
 749  
      	// Resolve Br/BrIf targets that were recorded as TYPE_BRANCH: the operand
      	// becomes the difference between the nesting depth at the branch and the
      	// depth recorded for the target Block/Loop/If.
 750  	currentDepth = 0
 751  	blockDepths := make(map[*obj.Prog]int)
 752  	for p := s.Func.Text; p != nil; p = p.Link {
 753  		switch p.As {
 754  		case ABlock, ALoop, AIf:
 755  			currentDepth++
 756  			blockDepths[p] = currentDepth
 757  		case AEnd:
 758  			currentDepth--
 759  		}
 760  
 761  		switch p.As {
 762  		case ABr, ABrIf:
 763  			if p.To.Type == obj.TYPE_BRANCH {
 764  				blockDepth, ok := blockDepths[p.To.Val.(*obj.Prog)]
 765  				if !ok {
 766  					panic("label not at block")
 767  				}
 768  				p.To = constAddr(int64(currentDepth - blockDepth))
 769  			}
 770  		}
 771  	}
 772  }
 773  
 774  func constAddr(value int64) obj.Addr {
 775  	return obj.Addr{Type: obj.TYPE_CONST, Offset: value}
 776  }
 777  
 778  func regAddr(reg int16) obj.Addr {
 779  	return obj.Addr{Type: obj.TYPE_REG, Reg: reg}
 780  }
 781  
 782  // Most Go functions take a single parameter (PC_B) in the Wasm ABI.
 783  // notUsePC_B lists the exceptions, keyed by symbol name: preprocess does
      // not push the PC_B argument before a JMP or CALL to a function named here.
 784  var notUsePC_B = map[string]bool{
 785  	"_rt0_wasm_js":           true,
 786  	"wasm_export_run":        true,
 787  	"wasm_export_resume":     true,
 788  	"wasm_export_getsp":      true,
 789  	"wasm_pc_f_loop":         true,
 790  	"runtime.wasmMove":       true,
 791  	"runtime.wasmZero":       true,
 792  	"runtime.wasmDiv":        true,
 793  	"runtime.wasmTruncS":     true,
 794  	"runtime.wasmTruncU":     true,
 795  	"runtime.gcWriteBarrier": true,
 796  	"cmpbody":                true,
 797  	"memeqbody":              true,
 798  	"memcmp":                 true,
 799  	"memchr":                 true,
 800  }
 801  
 802  func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 803  	type regVar struct {
 804  		global bool
 805  		index  uint64
 806  	}
 807  
 808  	type varDecl struct {
 809  		count uint64
 810  		typ   valueType
 811  	}
 812  
 813  	hasLocalSP := false
 814  	regVars := [MAXREG - MINREG]*regVar{
 815  		REG_SP - MINREG:    {true, 0},
 816  		REG_CTXT - MINREG:  {true, 1},
 817  		REG_g - MINREG:     {true, 2},
 818  		REG_RET0 - MINREG:  {true, 3},
 819  		REG_RET1 - MINREG:  {true, 4},
 820  		REG_RET2 - MINREG:  {true, 5},
 821  		REG_RET3 - MINREG:  {true, 6},
 822  		REG_PAUSE - MINREG: {true, 7},
 823  	}
 824  	var varDecls []*varDecl
 825  	useAssemblyRegMap := func() {
 826  		for i := int16(0); i < 16; i++ {
 827  			regVars[REG_R0+i-MINREG] = &regVar{false, uint64(i)}
 828  		}
 829  	}
 830  
 831  	// Function starts with declaration of locals: numbers and types.
 832  	// Some functions use a special calling convention.
 833  	switch s.Name {
 834  	case "_rt0_wasm_js", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp", "wasm_pc_f_loop",
 835  		"runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
 836  		varDecls = []*varDecl{}
 837  		useAssemblyRegMap()
 838  	case "memchr", "memcmp":
 839  		varDecls = []*varDecl{{count: 2, typ: i32}}
 840  		useAssemblyRegMap()
 841  	case "cmpbody":
 842  		varDecls = []*varDecl{{count: 2, typ: i64}}
 843  		useAssemblyRegMap()
 844  	case "runtime.gcWriteBarrier":
 845  		varDecls = []*varDecl{{count: 4, typ: i64}}
 846  		useAssemblyRegMap()
 847  	default:
 848  		// Normal calling convention: PC_B as WebAssembly parameter. First local variable is local SP cache.
 849  		regVars[REG_PC_B-MINREG] = &regVar{false, 0}
 850  		hasLocalSP = true
 851  
 852  		var regUsed [MAXREG - MINREG]bool
 853  		for p := s.Func.Text; p != nil; p = p.Link {
 854  			if p.From.Reg != 0 {
 855  				regUsed[p.From.Reg-MINREG] = true
 856  			}
 857  			if p.To.Reg != 0 {
 858  				regUsed[p.To.Reg-MINREG] = true
 859  			}
 860  		}
 861  
 862  		regs := []int16{REG_SP}
 863  		for reg := int16(REG_R0); reg <= REG_F31; reg++ {
 864  			if regUsed[reg-MINREG] {
 865  				regs = append(regs, reg)
 866  			}
 867  		}
 868  
 869  		var lastDecl *varDecl
 870  		for i, reg := range regs {
 871  			t := regType(reg)
 872  			if lastDecl == nil || lastDecl.typ != t {
 873  				lastDecl = &varDecl{
 874  					count: 0,
 875  					typ:   t,
 876  				}
 877  				varDecls = append(varDecls, lastDecl)
 878  			}
 879  			lastDecl.count++
 880  			if reg != REG_SP {
 881  				regVars[reg-MINREG] = &regVar{false, 1 + uint64(i)}
 882  			}
 883  		}
 884  	}
 885  
 886  	w := new(bytes.Buffer)
 887  
 888  	writeUleb128(w, uint64(len(varDecls)))
 889  	for _, decl := range varDecls {
 890  		writeUleb128(w, decl.count)
 891  		w.WriteByte(byte(decl.typ))
 892  	}
 893  
 894  	if hasLocalSP {
 895  		// Copy SP from its global variable into a local variable. Accessing a local variable is more efficient.
 896  		updateLocalSP(w)
 897  	}
 898  
 899  	for p := s.Func.Text; p != nil; p = p.Link {
 900  		switch p.As {
 901  		case AGet:
 902  			if p.From.Type != obj.TYPE_REG {
 903  				panic("bad Get: argument is not a register")
 904  			}
 905  			reg := p.From.Reg
 906  			v := regVars[reg-MINREG]
 907  			if v == nil {
 908  				panic("bad Get: invalid register")
 909  			}
 910  			if reg == REG_SP && hasLocalSP {
 911  				writeOpcode(w, ALocalGet)
 912  				writeUleb128(w, 1) // local SP
 913  				continue
 914  			}
 915  			if v.global {
 916  				writeOpcode(w, AGlobalGet)
 917  			} else {
 918  				writeOpcode(w, ALocalGet)
 919  			}
 920  			writeUleb128(w, v.index)
 921  			continue
 922  
 923  		case ASet:
 924  			if p.To.Type != obj.TYPE_REG {
 925  				panic("bad Set: argument is not a register")
 926  			}
 927  			reg := p.To.Reg
 928  			v := regVars[reg-MINREG]
 929  			if v == nil {
 930  				panic("bad Set: invalid register")
 931  			}
 932  			if reg == REG_SP && hasLocalSP {
 933  				writeOpcode(w, ALocalTee)
 934  				writeUleb128(w, 1) // local SP
 935  			}
 936  			if v.global {
 937  				writeOpcode(w, AGlobalSet)
 938  			} else {
 939  				if p.Link.As == AGet && p.Link.From.Reg == reg {
 940  					writeOpcode(w, ALocalTee)
 941  					p = p.Link
 942  				} else {
 943  					writeOpcode(w, ALocalSet)
 944  				}
 945  			}
 946  			writeUleb128(w, v.index)
 947  			continue
 948  
 949  		case ATee:
 950  			if p.To.Type != obj.TYPE_REG {
 951  				panic("bad Tee: argument is not a register")
 952  			}
 953  			reg := p.To.Reg
 954  			v := regVars[reg-MINREG]
 955  			if v == nil {
 956  				panic("bad Tee: invalid register")
 957  			}
 958  			writeOpcode(w, ALocalTee)
 959  			writeUleb128(w, v.index)
 960  			continue
 961  
 962  		case ANot:
 963  			writeOpcode(w, AI32Eqz)
 964  			continue
 965  
 966  		case obj.AUNDEF:
 967  			writeOpcode(w, AUnreachable)
 968  			continue
 969  
 970  		case obj.ANOP, obj.ATEXT, obj.AFUNCDATA, obj.APCDATA:
 971  			// ignore
 972  			continue
 973  		}
 974  
 975  		writeOpcode(w, p.As)
 976  
 977  		switch p.As {
 978  		case ABlock, ALoop, AIf:
 979  			if p.From.Offset != 0 {
 980  				// block type, rarely used, e.g. for code compiled with emscripten
 981  				w.WriteByte(0x80 - byte(p.From.Offset))
 982  				continue
 983  			}
 984  			w.WriteByte(0x40)
 985  
 986  		case ABr, ABrIf:
 987  			if p.To.Type != obj.TYPE_CONST {
 988  				panic("bad Br/BrIf")
 989  			}
 990  			writeUleb128(w, uint64(p.To.Offset))
 991  
 992  		case ABrTable:
 993  			idxs := p.To.Val.([]uint64)
 994  			writeUleb128(w, uint64(len(idxs)-1))
 995  			for _, idx := range idxs {
 996  				writeUleb128(w, idx)
 997  			}
 998  
 999  		case ACall:
1000  			switch p.To.Type {
1001  			case obj.TYPE_CONST:
1002  				writeUleb128(w, uint64(p.To.Offset))
1003  
1004  			case obj.TYPE_MEM:
1005  				if p.To.Name != obj.NAME_EXTERN && p.To.Name != obj.NAME_STATIC {
1006  					fmt.Println(p.To)
1007  					panic("bad name for Call")
1008  				}
1009  				r := obj.Addrel(s)
1010  				r.Off = int32(w.Len())
1011  				r.Type = objabi.R_CALL
1012  				if p.Mark&WasmImport != 0 {
1013  					r.Type = objabi.R_WASMIMPORT
1014  				}
1015  				r.Sym = p.To.Sym
1016  				if hasLocalSP {
1017  					// The stack may have moved, which changes SP. Update the local SP variable.
1018  					updateLocalSP(w)
1019  				}
1020  
1021  			default:
1022  				panic("bad type for Call")
1023  			}
1024  
1025  		case ACallIndirect:
1026  			writeUleb128(w, uint64(p.To.Offset))
1027  			w.WriteByte(0x00) // reserved value
1028  			if hasLocalSP {
1029  				// The stack may have moved, which changes SP. Update the local SP variable.
1030  				updateLocalSP(w)
1031  			}
1032  
1033  		case AI32Const, AI64Const:
1034  			if p.From.Name == obj.NAME_EXTERN {
1035  				r := obj.Addrel(s)
1036  				r.Off = int32(w.Len())
1037  				r.Type = objabi.R_ADDR
1038  				r.Sym = p.From.Sym
1039  				r.Add = p.From.Offset
1040  				break
1041  			}
1042  			writeSleb128(w, p.From.Offset)
1043  
1044  		case AF32Const:
1045  			b := make([]byte, 4)
1046  			binary.LittleEndian.PutUint32(b, math.Float32bits(float32(p.From.Val.(float64))))
1047  			w.Write(b)
1048  
1049  		case AF64Const:
1050  			b := make([]byte, 8)
1051  			binary.LittleEndian.PutUint64(b, math.Float64bits(p.From.Val.(float64)))
1052  			w.Write(b)
1053  
1054  		case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U:
1055  			if p.From.Offset < 0 {
1056  				panic("negative offset for *Load")
1057  			}
1058  			if p.From.Type != obj.TYPE_CONST {
1059  				panic("bad type for *Load")
1060  			}
1061  			if p.From.Offset > math.MaxUint32 {
1062  				ctxt.Diag("bad offset in %v", p)
1063  			}
1064  			writeUleb128(w, align(p.As))
1065  			writeUleb128(w, uint64(p.From.Offset))
1066  
1067  		case AI32Store, AI64Store, AF32Store, AF64Store, AI32Store8, AI32Store16, AI64Store8, AI64Store16, AI64Store32:
1068  			if p.To.Offset < 0 {
1069  				panic("negative offset")
1070  			}
1071  			if p.From.Offset > math.MaxUint32 {
1072  				ctxt.Diag("bad offset in %v", p)
1073  			}
1074  			writeUleb128(w, align(p.As))
1075  			writeUleb128(w, uint64(p.To.Offset))
1076  
1077  		case ACurrentMemory, AGrowMemory:
1078  			w.WriteByte(0x00)
1079  
1080  		}
1081  	}
1082  
1083  	w.WriteByte(0x0b) // end
1084  
1085  	s.P = w.Bytes()
1086  }
1087  
1088  func updateLocalSP(w *bytes.Buffer) {
1089  	writeOpcode(w, AGlobalGet)
1090  	writeUleb128(w, 0) // global SP
1091  	writeOpcode(w, ALocalSet)
1092  	writeUleb128(w, 1) // local SP
1093  }
1094  
1095  func writeOpcode(w *bytes.Buffer, as obj.As) {
1096  	switch {
1097  	case as < AUnreachable:
1098  		panic(fmt.Sprintf("unexpected assembler op: %s", as))
1099  	case as < AEnd:
1100  		w.WriteByte(byte(as - AUnreachable + 0x00))
1101  	case as < ADrop:
1102  		w.WriteByte(byte(as - AEnd + 0x0B))
1103  	case as < ALocalGet:
1104  		w.WriteByte(byte(as - ADrop + 0x1A))
1105  	case as < AI32Load:
1106  		w.WriteByte(byte(as - ALocalGet + 0x20))
1107  	case as < AI32TruncSatF32S:
1108  		w.WriteByte(byte(as - AI32Load + 0x28))
1109  	case as < ALast:
1110  		w.WriteByte(0xFC)
1111  		w.WriteByte(byte(as - AI32TruncSatF32S + 0x00))
1112  	default:
1113  		panic(fmt.Sprintf("unexpected assembler op: %s", as))
1114  	}
1115  }
1116  
// valueType is the one-byte code identifying the type of a value
// (see regType, which maps a register to its valueType).
type valueType byte

// Value type codes. They count down from 0x7F in the order
// i32, i64, f32, f64.
const (
	i32 valueType = 0x7F - iota // 0x7F
	i64                         // 0x7E
	f32                         // 0x7D
	f64                         // 0x7C
)
1125  
1126  func regType(reg int16) valueType {
1127  	switch {
1128  	case reg == REG_SP:
1129  		return i32
1130  	case reg >= REG_R0 && reg <= REG_R15:
1131  		return i64
1132  	case reg >= REG_F0 && reg <= REG_F15:
1133  		return f32
1134  	case reg >= REG_F16 && reg <= REG_F31:
1135  		return f64
1136  	default:
1137  		panic("invalid register")
1138  	}
1139  }
1140  
1141  func align(as obj.As) uint64 {
1142  	switch as {
1143  	case AI32Load8S, AI32Load8U, AI64Load8S, AI64Load8U, AI32Store8, AI64Store8:
1144  		return 0
1145  	case AI32Load16S, AI32Load16U, AI64Load16S, AI64Load16U, AI32Store16, AI64Store16:
1146  		return 1
1147  	case AI32Load, AF32Load, AI64Load32S, AI64Load32U, AI32Store, AF32Store, AI64Store32:
1148  		return 2
1149  	case AI64Load, AF64Load, AI64Store, AF64Store:
1150  		return 3
1151  	default:
1152  		panic("align: bad op")
1153  	}
1154  }
1155  
// writeUleb128 writes v to w in unsigned LEB128 form: seven bits per
// byte, least significant group first, with the high bit set on every
// byte except the last. Values below 128 occupy exactly one byte.
func writeUleb128(w io.ByteWriter, v uint64) {
	// Emit continuation bytes while more than seven bits remain.
	for v >= 0x80 {
		w.WriteByte(byte(v) | 0x80)
		v >>= 7
	}
	// Final byte: the remaining bits, continuation flag clear.
	w.WriteByte(byte(v))
}
1172  
// writeSleb128 writes v to w in signed LEB128 form: seven bits per byte,
// least significant group first, with the high bit set on every byte
// except the last. Encoding stops once the bits that remain are pure
// sign extension of the group just produced.
func writeSleb128(w io.ByteWriter, v int64) {
	for {
		group := byte(v & 0x7f)
		negative := group&0x40 != 0 // sign bit of this 7-bit group
		rest := v >> 7              // arithmetic shift keeps the sign

		// Done when the remainder carries no information beyond the
		// sign already expressed by this group's top bit.
		if (rest == 0 && !negative) || (rest == -1 && negative) {
			w.WriteByte(group)
			return
		}

		w.WriteByte(group | 0x80)
		v = rest
	}
}
1186