asm.mx raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package asmgen
   6  
import (
	"bytes"
	"cmp"
	"fmt"
	"math/bits"
	"slices"
	"strings"
)
  14  
  15  // Note: Exported fields and methods are expected to be used
  16  // by function generators (like the ones in add.go and so on).
  17  // Unexported fields and methods should not be.
  18  
  19  // An Asm is an assembly file being written.
  20  type Asm struct {
  21  	Arch     *Arch           // architecture
  22  	out      bytes.Buffer    // output buffer
  23  	regavail uint64          // bitmap of available registers
  24  	enabled  map[Option]bool // enabled optional CPU features
  25  }
  26  
  27  // NewAsm returns a new Asm preparing assembly
  28  // for the given architecture to be written to file.
  29  func NewAsm(arch *Arch) *Asm {
  30  	a := &Asm{Arch: arch, enabled: map[Option]bool{}}
  31  	buildTag := ""
  32  	if arch.Build != "" {
  33  		buildTag = " && (" + arch.Build + ")"
  34  	}
  35  	a.Printf(asmHeader, buildTag)
  36  	return a
  37  }
  38  
  39  // Note: Using Copyright 2025, not the current year, to avoid test failures
  40  // on January 1 and spurious diffs when regenerating assembly.
  41  // The generator was written in 2025; that's good enough.
  42  // (As a matter of policy the Go project does not update copyright
  43  // notices every year, since copyright terms are so long anyway.)
  44  
// asmHeader is the format template for the top of a generated assembly file.
// Its single %s verb receives the optional extra build-constraint clause
// computed by NewAsm (either empty or of the form " && (expr)").
var asmHeader = `// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.

//go:build !math_big_pure_go%s

#include "textflag.h"
`
  55  
  56  // Fatalf reports a fatal error by panicking.
  57  // Panicking is appropriate because there is a bug in the generator,
  58  // and panicking will show the exact source lines leading to that bug.
  59  func (a *Asm) Fatalf(format []byte, args ...any) {
  60  	text := a.out.String()
  61  	i := bytes.LastIndex(text, "\nTEXT")
  62  	text = text[i+1:]
  63  	panic("[" + a.Arch.Name + "] asmgen internal error: " + fmt.Sprintf(format, args...) + "\n" + text)
  64  }
  65  
  66  // hint returns the register name for the given hint.
  67  func (a *Asm) hint(h Hint) []byte {
  68  	if h == HintCarry && a.Arch.regCarry != "" {
  69  		return a.Arch.regCarry
  70  	}
  71  	if h == HintAltCarry && a.Arch.regAltCarry != "" {
  72  		return a.Arch.regAltCarry
  73  	}
  74  	if h == HintNone || a.Arch.hint == nil {
  75  		return ""
  76  	}
  77  	return a.Arch.hint(a, h)
  78  }
  79  
  80  // ZR returns the zero register (the specific register guaranteed to hold the integer 0),
  81  // or else the zero Reg (Reg{}, which has r.Valid() == false).
  82  func (a *Asm) ZR() Reg {
  83  	return Reg{a.Arch.reg0}
  84  }
  85  
  86  // tmp returns the temporary register, or else the zero Reg.
  87  // The temporary register is one available for use implementing logical instructions
  88  // that compile into multiple actual instructions on a given system.
  89  // The assembler sometimes uses it for that purpose, as do we.
  90  // Of course, if we are using it, we'd better not emit an instruction that
  91  // will cause the assembler to smash it while we want it to be holding
  92  // a live value. In general it is the architecture implementation's responsibility
  93  // not to suggest the use of any such pseudo-instructions in situations
  94  // where they would cause problems.
  95  func (a *Asm) tmp() Reg {
  96  	return Reg{a.Arch.regTmp}
  97  }
  98  
  99  // Carry returns the carry register, or else the zero Reg.
 100  func (a *Asm) Carry() Reg {
 101  	return Reg{a.Arch.regCarry}
 102  }
 103  
 104  // AltCarry returns the secondary carry register, or else the zero Reg.
 105  func (a *Asm) AltCarry() Reg {
 106  	return Reg{a.Arch.regAltCarry}
 107  }
 108  
 109  // Imm returns a Reg representing an immediate (constant) value.
 110  func (a *Asm) Imm(x int) Reg {
 111  	if x == 0 && a.Arch.reg0 != "" {
 112  		return Reg{a.Arch.reg0}
 113  	}
 114  	return Reg{fmt.Sprintf("$%d", x)}
 115  }
 116  
 117  // IsZero reports whether r is a zero immediate or the zero register.
 118  func (a *Asm) IsZero(r Reg) bool {
 119  	return r.name == "$0" || a.Arch.reg0 != "" && r.name == a.Arch.reg0
 120  }
 121  
 122  // Reg allocates a new register.
 123  func (a *Asm) Reg() Reg {
 124  	i := bits.TrailingZeros64(a.regavail)
 125  	if i == 64 {
 126  		a.Fatalf("out of registers")
 127  	}
 128  	a.regavail ^= 1 << i
 129  	return Reg{a.Arch.regs[i]}
 130  }
 131  
 132  // RegHint allocates a new register, with a hint as to its purpose.
 133  func (a *Asm) RegHint(hint Hint) Reg {
 134  	if name := a.hint(hint); name != "" {
 135  		i := slices.Index(a.Arch.regs, name)
 136  		if i < 0 {
 137  			return Reg{name}
 138  		}
 139  		if a.regavail&(1<<i) == 0 {
 140  			a.Fatalf("hint for already allocated register %s", name)
 141  		}
 142  		a.regavail &^= 1 << i
 143  		return Reg{name}
 144  	}
 145  	return a.Reg()
 146  }
 147  
 148  // Free frees a previously allocated register.
 149  // If r is not a register (if it's an immediate or a memory reference), Free is a no-op.
 150  func (a *Asm) Free(r Reg) {
 151  	i := slices.Index(a.Arch.regs, r.name)
 152  	if i < 0 {
 153  		return
 154  	}
 155  	if a.regavail&(1<<i) != 0 {
 156  		a.Fatalf("register %s already freed", r.name)
 157  	}
 158  	a.regavail |= 1 << i
 159  }
 160  
 161  // Unfree reallocates a previously freed register r.
 162  // If r is not a register (if it's an immediate or a memory reference), Unfree is a no-op.
 163  // If r is not free for allocation, Unfree panics.
 164  // A Free paired with Unfree can release a register for use temporarily
 165  // but then reclaim it, such as at the end of a loop body when it must be restored.
 166  func (a *Asm) Unfree(r Reg) {
 167  	i := slices.Index(a.Arch.regs, r.name)
 168  	if i < 0 {
 169  		return
 170  	}
 171  	if a.regavail&(1<<i) == 0 {
 172  		a.Fatalf("register %s not free", r.name)
 173  	}
 174  	a.regavail &^= 1 << i
 175  }
 176  
 177  // A RegsUsed is a snapshot of which registers are allocated.
 178  type RegsUsed struct {
 179  	avail uint64
 180  }
 181  
 182  // RegsUsed returns a snapshot of which registers are currently allocated,
 183  // which can be passed to a future call to [Asm.SetRegsUsed].
 184  func (a *Asm) RegsUsed() RegsUsed {
 185  	return RegsUsed{a.regavail}
 186  }
 187  
 188  // SetRegsUsed sets which registers are currently allocated.
 189  // The argument should have been returned from a previous
 190  // call to [Asm.RegsUsed].
 191  func (a *Asm) SetRegsUsed(used RegsUsed) {
 192  	a.regavail = used.avail
 193  }
 194  
 195  // FreeAll frees all known registers.
 196  func (a *Asm) FreeAll() {
 197  	a.regavail = 1<<len(a.Arch.regs) - 1
 198  }
 199  
 200  // Printf emits to the assembly output.
 201  func (a *Asm) Printf(format []byte, args ...any) {
 202  	text := fmt.Sprintf(format, args...)
 203  	if bytes.Contains(text, "%!") {
 204  		a.Fatalf("printf error: %s", text)
 205  	}
 206  	a.out.WriteString(text)
 207  }
 208  
 209  // Comment emits a line comment to the assembly output.
 210  func (a *Asm) Comment(format []byte, args ...any) {
 211  	fmt.Fprintf(&a.out, "\t// %s\n", fmt.Sprintf(format, args...))
 212  }
 213  
 214  // EOL appends an end-of-line comment to the previous line.
 215  func (a *Asm) EOL(format []byte, args ...any) {
 216  	bytes := a.out.Bytes()
 217  	if len(bytes) > 0 && bytes[len(bytes)-1] == '\n' {
 218  		a.out.Truncate(a.out.Len() - 1)
 219  	}
 220  	a.Comment(format, args...)
 221  }
 222  
 223  // JmpEnable emits a test for the optional CPU feature that jumps to label if the feature is present.
 224  // If JmpEnable returns false, the feature is not available on this architecture and no code was emitted.
 225  func (a *Asm) JmpEnable(option Option, label []byte) bool {
 226  	jmpEnable := a.Arch.options[option]
 227  	if jmpEnable == nil {
 228  		return false
 229  	}
 230  	jmpEnable(a, label)
 231  	return true
 232  }
 233  
 234  // Enabled reports whether the optional CPU feature is considered
 235  // to be enabled at this point in the assembly output.
 236  func (a *Asm) Enabled(option Option) bool {
 237  	return a.enabled[option]
 238  }
 239  
 240  // SetOption changes whether the optional CPU feature should be
 241  // considered to be enabled.
 242  func (a *Asm) SetOption(option Option, on bool) {
 243  	a.enabled[option] = on
 244  }
 245  
 246  // op3 emits a 3-operand instruction op src1, src2, dst,
 247  // taking care to handle 2-operand machines and also
 248  // to simplify the printout when src2==dst.
 249  func (a *Asm) op3(op []byte, src1, src2, dst Reg) {
 250  	if op == "" {
 251  		a.Fatalf("missing instruction")
 252  	}
 253  	if src2 == dst {
 254  		// src2 and dst are same; print as 2-op form.
 255  		a.Printf("\t%s %s, %s\n", op, src1, dst)
 256  	} else if a.Arch.op3 != nil && !a.Arch.op3(op) {
 257  		// Machine does not have 3-op form for op; convert to 2-op.
 258  		if src1 == dst {
 259  			a.Fatalf("implicit mov %s, %s would smash src1", src2, dst)
 260  		}
 261  		a.Mov(src2, dst)
 262  		a.Printf("\t%s %s, %s\n", op, src1, dst)
 263  	} else {
 264  		// Full 3-op form.
 265  		a.Printf("\t%s %s, %s, %s\n", op, src1, src2, dst)
 266  	}
 267  }
 268  
 269  // Mov emits dst = src.
 270  func (a *Asm) Mov(src, dst Reg) {
 271  	if src != dst {
 272  		a.Printf("\t%s %s, %s\n", a.Arch.mov, src, dst)
 273  	}
 274  }
 275  
 276  // AddWords emits dst = src1*WordBytes + src2.
 277  // It does not set or use the carry flag.
 278  func (a *Asm) AddWords(src1 Reg, src2, dst RegPtr) {
 279  	if a.Arch.addWords == "" {
 280  		// Note: Assuming that Lsh does not clobber the carry flag.
 281  		// Architectures where this is not true (x86) need to provide Arch.addWords.
 282  		t := a.Reg()
 283  		a.Lsh(a.Imm(bits.TrailingZeros(uint(a.Arch.WordBytes))), src1, t)
 284  		a.Add(t, Reg(src2), Reg(dst), KeepCarry)
 285  		a.Free(t)
 286  		return
 287  	}
 288  	a.Printf("\t"+a.Arch.addWords+"\n", src1, src2, dst)
 289  }
 290  
 291  // And emits dst = src1 & src2
 292  // It may modify the carry flag.
 293  func (a *Asm) And(src1, src2, dst Reg) {
 294  	a.op3(a.Arch.and, src1, src2, dst)
 295  }
 296  
 297  // Or emits dst = src1 | src2
 298  // It may modify the carry flag.
 299  func (a *Asm) Or(src1, src2, dst Reg) {
 300  	a.op3(a.Arch.or, src1, src2, dst)
 301  }
 302  
 303  // Xor emits dst = src1 ^ src2
 304  // It may modify the carry flag.
 305  func (a *Asm) Xor(src1, src2, dst Reg) {
 306  	a.op3(a.Arch.xor, src1, src2, dst)
 307  }
 308  
 309  // Neg emits dst = -src.
 310  // It may modify the carry flag.
 311  func (a *Asm) Neg(src, dst Reg) {
 312  	if a.Arch.neg == "" {
 313  		if a.Arch.rsb != "" {
 314  			a.Printf("\t%s $0, %s, %s\n", a.Arch.rsb, src, dst)
 315  			return
 316  		}
 317  		if a.Arch.sub != "" && a.Arch.reg0 != "" {
 318  			a.Printf("\t%s %s, %s, %s\n", a.Arch.sub, src, a.Arch.reg0, dst)
 319  			return
 320  		}
 321  		a.Fatalf("missing neg")
 322  	}
 323  	if src == dst {
 324  		a.Printf("\t%s %s\n", a.Arch.neg, dst)
 325  	} else {
 326  		a.Printf("\t%s %s, %s\n", a.Arch.neg, src, dst)
 327  	}
 328  }
 329  
 330  // HasRegShift reports whether the architecture can use shift expressions as operands.
 331  func (a *Asm) HasRegShift() bool {
 332  	return a.Arch.regShift
 333  }
 334  
 335  // LshReg returns a shift-expression operand src<<shift.
 336  // If a.HasRegShift() == false, LshReg panics.
 337  func (a *Asm) LshReg(shift, src Reg) Reg {
 338  	if !a.HasRegShift() {
 339  		a.Fatalf("no reg shift")
 340  	}
 341  	return Reg{fmt.Sprintf("%s<<%s", src, bytes.TrimPrefix(shift.name, "$"))}
 342  }
 343  
 344  // Lsh emits dst = src << shift.
 345  // It may modify the carry flag.
 346  func (a *Asm) Lsh(shift, src, dst Reg) {
 347  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
 348  		a.Fatalf("shift count not in %s", need)
 349  	}
 350  	if a.HasRegShift() {
 351  		a.Mov(a.LshReg(shift, src), dst)
 352  		return
 353  	}
 354  	a.op3(a.Arch.lsh, shift, src, dst)
 355  }
 356  
 357  // LshWide emits dst = src << shift with low bits shifted from adj.
 358  // It may modify the carry flag.
 359  func (a *Asm) LshWide(shift, adj, src, dst Reg) {
 360  	if a.Arch.lshd == "" {
 361  		a.Fatalf("no lshwide on %s", a.Arch.Name)
 362  	}
 363  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
 364  		a.Fatalf("shift count not in %s", need)
 365  	}
 366  	a.op3(fmt.Sprintf("%s %s,", a.Arch.lshd, shift), adj, src, dst)
 367  }
 368  
 369  // RshReg returns a shift-expression operand src>>shift.
 370  // If a.HasRegShift() == false, RshReg panics.
 371  func (a *Asm) RshReg(shift, src Reg) Reg {
 372  	if !a.HasRegShift() {
 373  		a.Fatalf("no reg shift")
 374  	}
 375  	return Reg{fmt.Sprintf("%s>>%s", src, bytes.TrimPrefix(shift.name, "$"))}
 376  }
 377  
 378  // Rsh emits dst = src >> shift.
 379  // It may modify the carry flag.
 380  func (a *Asm) Rsh(shift, src, dst Reg) {
 381  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
 382  		a.Fatalf("shift count not in %s", need)
 383  	}
 384  	if a.HasRegShift() {
 385  		a.Mov(a.RshReg(shift, src), dst)
 386  		return
 387  	}
 388  	a.op3(a.Arch.rsh, shift, src, dst)
 389  }
 390  
 391  // RshWide emits dst = src >> shift with high bits shifted from adj.
 392  // It may modify the carry flag.
 393  func (a *Asm) RshWide(shift, adj, src, dst Reg) {
 394  	if a.Arch.lshd == "" {
 395  		a.Fatalf("no rshwide on %s", a.Arch.Name)
 396  	}
 397  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
 398  		a.Fatalf("shift count not in %s", need)
 399  	}
 400  	a.op3(fmt.Sprintf("%s %s,", a.Arch.rshd, shift), adj, src, dst)
 401  }
 402  
 403  // SLTU emits dst = src2 < src1 (0 or 1), using an unsigned comparison.
 404  func (a *Asm) SLTU(src1, src2, dst Reg) {
 405  	switch {
 406  	default:
 407  		a.Fatalf("arch has no sltu/sgtu")
 408  	case a.Arch.sltu != "":
 409  		a.Printf("\t%s %s, %s, %s\n", a.Arch.sltu, src1, src2, dst)
 410  	case a.Arch.sgtu != "":
 411  		a.Printf("\t%s %s, %s, %s\n", a.Arch.sgtu, src2, src1, dst)
 412  	}
 413  }
 414  
 415  // Add emits dst = src1+src2, with the specified carry behavior.
 416  func (a *Asm) Add(src1, src2, dst Reg, carry Carry) {
 417  	switch {
 418  	default:
 419  		a.Fatalf("unsupported carry behavior")
 420  	case a.Arch.addF != nil && a.Arch.addF(a, src1, src2, dst, carry):
 421  		// handled
 422  	case a.Arch.add != "" && (carry == KeepCarry || carry == SmashCarry):
 423  		a.op3(a.Arch.add, src1, src2, dst)
 424  	case a.Arch.adds != "" && (carry == SetCarry || carry == SmashCarry):
 425  		a.op3(a.Arch.adds, src1, src2, dst)
 426  	case a.Arch.adc != "" && (carry == UseCarry || carry == UseCarry|SmashCarry):
 427  		a.op3(a.Arch.adc, src1, src2, dst)
 428  	case a.Arch.adcs != "" && (carry == UseCarry|SetCarry || carry == UseCarry|SmashCarry):
 429  		a.op3(a.Arch.adcs, src1, src2, dst)
 430  	case a.Arch.lea != "" && (carry == KeepCarry || carry == SmashCarry):
 431  		if src1.IsImm() {
 432  			a.Printf("\t%s %s(%s), %s\n", a.Arch.lea, src1.name[1:], src2, dst) // name[1:] removes $
 433  		} else {
 434  			a.Printf("\t%s (%s)(%s), %s\n", a.Arch.lea, src1, src2, dst)
 435  		}
 436  		if src2 == dst {
 437  			a.EOL("ADD %s, %s", src1, dst)
 438  		} else {
 439  			a.EOL("ADD %s, %s, %s", src1, src2, dst)
 440  		}
 441  
 442  	case a.Arch.add != "" && a.Arch.regCarry != "":
 443  		// Machine has no carry flag; instead we've dedicated a register
 444  		// and use SLTU/SGTU (set less-than/greater-than unsigned)
 445  		// to compute the carry flags as needed.
 446  		// For ADD x, y, z, SLTU x/y, z, c computes the carry (borrow) bit.
 447  		// Either of x or y can be used as the second argument, provided
 448  		// it is not aliased to z.
 449  		// To make the output less of a wall of instructions,
 450  		// we comment the “higher-level” operation, with ... marking
 451  		// continued instructions implementing the operation.
 452  		cr := a.Carry()
 453  		if carry&AltCarry != 0 {
 454  			cr = a.AltCarry()
 455  			if !cr.Valid() {
 456  				a.Fatalf("alt carry not supported")
 457  			}
 458  			carry &^= AltCarry
 459  		}
 460  		tmp := a.tmp()
 461  		if !tmp.Valid() {
 462  			a.Fatalf("cannot simulate sub carry without regTmp")
 463  		}
 464  		switch carry {
 465  		default:
 466  			a.Fatalf("unsupported carry behavior")
 467  		case UseCarry, UseCarry | SmashCarry:
 468  			// Easy case, just add the carry afterward.
 469  			if a.IsZero(src1) {
 470  				// Only here to use the carry.
 471  				a.Add(cr, src2, dst, KeepCarry)
 472  				a.EOL("ADC $0, %s, %s", src2, dst)
 473  				break
 474  			}
 475  			a.Add(src1, src2, dst, KeepCarry)
 476  			a.EOL("ADC %s, %s, %s (cr=%s)", src1, src2, dst, cr)
 477  			a.Add(cr, dst, dst, KeepCarry)
 478  			a.EOL("...")
 479  
 480  		case SetCarry:
 481  			if a.IsZero(src1) && src2 == dst {
 482  				// Only here to clear the carry flag. (Caller will comment.)
 483  				a.Xor(cr, cr, cr)
 484  				break
 485  			}
 486  			var old Reg // old is a src distinct from dst
 487  			switch {
 488  			case dst != src1:
 489  				old = src1
 490  			case dst != src2:
 491  				old = src2
 492  			default:
 493  				// src1 == src2 == dst.
 494  				// Overflows if and only if the high bit is set, so copy high bit to carry.
 495  				a.Rsh(a.Imm(a.Arch.WordBits-1), src1, cr)
 496  				a.EOL("ADDS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
 497  				a.Add(src1, src2, dst, KeepCarry)
 498  				a.EOL("...")
 499  				return
 500  			}
 501  			a.Add(src1, src2, dst, KeepCarry)
 502  			a.EOL("ADDS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
 503  			a.SLTU(old, dst, cr) // dst < old (one of the src) implies carry
 504  			a.EOL("...")
 505  
 506  		case UseCarry | SetCarry:
 507  			if a.IsZero(src1) {
 508  				// Only here to use and then set the carry.
 509  				// Easy since carry is not aliased to dst.
 510  				a.Add(cr, src2, dst, KeepCarry)
 511  				a.EOL("ADCS $0, %s, %s (cr=%s)", src2, dst, cr)
 512  				a.SLTU(cr, dst, cr) // dst < cr implies carry
 513  				a.EOL("...")
 514  				break
 515  			}
 516  			// General case. Need to do two different adds (src1 + src2 + cr),
 517  			// computing carry bits for both, and add'ing them together.
 518  			// Start with src1+src2.
 519  			var old Reg // old is a src distinct from dst
 520  			switch {
 521  			case dst != src1:
 522  				old = src1
 523  			case dst != src2:
 524  				old = src2
 525  			}
 526  			if old.Valid() {
 527  				a.Add(src1, src2, dst, KeepCarry)
 528  				a.EOL("ADCS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
 529  				a.SLTU(old, dst, tmp) // // dst < old (one of the src) implies carry
 530  				a.EOL("...")
 531  			} else {
 532  				// src1 == src2 == dst, like above. Sign bit is carry bit,
 533  				// but we copy it into tmp, not cr.
 534  				a.Rsh(a.Imm(a.Arch.WordBits-1), src1, tmp)
 535  				a.EOL("ADCS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
 536  				a.Add(src1, src2, dst, KeepCarry)
 537  				a.EOL("...")
 538  			}
 539  			// Add cr to dst.
 540  			a.Add(cr, dst, dst, KeepCarry)
 541  			a.EOL("...")
 542  			a.SLTU(cr, dst, cr) // sum < cr implies carry
 543  			a.EOL("...")
 544  			// Add the two carry bits (at most one can be set, because (2⁶⁴-1)+(2⁶⁴-1)+1 < 2·2⁶⁴).
 545  			a.Add(tmp, cr, cr, KeepCarry)
 546  			a.EOL("...")
 547  		}
 548  	}
 549  }
 550  
 551  // Sub emits dst = src2-src1, with the specified carry behavior.
 552  func (a *Asm) Sub(src1, src2, dst Reg, carry Carry) {
 553  	switch {
 554  	default:
 555  		a.Fatalf("unsupported carry behavior")
 556  	case a.Arch.subF != nil && a.Arch.subF(a, src1, src2, dst, carry):
 557  		// handled
 558  	case a.Arch.sub != "" && (carry == KeepCarry || carry == SmashCarry):
 559  		a.op3(a.Arch.sub, src1, src2, dst)
 560  	case a.Arch.subs != "" && (carry == SetCarry || carry == SmashCarry):
 561  		a.op3(a.Arch.subs, src1, src2, dst)
 562  	case a.Arch.sbc != "" && (carry == UseCarry || carry == UseCarry|SmashCarry):
 563  		a.op3(a.Arch.sbc, src1, src2, dst)
 564  	case a.Arch.sbcs != "" && (carry == UseCarry|SetCarry || carry == UseCarry|SmashCarry):
 565  		a.op3(a.Arch.sbcs, src1, src2, dst)
 566  	case bytes.HasPrefix(src1.name, "$") && (carry == KeepCarry || carry == SmashCarry):
 567  		// Running out of options; if this is an immediate
 568  		// and we don't need to worry about carry semantics,
 569  		// try adding the negation.
 570  		if bytes.HasPrefix(src1.name, "$-") {
 571  			src1.name = "$" + src1.name[2:]
 572  		} else {
 573  			src1.name = "$-" + src1.name[1:]
 574  		}
 575  		a.Add(src1, src2, dst, carry)
 576  
 577  	case a.Arch.sub != "" && a.Arch.regCarry != "":
 578  		// Machine has no carry flag; instead we've dedicated a register
 579  		// and use SLTU/SGTU (set less-than/greater-than unsigned)
 580  		// to compute the carry bits as needed.
 581  		// For SUB x, y, z, SLTU x, y, c computes the carry (borrow) bit.
 582  		// To make the output less of a wall of instructions,
 583  		// we comment the “higher-level” operation, with ... marking
 584  		// continued instructions implementing the operation.
 585  		// Be careful! Subtract and add have different overflow behaviors,
 586  		// so the details here are NOT the same as in Add above.
 587  		cr := a.Carry()
 588  		if carry&AltCarry != 0 {
 589  			a.Fatalf("alt carry not supported")
 590  		}
 591  		tmp := a.tmp()
 592  		if !tmp.Valid() {
 593  			a.Fatalf("cannot simulate carry without regTmp")
 594  		}
 595  		switch carry {
 596  		default:
 597  			a.Fatalf("unsupported carry behavior")
 598  		case UseCarry, UseCarry | SmashCarry:
 599  			// Easy case, just subtract the carry afterward.
 600  			if a.IsZero(src1) {
 601  				// Only here to use the carry.
 602  				a.Sub(cr, src2, dst, KeepCarry)
 603  				a.EOL("SBC $0, %s, %s", src2, dst)
 604  				break
 605  			}
 606  			a.Sub(src1, src2, dst, KeepCarry)
 607  			a.EOL("SBC %s, %s, %s", src1, src2, dst)
 608  			a.Sub(cr, dst, dst, KeepCarry)
 609  			a.EOL("...")
 610  
 611  		case SetCarry:
 612  			if a.IsZero(src1) && src2 == dst {
 613  				// Only here to clear the carry flag.
 614  				a.Xor(cr, cr, cr)
 615  				break
 616  			}
 617  			// Compute the new carry first, in case dst is src1 or src2.
 618  			a.SLTU(src1, src2, cr)
 619  			a.EOL("SUBS %s, %s, %s", src1, src2, dst)
 620  			a.Sub(src1, src2, dst, KeepCarry)
 621  			a.EOL("...")
 622  
 623  		case UseCarry | SetCarry:
 624  			if a.IsZero(src1) {
 625  				// Only here to use and then set the carry.
 626  				if src2 == dst {
 627  					// Unfortunate case. Using src2==dst is common (think x -= y)
 628  					// and also more efficient on two-operand machines (like x86),
 629  					// but here subtracting from dst will smash src2, making it
 630  					// impossible to recover the carry information after the SUB.
 631  					// But we want to use the carry, so we can't compute it before
 632  					// the SUB either. Compute into a temporary and MOV.
 633  					a.SLTU(cr, src2, tmp)
 634  					a.EOL("SBCS $0, %s, %s", src2, dst)
 635  					a.Sub(cr, src2, dst, KeepCarry)
 636  					a.EOL("...")
 637  					a.Mov(tmp, cr)
 638  					a.EOL("...")
 639  					break
 640  				}
 641  				a.Sub(cr, src2, dst, KeepCarry) // src2 not dst, so src2 preserved
 642  				a.SLTU(cr, src2, cr)
 643  				break
 644  			}
 645  			// General case. Need to do two different subtracts (src2 - cr - src1),
 646  			// computing carry bits for both, and add'ing them together.
 647  			// Doing src2 - cr first frees up cr to store the carry from the sub of src1.
 648  			a.SLTU(cr, src2, tmp)
 649  			a.EOL("SBCS %s, %s, %s", src1, src2, dst)
 650  			a.Sub(cr, src2, dst, KeepCarry)
 651  			a.EOL("...")
 652  			a.SLTU(src1, dst, cr)
 653  			a.EOL("...")
 654  			a.Sub(src1, dst, dst, KeepCarry)
 655  			a.EOL("...")
 656  			a.Add(tmp, cr, cr, KeepCarry)
 657  			a.EOL("...")
 658  		}
 659  	}
 660  }
 661  
 662  // ClearCarry clears the carry flag.
 663  // The ‘which’ parameter must be AddCarry or SubCarry to specify how the flag will be used.
 664  // (On some systems, the sub carry's actual processor bit is inverted from its usual value.)
 665  func (a *Asm) ClearCarry(which Carry) {
 666  	dst := Reg{a.Arch.regs[0]} // not actually modified
 667  	switch which & (AddCarry | SubCarry) {
 668  	default:
 669  		a.Fatalf("bad carry")
 670  	case AddCarry:
 671  		a.Add(a.Imm(0), dst, dst, SetCarry|which&AltCarry)
 672  	case SubCarry:
 673  		a.Sub(a.Imm(0), dst, dst, SetCarry|which&AltCarry)
 674  	}
 675  	a.EOL("clear carry")
 676  }
 677  
 678  // SaveCarry saves the carry flag into dst.
 679  // The meaning of the bits in dst is architecture-dependent.
 680  // The carry flag is left in an undefined state.
 681  func (a *Asm) SaveCarry(dst Reg) {
 682  	// Note: As implemented here, the carry flag is actually left unmodified,
 683  	// but we say it is in an undefined state in case that changes in the future.
 684  	// (The SmashCarry could be changed to SetCarry if so.)
 685  	if cr := a.Carry(); cr.Valid() {
 686  		if cr == dst {
 687  			return // avoid EOL
 688  		}
 689  		a.Mov(cr, dst)
 690  	} else {
 691  		a.Sub(dst, dst, dst, UseCarry|SmashCarry)
 692  	}
 693  	a.EOL("save carry")
 694  }
 695  
 696  // RestoreCarry restores the carry flag from src.
 697  // src is left in an undefined state.
 698  func (a *Asm) RestoreCarry(src Reg) {
 699  	if cr := a.Carry(); cr.Valid() {
 700  		if cr == src {
 701  			return // avoid EOL
 702  		}
 703  		a.Mov(src, cr)
 704  	} else if a.Arch.subCarryIsBorrow {
 705  		a.Add(src, src, src, SetCarry)
 706  	} else {
 707  		// SaveCarry saved the sub carry flag with an encoding of 0, 1 -> 0, ^0.
 708  		// Restore it by subtracting from a value less than ^0, which will carry if src != 0.
 709  		// If there is no zero register, the SP register is guaranteed to be less than ^0.
 710  		// (This may seem too clever, but on GOARCH=arm we have no other good options.)
 711  		a.Sub(src, cmp.Or(a.ZR(), Reg{"SP"}), src, SetCarry)
 712  	}
 713  	a.EOL("restore carry")
 714  }
 715  
 716  // ConvertCarry converts the carry flag in dst from the internal format to a 0 or 1.
 717  // The carry flag is left in an undefined state.
 718  func (a *Asm) ConvertCarry(which Carry, dst Reg) {
 719  	if a.Carry().Valid() { // already 0 or 1
 720  		return
 721  	}
 722  	switch which {
 723  	case AddCarry:
 724  		if a.Arch.subCarryIsBorrow {
 725  			a.Neg(dst, dst)
 726  		} else {
 727  			a.Add(a.Imm(1), dst, dst, SmashCarry)
 728  		}
 729  		a.EOL("convert add carry")
 730  	case SubCarry:
 731  		a.Neg(dst, dst)
 732  		a.EOL("convert sub carry")
 733  	}
 734  }
 735  
 736  // SaveConvertCarry saves and converts the carry flag into dst: 0 unset, 1 set.
 737  // The carry flag is left in an undefined state.
 738  func (a *Asm) SaveConvertCarry(which Carry, dst Reg) {
 739  	switch which {
 740  	default:
 741  		a.Fatalf("bad carry")
 742  	case AddCarry:
 743  		if (a.Arch.adc != "" || a.Arch.adcs != "") && a.ZR().Valid() {
 744  			a.Add(a.ZR(), a.ZR(), dst, UseCarry|SmashCarry)
 745  			a.EOL("save & convert add carry")
 746  			return
 747  		}
 748  	case SubCarry:
 749  		// no special cases
 750  	}
 751  	a.SaveCarry(dst)
 752  	a.ConvertCarry(which, dst)
 753  }
 754  
 755  // MulWide emits dstlo = src1 * src2 and dsthi = (src1 * src2) >> WordBits.
 756  // The carry flag is left in an undefined state.
 757  // If dstlo or dsthi is the zero Reg, then those outputs are discarded.
 758  func (a *Asm) MulWide(src1, src2, dstlo, dsthi Reg) {
 759  	switch {
 760  	default:
 761  		a.Fatalf("mulwide not available")
 762  	case a.Arch.mulWideF != nil:
 763  		a.Arch.mulWideF(a, src1, src2, dstlo, dsthi)
 764  	case a.Arch.mul != "" && !dsthi.Valid():
 765  		a.op3(a.Arch.mul, src1, src2, dstlo)
 766  	case a.Arch.mulhi != "" && !dstlo.Valid():
 767  		a.op3(a.Arch.mulhi, src1, src2, dsthi)
 768  	case a.Arch.mul != "" && a.Arch.mulhi != "" && dstlo != src1 && dstlo != src2:
 769  		a.op3(a.Arch.mul, src1, src2, dstlo)
 770  		a.op3(a.Arch.mulhi, src1, src2, dsthi)
 771  	case a.Arch.mul != "" && a.Arch.mulhi != "" && dsthi != src1 && dsthi != src2:
 772  		a.op3(a.Arch.mulhi, src1, src2, dsthi)
 773  		a.op3(a.Arch.mul, src1, src2, dstlo)
 774  	}
 775  }
 776  
 777  // Jmp jumps to the label.
 778  func (a *Asm) Jmp(label []byte) {
 779  	// Note: Some systems prefer the spelling B or BR, but all accept JMP.
 780  	a.Printf("\tJMP %s\n", label)
 781  }
 782  
 783  // JmpZero jumps to the label if src is zero.
 784  // It may modify the carry flag unless a.Arch.CarrySafeLoop is true.
 785  func (a *Asm) JmpZero(src Reg, label []byte) {
 786  	a.Printf("\t"+a.Arch.jmpZero+"\n", src, label)
 787  }
 788  
 789  // JmpNonZero jumps to the label if src is non-zero.
 790  // It may modify the carry flag unless a.Arch,CarrySafeLoop is true.
 791  func (a *Asm) JmpNonZero(src Reg, label []byte) {
 792  	a.Printf("\t"+a.Arch.jmpNonZero+"\n", src, label)
 793  }
 794  
 795  // Label emits a label with the given name.
 796  func (a *Asm) Label(name []byte) {
 797  	a.Printf("%s:\n", name)
 798  }
 799  
 800  // Ret returns.
 801  func (a *Asm) Ret() {
 802  	a.Printf("\tRET\n")
 803  }
 804