ppc64.mx raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package asmgen
   6  
   7  var ArchPPC64x = &Arch{
   8  	Name:          "ppc64x",
   9  	Build:         "ppc64 || ppc64le",
  10  	WordBits:      64,
  11  	WordBytes:     8,
  12  	CarrySafeLoop: true,
  13  
  14  	// Note: The old, hand-written ppc64x assembly used MOVDU
  15  	// to avoid explicit pointer updates in a few routines, but the new
  16  	// generated code runs just as fast, so we haven't bothered to try
  17  	// to add that back. (It's not trivial; you'd have to keep the pointers
  18  	// shifted one word in order to make the semantics work.)
  19  	//
  20  	// The old assembly also used some complex vector instructions
  21  	// to implement lshVU and rshVU, but the generated code that uses
  22  	// ordinary integer instructions is much faster than the vector code was,
  23  	// at least on the power10 gomote.
  24  
  25  	regs: [][]byte{
  26  		// R0 is 0 by convention.
  27  		// R1 is SP.
  28  		// R2 is TOC.
  29  		// R30 is g.
  30  		// R31 is the assembler/linker temporary (which we use too).
  31  		"R3", "R4", "R5", "R6", "R7", "R8", "R9",
  32  		"R10", "R11", "R12" /*R13 is TLS*/, "R14", "R15", "R16", "R17", "R18", "R19",
  33  		"R20", "R21", "R22", "R23", "R24", "R25", "R26", "R27", "R28", "R29",
  34  	},
  35  	reg0:   "R0",
  36  	regTmp: "R31",
  37  
  38  	// Note: Could write an addF and subF to use ADDZE and SUBZE,
  39  	// but we have R0 so it doesn't seem to matter much.
  40  
  41  	mov:   "MOVD",
  42  	add:   "ADD",
  43  	adds:  "ADDC",
  44  	adcs:  "ADDE",
  45  	sub:   "SUB",
  46  	subs:  "SUBC",
  47  	sbcs:  "SUBE",
  48  	mul:   "MULLD",
  49  	mulhi: "MULHDU",
  50  	lsh:   "SLD",
  51  	rsh:   "SRD",
  52  	and:   "ANDCC", // regular AND does not accept immediates
  53  	or:    "OR",
  54  	xor:   "XOR",
  55  
  56  	jmpZero:    "CMP %[1]s, $0; BEQ %[2]s",
  57  	jmpNonZero: "CMP %s, $0; BNE %s",
  58  
  59  	// Note: Using CTR means that we could free the count register
  60  	// during the loop body, but the portable logic doesn't know that,
  61  	// and we're not hurting for registers.
  62  	loopTop:    "CMP %[1]s, $0; BEQ %[2]s; MOVD %[1]s, CTR",
  63  	loopBottom: "BDNZ %[2]s",
  64  }
  65