arith_ppc64x.s raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
   6  
   7  //go:build !math_big_pure_go && (ppc64 || ppc64le)
   8  
   9  #include "textflag.h"
  10  
  11  // func addVV(z, x, y []Word) (c Word)
      //
      // addVV stores x[i] + y[i] + carry into z[i] for i in [0, len(z)), with the
      // carry propagated from each word to the next, and returns the carry out of
      // the last word in c. With len(z) == 0 nothing is stored and c is 0.
      //
      // Only the length of z is read. x and y are indexed for the same number of
      // words and this routine performs no bounds check of its own.
      //
      // The carry between words is kept in the machine carry bit from the initial
      // ADDC to the final ADDE. Every instruction in between that is not an ADDE
      // (MOVD, ADD, CMP, BEQ, BDNZ) leaves that bit alone, so reordering or
      // replacing them needs care.
      //
      // Register use: R3 = number of 4X iterations, R7 = number of 1X iterations
      // (reused as a data register once the 1X loop is done), R4/R5/R6 = running
      // pointers into x/y/z. R0 is used as a zero operand.
  12  TEXT ·addVV(SB), NOSPLIT, $0
  13  	MOVD z_len+8(FP), R3	// R3 = len(z)
  14  	MOVD x_base+24(FP), R4	// R4 = pointer into x
  15  	MOVD y_base+48(FP), R5	// R5 = pointer into y
  16  	MOVD z_base+0(FP), R6	// R6 = pointer into z
  17  	// compute unrolled loop lengths
  18  	ANDCC $3, R3, R7	// R7 = len(z) % 4, words handled one at a time
  19  	SRD $2, R3	// R3 = len(z) / 4, groups of four words
  20  	ADDC R0, R3	// clear carry: adding zero never carries out, R3 is unchanged
  21  loop1:
  22  	CMP R7, $0; BEQ loop1done; MOVD R7, CTR	// skip when there are no leftover words, else CTR = count
  23  loop1cont:
  24  	// unroll 1X
  25  	MOVD 0(R4), R8	// R8 = x word
  26  	MOVD 0(R5), R9	// R9 = y word
  27  	ADDE R9, R8	// R8 = x + y + carry in, carry bit updated
  28  	MOVD R8, 0(R6)	// store z word
  29  	ADD $8, R4
  30  	ADD $8, R5
  31  	ADD $8, R6
  32  	BDNZ loop1cont	// decrement CTR and loop while it is nonzero
  33  loop1done:
  34  loop4:
  35  	CMP R3, $0; BEQ loop4done; MOVD R3, CTR	// skip when fewer than four words remain, else CTR = groups
  36  loop4cont:
  37  	// unroll 4X
      	// load four words of x (R7-R10) and four words of y (R11, R12, R14, R15)
  38  	MOVD 0(R4), R7
  39  	MOVD 8(R4), R8
  40  	MOVD 16(R4), R9
  41  	MOVD 24(R4), R10
  42  	MOVD 0(R5), R11
  43  	MOVD 8(R5), R12
  44  	MOVD 16(R5), R14
  45  	MOVD 24(R5), R15
      	// add lowest word first so the carry ripples upward through the group
  46  	ADDE R11, R7
  47  	ADDE R12, R8
  48  	ADDE R14, R9
  49  	ADDE R15, R10
  50  	MOVD R7, 0(R6)
  51  	MOVD R8, 8(R6)
  52  	MOVD R9, 16(R6)
  53  	MOVD R10, 24(R6)
  54  	ADD $32, R4
  55  	ADD $32, R5
  56  	ADD $32, R6
  57  	BDNZ loop4cont
  58  loop4done:
  59  	ADDE R0, R0, R4	// save & convert add carry: R4 = 0 + 0 + carry bit
  60  	MOVD R4, c+72(FP)	// return the final carry
  61  	RET
  62  
  63  // func subVV(z, x, y []Word) (c Word)
      //
      // subVV stores x[i] - y[i] - borrow into z[i] for i in [0, len(z)), with the
      // borrow propagated from each word to the next, and returns the borrow out
      // of the last word in c (0 or 1). With len(z) == 0 nothing is stored and c
      // is 0.
      //
      // Only the length of z is read. x and y are indexed for the same number of
      // words and this routine performs no bounds check of its own.
      //
      // The borrow is carried in the machine carry bit from the initial SUBC to
      // the final SUBE. For the subtract-extended instructions a set carry bit
      // means no borrow, which is why the result needs converting at the end.
      //
      // Register use: R3 = number of 4X iterations, R7 = number of 1X iterations
      // (reused as a data register once the 1X loop is done), R4/R5/R6 = running
      // pointers into x/y/z. R0 is used as a zero operand.
  64  TEXT ·subVV(SB), NOSPLIT, $0
  65  	MOVD z_len+8(FP), R3	// R3 = len(z)
  66  	MOVD x_base+24(FP), R4	// R4 = pointer into x
  67  	MOVD y_base+48(FP), R5	// R5 = pointer into y
  68  	MOVD z_base+0(FP), R6	// R6 = pointer into z
  69  	// compute unrolled loop lengths
  70  	ANDCC $3, R3, R7	// R7 = len(z) % 4, words handled one at a time
  71  	SRD $2, R3	// R3 = len(z) / 4, groups of four words
  72  	SUBC R0, R3	// clear carry: subtracting zero leaves R3 unchanged and records no borrow
  73  loop1:
  74  	CMP R7, $0; BEQ loop1done; MOVD R7, CTR	// skip when there are no leftover words, else CTR = count
  75  loop1cont:
  76  	// unroll 1X
  77  	MOVD 0(R4), R8	// R8 = x word
  78  	MOVD 0(R5), R9	// R9 = y word
  79  	SUBE R9, R8	// R8 = x - y - borrow in, borrow state updated
  80  	MOVD R8, 0(R6)	// store z word
  81  	ADD $8, R4
  82  	ADD $8, R5
  83  	ADD $8, R6
  84  	BDNZ loop1cont	// decrement CTR and loop while it is nonzero
  85  loop1done:
  86  loop4:
  87  	CMP R3, $0; BEQ loop4done; MOVD R3, CTR	// skip when fewer than four words remain, else CTR = groups
  88  loop4cont:
  89  	// unroll 4X
      	// load four words of x (R7-R10) and four words of y (R11, R12, R14, R15)
  90  	MOVD 0(R4), R7
  91  	MOVD 8(R4), R8
  92  	MOVD 16(R4), R9
  93  	MOVD 24(R4), R10
  94  	MOVD 0(R5), R11
  95  	MOVD 8(R5), R12
  96  	MOVD 16(R5), R14
  97  	MOVD 24(R5), R15
      	// subtract lowest word first so the borrow ripples upward through the group
  98  	SUBE R11, R7
  99  	SUBE R12, R8
 100  	SUBE R14, R9
 101  	SUBE R15, R10
 102  	MOVD R7, 0(R6)
 103  	MOVD R8, 8(R6)
 104  	MOVD R9, 16(R6)
 105  	MOVD R10, 24(R6)
 106  	ADD $32, R4
 107  	ADD $32, R5
 108  	ADD $32, R6
 109  	BDNZ loop4cont
 110  loop4done:
 111  	SUBE R4, R4	// save carry: R4 - R4 - borrow gives 0 (no borrow) or all ones (borrow)
 112  	SUB R4, R0, R4	// convert sub carry: R4 = 0 - R4, that is 0 or 1
 113  	MOVD R4, c+72(FP)	// return the final borrow
 114  	RET
 115  
 116  // func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU shifts the len(z) words of x left by s bits as one multi-word value
      // and stores the result in z. It returns in c the bits shifted out of the
      // highest word, that is x[len(z)-1] >> (64 - s). With len(z) == 0 nothing is
      // stored and c is 0.
      //
      // The words are processed from the highest index down to the lowest. R7
      // always holds the already shifted upper part of the next output word, and
      // each step ORs in the top bits of the next lower input word.
      //
      // NOTE(review): both s and 64 - s are used as shift counts, so this
      // presumably expects 0 < s < 64. Confirm against the callers.
      //
      // Register use: R4 = s, R8 = 64 - s, R5/R6 = running pointers into x/z
      // (starting just past the last word), R3 = number of 4X iterations,
      // R9 = number of 1X iterations (reused as a data register afterwards).
 117  TEXT ·lshVU(SB), NOSPLIT, $0
 118  	MOVD z_len+8(FP), R3	// R3 = len(z)
 119  	CMP R3, $0; BEQ ret0	// nothing to shift, return c = 0
 120  	MOVD s+48(FP), R4	// R4 = s
 121  	MOVD x_base+24(FP), R5
 122  	MOVD z_base+0(FP), R6
 123  	// run loop backward
 124  	SLD $3, R3, R7	// R7 = len(z) * 8, the byte length
 125  	ADD R7, R5	// R5 = x_base + 8*len(z), just past the words to shift
 126  	SLD $3, R3, R7
 127  	ADD R7, R6	// R6 = z_base + 8*len(z), just past the words to store
 128  	// shift first word into carry
 129  	MOVD -8(R5), R7	// R7 = highest input word
 130  	MOVD $64, R8
 131  	SUB R4, R8	// R8 = 64 - s
 132  	SRD R8, R7, R9	// R9 = bits shifted out of the highest word
 133  	SLD R4, R7	// R7 = highest word << s, its low s bits are still to be filled in
 134  	MOVD R9, c+56(FP)	// the result c is final at this point
 135  	// shift remaining words
 136  	SUB $1, R3	// the highest word has already been read
 137  	// compute unrolled loop lengths
 138  	ANDCC $3, R3, R9	// R9 = remaining words % 4
 139  	SRD $2, R3	// R3 = remaining words / 4
 140  loop1:
 141  	CMP R9, $0; BEQ loop1done; MOVD R9, CTR	// skip when there are no leftover words, else CTR = count
 142  loop1cont:
 143  	// unroll 1X
 144  	MOVD -16(R5), R10	// R10 = next lower input word
 145  	SRD R8, R10, R11	// R11 = its top s bits moved to the bottom
 146  	OR R7, R11	// R11 = completed output word
 147  	SLD R4, R10, R7	// R7 = upper part of the following output word
 148  	MOVD R11, -8(R6)
 149  	ADD $-8, R5
 150  	ADD $-8, R6
 151  	BDNZ loop1cont	// decrement CTR and loop while it is nonzero
 152  loop1done:
 153  loop4:
 154  	CMP R3, $0; BEQ loop4done; MOVD R3, CTR	// skip when fewer than four words remain, else CTR = groups
 155  loop4cont:
 156  	// unroll 4X
      	// R9-R12 = the next four lower input words, highest index first
 157  	MOVD -16(R5), R9
 158  	MOVD -24(R5), R10
 159  	MOVD -32(R5), R11
 160  	MOVD -40(R5), R12
      	// Each group of three instructions completes one output word (R14, R9,
      	// R10, R11 in turn) and leaves the upper part of the next one in R7.
      	// An input register is recycled as an output register only after its
      	// word has been shifted both ways, so the order below matters.
 161  	SRD R8, R9, R14
 162  	OR R7, R14
 163  	SLD R4, R9, R7
 164  	SRD R8, R10, R9
 165  	OR R7, R9
 166  	SLD R4, R10, R7
 167  	SRD R8, R11, R10
 168  	OR R7, R10
 169  	SLD R4, R11, R7
 170  	SRD R8, R12, R11
 171  	OR R7, R11
 172  	SLD R4, R12, R7
 173  	MOVD R14, -8(R6)
 174  	MOVD R9, -16(R6)
 175  	MOVD R10, -24(R6)
 176  	MOVD R11, -32(R6)
 177  	ADD $-32, R5
 178  	ADD $-32, R6
 179  	BDNZ loop4cont
 180  loop4done:
 181  	// store final shifted bits
 182  	MOVD R7, -8(R6)	// lowest output word: lowest input word << s, nothing below to OR in
 183  	RET
 184  ret0:
 185  	MOVD R0, c+56(FP)	// empty input: c = 0 (R0 is used as zero)
 186  	RET
 187  
 188  // func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU shifts the len(z) words of x right by s bits as one multi-word value
      // and stores the result in z. It returns in c the bits shifted out of the
      // lowest word, placed at the top of the word: x[0] << (64 - s). With
      // len(z) == 0 nothing is stored and c is 0.
      //
      // The words are processed from the lowest index up to the highest. R7
      // always holds the already shifted lower part of the next output word, and
      // each step ORs in the low bits of the next higher input word.
      //
      // NOTE(review): both s and 64 - s are used as shift counts, so this
      // presumably expects 0 < s < 64. Confirm against the callers.
      //
      // Register use: R4 = s, R8 = 64 - s, R5/R6 = running pointers into x/z,
      // R3 = number of 4X iterations, R9 = number of 1X iterations (reused as a
      // data register afterwards).
 189  TEXT ·rshVU(SB), NOSPLIT, $0
 190  	MOVD z_len+8(FP), R3	// R3 = len(z)
 191  	CMP R3, $0; BEQ ret0	// nothing to shift, return c = 0
 192  	MOVD s+48(FP), R4	// R4 = s
 193  	MOVD x_base+24(FP), R5	// R5 = pointer into x
 194  	MOVD z_base+0(FP), R6	// R6 = pointer into z
 195  	// shift first word into carry
 196  	MOVD 0(R5), R7	// R7 = lowest input word
 197  	MOVD $64, R8
 198  	SUB R4, R8	// R8 = 64 - s
 199  	SLD R8, R7, R9	// R9 = bits shifted out of the lowest word, moved to the top
 200  	SRD R4, R7	// R7 = lowest word >> s, its high s bits are still to be filled in
 201  	MOVD R9, c+56(FP)	// the result c is final at this point
 202  	// shift remaining words
 203  	SUB $1, R3	// the lowest word has already been read
 204  	// compute unrolled loop lengths
 205  	ANDCC $3, R3, R9	// R9 = remaining words % 4
 206  	SRD $2, R3	// R3 = remaining words / 4
 207  loop1:
 208  	CMP R9, $0; BEQ loop1done; MOVD R9, CTR	// skip when there are no leftover words, else CTR = count
 209  loop1cont:
 210  	// unroll 1X
 211  	MOVD 8(R5), R10	// R10 = next higher input word
 212  	SLD R8, R10, R11	// R11 = its low s bits moved to the top
 213  	OR R7, R11	// R11 = completed output word
 214  	SRD R4, R10, R7	// R7 = lower part of the following output word
 215  	MOVD R11, 0(R6)
 216  	ADD $8, R5
 217  	ADD $8, R6
 218  	BDNZ loop1cont	// decrement CTR and loop while it is nonzero
 219  loop1done:
 220  loop4:
 221  	CMP R3, $0; BEQ loop4done; MOVD R3, CTR	// skip when fewer than four words remain, else CTR = groups
 222  loop4cont:
 223  	// unroll 4X
      	// R9-R12 = the next four higher input words, lowest index first
 224  	MOVD 8(R5), R9
 225  	MOVD 16(R5), R10
 226  	MOVD 24(R5), R11
 227  	MOVD 32(R5), R12
      	// Each group of three instructions completes one output word (R14, R9,
      	// R10, R11 in turn) and leaves the lower part of the next one in R7.
      	// An input register is recycled as an output register only after its
      	// word has been shifted both ways, so the order below matters.
 228  	SLD R8, R9, R14
 229  	OR R7, R14
 230  	SRD R4, R9, R7
 231  	SLD R8, R10, R9
 232  	OR R7, R9
 233  	SRD R4, R10, R7
 234  	SLD R8, R11, R10
 235  	OR R7, R10
 236  	SRD R4, R11, R7
 237  	SLD R8, R12, R11
 238  	OR R7, R11
 239  	SRD R4, R12, R7
 240  	MOVD R14, 0(R6)
 241  	MOVD R9, 8(R6)
 242  	MOVD R10, 16(R6)
 243  	MOVD R11, 24(R6)
 244  	ADD $32, R5
 245  	ADD $32, R6
 246  	BDNZ loop4cont
 247  loop4done:
 248  	// store final shifted bits
 249  	MOVD R7, 0(R6)	// highest output word: highest input word >> s, nothing above to OR in
 250  	RET
 251  ret0:
 252  	MOVD R0, c+56(FP)	// empty input: c = 0 (R0 is used as zero)
 253  	RET
 254  
 255  // func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW computes x*m + a, treating the len(z) words of x as one
      // multi-word value. The low len(z) words of the result are stored in z and
      // the remaining high word is returned in c. With len(z) == 0 nothing is
      // stored and c is a.
      //
      // For each word the 128-bit product m*x[i] is formed as a high half
      // (MULHDU) and a low half (MULLD). The running carry word is added to the
      // low half, and the high half plus the resulting carry bit becomes the new
      // running carry word. That sum cannot overflow one word because
      // (2^64-1)^2 + (2^64-1) is less than 2^128.
      //
      // Only the length of z is read. x is indexed for the same number of words
      // and this routine performs no bounds check of its own.
      //
      // Register use: R3 = m, R4 = running carry word (initially a), R6/R7 =
      // running pointers into x/z, R5 = number of 4X iterations, R8 = number of
      // 1X iterations (reused as a data register afterwards). R0 is used as zero.
 256  TEXT ·mulAddVWW(SB), NOSPLIT, $0
 257  	MOVD m+48(FP), R3	// R3 = m
 258  	MOVD a+56(FP), R4	// R4 = a, the initial carry word
 259  	MOVD z_len+8(FP), R5	// R5 = len(z)
 260  	MOVD x_base+24(FP), R6	// R6 = pointer into x
 261  	MOVD z_base+0(FP), R7	// R7 = pointer into z
 262  	// compute unrolled loop lengths
 263  	ANDCC $3, R5, R8	// R8 = len(z) % 4, words handled one at a time
 264  	SRD $2, R5	// R5 = len(z) / 4, groups of four words
 265  loop1:
 266  	CMP R8, $0; BEQ loop1done; MOVD R8, CTR	// skip when there are no leftover words, else CTR = count
 267  loop1cont:
 268  	// unroll 1X
 269  	MOVD 0(R6), R9	// R9 = x word
 270  	// multiply
 271  	MULHDU R3, R9, R10	// R10 = high 64 bits of m * x word (unsigned)
 272  	MULLD R3, R9	// R9 = low 64 bits of m * x word
 273  	ADDC R4, R9	// R9 += carry word, carry bit set on overflow
 274  	ADDE R0, R10, R4	// new carry word = high half + carry bit
 275  	MOVD R9, 0(R7)	// store z word
 276  	ADD $8, R6
 277  	ADD $8, R7
 278  	BDNZ loop1cont	// decrement CTR and loop while it is nonzero
 279  loop1done:
 280  loop4:
 281  	CMP R5, $0; BEQ loop4done; MOVD R5, CTR	// skip when fewer than four words remain, else CTR = groups
 282  loop4cont:
 283  	// unroll 4X
 284  	MOVD 0(R6), R8
 285  	MOVD 8(R6), R9
 286  	MOVD 16(R6), R10
 287  	MOVD 24(R6), R11
 288  	// multiply
      	// R12 and R14 alternate as the high half of consecutive products. Each
      	// low half absorbs the previous high half plus the carry bit, so the
      	// four words form one unbroken ADDC/ADDE chain.
 289  	MULHDU R3, R8, R12
 290  	MULLD R3, R8
 291  	ADDC R4, R8	// first word: add the incoming carry word and start the chain
 292  	MULHDU R3, R9, R14
 293  	MULLD R3, R9
 294  	ADDE R12, R9
 295  	MULHDU R3, R10, R12
 296  	MULLD R3, R10
 297  	ADDE R14, R10
 298  	MULHDU R3, R11, R14
 299  	MULLD R3, R11
 300  	ADDE R12, R11
 301  	ADDE R0, R14, R4	// outgoing carry word = last high half + carry bit
 302  	MOVD R8, 0(R7)
 303  	MOVD R9, 8(R7)
 304  	MOVD R10, 16(R7)
 305  	MOVD R11, 24(R7)
 306  	ADD $32, R6
 307  	ADD $32, R7
 308  	BDNZ loop4cont
 309  loop4done:
 310  	MOVD R4, c+64(FP)	// return the final carry word
 311  	RET
 312  
 313  // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW computes x + y*m + a, treating the len(z) words of x and of y
      // as multi-word values. The low len(z) words of the result are stored in z
      // and the remaining high word is returned in c. With len(z) == 0 nothing is
      // stored and c is a.
      //
      // Each step works in two stages. The multiply stage forms m*y[i], adds the
      // running carry word to the low half and folds the carry bit into the high
      // half. The add stage then adds x[i] to the low half and folds that second
      // carry bit into the running carry word as well. The carry word cannot
      // overflow because (2^64-1)^2 + 2*(2^64-1) equals 2^128 - 1.
      //
      // Only the length of z is read. x and y are indexed for the same number of
      // words and this routine performs no bounds check of its own.
      //
      // Register use: R3 = m, R4 = running carry word (initially a), R6/R7/R8 =
      // running pointers into x/y/z, R5 = number of 4X iterations, R9 = number of
      // 1X iterations (reused as a data register afterwards). R0 is used as zero.
 314  TEXT ·addMulVVWW(SB), NOSPLIT, $0
 315  	MOVD m+72(FP), R3	// R3 = m
 316  	MOVD a+80(FP), R4	// R4 = a, the initial carry word
 317  	MOVD z_len+8(FP), R5	// R5 = len(z)
 318  	MOVD x_base+24(FP), R6	// R6 = pointer into x
 319  	MOVD y_base+48(FP), R7	// R7 = pointer into y
 320  	MOVD z_base+0(FP), R8	// R8 = pointer into z
 321  	// compute unrolled loop lengths
 322  	ANDCC $3, R5, R9	// R9 = len(z) % 4, words handled one at a time
 323  	SRD $2, R5	// R5 = len(z) / 4, groups of four words
 324  loop1:
 325  	CMP R9, $0; BEQ loop1done; MOVD R9, CTR	// skip when there are no leftover words, else CTR = count
 326  loop1cont:
 327  	// unroll 1X
 328  	MOVD 0(R6), R10	// R10 = x word
 329  	MOVD 0(R7), R11	// R11 = y word
 330  	// multiply
 331  	MULHDU R3, R11, R12	// R12 = high 64 bits of m * y word (unsigned)
 332  	MULLD R3, R11	// R11 = low 64 bits of m * y word
 333  	ADDC R4, R11	// R11 += carry word, carry bit set on overflow
 334  	ADDE R0, R12, R4	// carry word = high half + carry bit
 335  	// add
 336  	ADDC R10, R11	// R11 += x word, carry bit set on overflow
 337  	ADDE R0, R4	// carry word += carry bit
 338  	MOVD R11, 0(R8)	// store z word
 339  	ADD $8, R6
 340  	ADD $8, R7
 341  	ADD $8, R8
 342  	BDNZ loop1cont	// decrement CTR and loop while it is nonzero
 343  loop1done:
 344  loop4:
 345  	CMP R5, $0; BEQ loop4done; MOVD R5, CTR	// skip when fewer than four words remain, else CTR = groups
 346  loop4cont:
 347  	// unroll 4X
      	// load four words of x (R9-R12) and four words of y (R14-R17)
 348  	MOVD 0(R6), R9
 349  	MOVD 8(R6), R10
 350  	MOVD 16(R6), R11
 351  	MOVD 24(R6), R12
 352  	MOVD 0(R7), R14
 353  	MOVD 8(R7), R15
 354  	MOVD 16(R7), R16
 355  	MOVD 24(R7), R17
 356  	// multiply
      	// R18 and R19 alternate as the high half of consecutive products. Each
      	// low half absorbs the previous high half plus the carry bit, so the
      	// four words form one unbroken ADDC/ADDE chain.
 357  	MULHDU R3, R14, R18
 358  	MULLD R3, R14
 359  	ADDC R4, R14	// first word: add the incoming carry word and start the chain
 360  	MULHDU R3, R15, R19
 361  	MULLD R3, R15
 362  	ADDE R18, R15
 363  	MULHDU R3, R16, R18
 364  	MULLD R3, R16
 365  	ADDE R19, R16
 366  	MULHDU R3, R17, R19
 367  	MULLD R3, R17
 368  	ADDE R18, R17
 369  	ADDE R0, R19, R4	// carry word = last high half + carry bit, ends the multiply chain
 370  	// add
      	// second carry chain: add the four x words to the four low halves
 371  	ADDC R9, R14
 372  	ADDE R10, R15
 373  	ADDE R11, R16
 374  	ADDE R12, R17
 375  	ADDE R0, R4	// carry word += carry bit left by the add chain
 376  	MOVD R14, 0(R8)
 377  	MOVD R15, 8(R8)
 378  	MOVD R16, 16(R8)
 379  	MOVD R17, 24(R8)
 380  	ADD $32, R6
 381  	ADD $32, R7
 382  	ADD $32, R8
 383  	BDNZ loop4cont
 384  loop4done:
 385  	MOVD R4, c+88(FP)	// return the final carry word
 386  	RET
 387