arith_s390x.s raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
   6  
   7  //go:build !math_big_pure_go
   8  
   9  #include "textflag.h"
  10  
  11  // func addVV(z, x, y []Word) (c Word)
      //
      // addVV stores x[i] + y[i] + carry into z[i] for i in [0, z_len) and
      // returns the final carry (0 or 1) in c.
      //
      // If ·hasVX is nonzero, control transfers to ·addVVvec and the scalar
      // code below is not executed.
      //
      // Only z_len is read; the lengths of x and y are not consulted here, so
      // the caller must ensure both hold at least z_len words.
      //
      // Scalar-path registers:
      //   R0     constant 0
      //   R1     number of 4-word iterations (z_len >> 2)
      //   R2-R4  running pointers into x, y, z
      //   R5     number of single-word iterations (z_len & 3); reused as a
      //          data temporary in the 4-word loop
      //   R6-R8  data temporaries
      //
      // The carry is kept in the condition code for the whole routine.
      // Pointer and counter updates are written as LAY (see the "// ADD"
      // annotations) and loop control uses compare-and-branch, presumably
      // because those do not disturb the carry.
      // NOTE(review): confirm against the z/Architecture instruction reference.
  12  TEXT ·addVV(SB), NOSPLIT, $0
      	// dispatch: use the vector implementation when ·hasVX is nonzero
  13  	MOVB ·hasVX(SB), R1
  14  	CMPBEQ R1, $0, novec
  15  	JMP ·addVVvec(SB)
  16  novec:
  17  	MOVD $0, R0
  18  	MOVD z_len+8(FP), R1
  19  	MOVD x_base+24(FP), R2
  20  	MOVD y_base+48(FP), R3
  21  	MOVD z_base+0(FP), R4
  22  	// compute unrolled loop lengths
      	// R5 = z_len & 3 (words handled one at a time), R1 = z_len >> 2
  23  	MOVD R1, R5
  24  	AND $3, R5
  25  	SRD $2, R1
      	// R0 is zero, so R1 keeps its value; the add is done for its flag effect
  26  	ADDC R0, R1	// clear carry
  27  loop1:
  28  	CMPBEQ R5, $0, loop1done
  29  loop1cont:
  30  	// unroll 1X
      	// z[i] = x[i] + y[i] + carry
  31  	MOVD 0(R2), R6
  32  	MOVD 0(R3), R7
  33  	ADDE R7, R6
  34  	MOVD R6, 0(R4)
  35  	LAY 8(R2), R2	// ADD $8, R2
  36  	LAY 8(R3), R3	// ADD $8, R3
  37  	LAY 8(R4), R4	// ADD $8, R4
  38  	LAY -1(R5), R5	// ADD $-1, R5
  39  	CMPBNE R5, $0, loop1cont
  40  loop1done:
  41  loop4:
  42  	CMPBEQ R1, $0, loop4done
  43  loop4cont:
  44  	// unroll 4X in batches of 2
      	// words 0 and 1 of this group; the two ADDEs chain the carry
  45  	MOVD 0(R2), R5
  46  	MOVD 8(R2), R6
  47  	MOVD 0(R3), R7
  48  	MOVD 8(R3), R8
  49  	ADDE R7, R5
  50  	ADDE R8, R6
  51  	MOVD R5, 0(R4)
  52  	MOVD R6, 8(R4)
      	// words 2 and 3 of this group
  53  	MOVD 16(R2), R5
  54  	MOVD 24(R2), R6
  55  	MOVD 16(R3), R7
  56  	MOVD 24(R3), R8
  57  	ADDE R7, R5
  58  	ADDE R8, R6
  59  	MOVD R5, 16(R4)
  60  	MOVD R6, 24(R4)
  61  	LAY 32(R2), R2	// ADD $32, R2
  62  	LAY 32(R3), R3	// ADD $32, R3
  63  	LAY 32(R4), R4	// ADD $32, R4
  64  	LAY -1(R1), R1	// ADD $-1, R1
  65  	CMPBNE R1, $0, loop4cont
  66  loop4done:
      	// R2 = 0 + 0 + carry, i.e. the carry as a 0/1 word
  67  	ADDE R0, R0, R2	// save & convert add carry
  68  	MOVD R2, c+72(FP)
  69  	RET
  70  
  71  // func subVV(z, x, y []Word) (c Word)
      //
      // subVV stores x[i] - y[i] - borrow into z[i] for i in [0, z_len) and
      // returns the final borrow (0 or 1) in c.
      //
      // If ·hasVX is nonzero, control transfers to ·subVVvec and the scalar
      // code below is not executed.
      //
      // Only z_len is read; the lengths of x and y are not consulted here, so
      // the caller must ensure both hold at least z_len words.
      //
      // Scalar-path registers:
      //   R0     constant 0
      //   R1     number of 4-word iterations (z_len >> 2)
      //   R2-R4  running pointers into x, y, z (R2 also holds the result c
      //          at the end)
      //   R5     number of single-word iterations (z_len & 3); reused as a
      //          data temporary in the 4-word loop
      //   R6-R8  data temporaries
      //
      // The borrow is kept in the condition code for the whole routine.
      // Pointer and counter updates are written as LAY (see the "// ADD"
      // annotations) and loop control uses compare-and-branch, presumably
      // because those do not disturb the borrow.
      // NOTE(review): confirm against the z/Architecture instruction reference.
  72  TEXT ·subVV(SB), NOSPLIT, $0
      	// dispatch: use the vector implementation when ·hasVX is nonzero
  73  	MOVB ·hasVX(SB), R1
  74  	CMPBEQ R1, $0, novec
  75  	JMP ·subVVvec(SB)
  76  novec:
  77  	MOVD $0, R0
  78  	MOVD z_len+8(FP), R1
  79  	MOVD x_base+24(FP), R2
  80  	MOVD y_base+48(FP), R3
  81  	MOVD z_base+0(FP), R4
  82  	// compute unrolled loop lengths
      	// R5 = z_len & 3 (words handled one at a time), R1 = z_len >> 2
  83  	MOVD R1, R5
  84  	AND $3, R5
  85  	SRD $2, R1
      	// R0 is zero, so R1 keeps its value; the subtract is done for its flag
      	// effect, putting the chain in the "no borrow" state before the first SUBE
  86  	SUBC R0, R1	// clear carry
  87  loop1:
  88  	CMPBEQ R5, $0, loop1done
  89  loop1cont:
  90  	// unroll 1X
      	// z[i] = x[i] - y[i] - borrow
  91  	MOVD 0(R2), R6
  92  	MOVD 0(R3), R7
  93  	SUBE R7, R6
  94  	MOVD R6, 0(R4)
  95  	LAY 8(R2), R2	// ADD $8, R2
  96  	LAY 8(R3), R3	// ADD $8, R3
  97  	LAY 8(R4), R4	// ADD $8, R4
  98  	LAY -1(R5), R5	// ADD $-1, R5
  99  	CMPBNE R5, $0, loop1cont
 100  loop1done:
 101  loop4:
 102  	CMPBEQ R1, $0, loop4done
 103  loop4cont:
 104  	// unroll 4X in batches of 2
      	// words 0 and 1 of this group; the two SUBEs chain the borrow
 105  	MOVD 0(R2), R5
 106  	MOVD 8(R2), R6
 107  	MOVD 0(R3), R7
 108  	MOVD 8(R3), R8
 109  	SUBE R7, R5
 110  	SUBE R8, R6
 111  	MOVD R5, 0(R4)
 112  	MOVD R6, 8(R4)
      	// words 2 and 3 of this group
 113  	MOVD 16(R2), R5
 114  	MOVD 24(R2), R6
 115  	MOVD 16(R3), R7
 116  	MOVD 24(R3), R8
 117  	SUBE R7, R5
 118  	SUBE R8, R6
 119  	MOVD R5, 16(R4)
 120  	MOVD R6, 24(R4)
 121  	LAY 32(R2), R2	// ADD $32, R2
 122  	LAY 32(R3), R3	// ADD $32, R3
 123  	LAY 32(R4), R4	// ADD $32, R4
 124  	LAY -1(R1), R1	// ADD $-1, R1
 125  	CMPBNE R1, $0, loop4cont
 126  loop4done:
      	// the x pointer in R2 is no longer needed; R2 - R2 - borrow leaves
      	// 0 or -1, and NEG turns that into the 0/1 result
 127  	SUBE R2, R2	// save carry
 128  	NEG R2	// convert sub carry
 129  	MOVD R2, c+72(FP)
 130  	RET
 131  
 132  // func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU writes x shifted left by s bits into z, treating the z_len words
      // as one multi-word value, and returns in c the bits shifted out of the
      // top word (x[z_len-1] >> (64-s)). If z_len is 0 it returns c = 0 without
      // touching memory.
      //
      // The words are processed from the highest index down to index 0.
      //
      // Only z_len is read; the length of x is not consulted here, so the
      // caller must ensure x holds at least z_len words.
      // NOTE(review): the code computes 64-s as the complementary shift count;
      // presumably callers guarantee 0 < s < 64 - confirm against the callers.
      //
      // Registers:
      //   R0     constant 0
      //   R1     word count, later the number of 4-word iterations
      //   R2     s
      //   R3,R4  pointers into x and z, starting one past the last word
      //   R5     bits of the previously loaded word shifted left by s, waiting
      //          to be OR-ed with the top bits of the next lower word
      //   R6     64 - s
      //   R7     number of single-word iterations; reused as a temporary in
      //          the 4-word loop
      //   R8,R9  data temporaries
 133  TEXT ·lshVU(SB), NOSPLIT, $0
 134  	MOVD $0, R0
 135  	MOVD z_len+8(FP), R1
 136  	CMPBEQ R1, $0, ret0
 137  	MOVD s+48(FP), R2
 138  	MOVD x_base+24(FP), R3
 139  	MOVD z_base+0(FP), R4
 140  	// run loop backward
      	// R5 = 8*z_len (byte length); advance both pointers to the end.
      	// The second SLD recomputes the same value (R1 is unchanged in between).
 141  	SLD $3, R1, R5
 142  	LAY (R5)(R3), R3	// ADD R5, R3
 143  	SLD $3, R1, R5
 144  	LAY (R5)(R4), R4	// ADD R5, R4
 145  	// shift first word into carry
      	// R5 = top word of x, R6 = 64 - s,
      	// R7 = bits shifted out (the result c), R5 = top word << s
 146  	MOVD -8(R3), R5
 147  	MOVD $64, R6
 148  	SUBC R2, R6
 149  	SRD R6, R5, R7
 150  	SLD R2, R5
 151  	MOVD R7, c+56(FP)
 152  	// shift remaining words
      	// R1 = z_len - 1 words still to read
 153  	SUBC $1, R1
 154  	// compute unrolled loop lengths
      	// R7 = remaining & 3 (words handled one at a time), R1 = remaining >> 2
 155  	MOVD R1, R7
 156  	AND $3, R7
 157  	SRD $2, R1
 158  loop1:
 159  	CMPBEQ R7, $0, loop1done
 160  loop1cont:
 161  	// unroll 1X
      	// R8 = next lower word of x; output word = (R8 >> (64-s)) | pending bits;
      	// new pending bits = R8 << s
 162  	MOVD -16(R3), R8
 163  	SRD R6, R8, R9
 164  	OR R5, R9
 165  	SLD R2, R8, R5
 166  	MOVD R9, -8(R4)
 167  	LAY -8(R3), R3	// ADD $-8, R3
 168  	LAY -8(R4), R4	// ADD $-8, R4
 169  	LAY -1(R7), R7	// ADD $-1, R7
 170  	CMPBNE R7, $0, loop1cont
 171  loop1done:
 172  loop4:
 173  	CMPBEQ R1, $0, loop4done
 174  loop4cont:
 175  	// unroll 4X in batches of 2
      	// same step as the 1X loop, applied to two words per batch;
      	// R7 and R9 alternate as the output temporaries
 176  	MOVD -16(R3), R7
 177  	MOVD -24(R3), R8
 178  	SRD R6, R7, R9
 179  	OR R5, R9
 180  	SLD R2, R7, R5
 181  	SRD R6, R8, R7
 182  	OR R5, R7
 183  	SLD R2, R8, R5
 184  	MOVD R9, -8(R4)
 185  	MOVD R7, -16(R4)
      	// second batch of two words
 186  	MOVD -32(R3), R7
 187  	MOVD -40(R3), R8
 188  	SRD R6, R7, R9
 189  	OR R5, R9
 190  	SLD R2, R7, R5
 191  	SRD R6, R8, R7
 192  	OR R5, R7
 193  	SLD R2, R8, R5
 194  	MOVD R9, -24(R4)
 195  	MOVD R7, -32(R4)
 196  	LAY -32(R3), R3	// ADD $-32, R3
 197  	LAY -32(R4), R4	// ADD $-32, R4
 198  	LAY -1(R1), R1	// ADD $-1, R1
 199  	CMPBNE R1, $0, loop4cont
 200  loop4done:
 201  	// store final shifted bits
      	// the lowest output word has no lower neighbour, so it is just the pending bits
 202  	MOVD R5, -8(R4)
 203  	RET
 204  ret0:
      	// empty input: return c = 0
 205  	MOVD R0, c+56(FP)
 206  	RET
 207  
 208  // func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU writes x shifted right by s bits into z, treating the z_len words
      // as one multi-word value, and returns in c the bits shifted out of the
      // bottom word (x[0] << (64-s)). If z_len is 0 it returns c = 0 without
      // touching memory.
      //
      // The words are processed from index 0 upward.
      //
      // Only z_len is read; the length of x is not consulted here, so the
      // caller must ensure x holds at least z_len words.
      // NOTE(review): the code computes 64-s as the complementary shift count;
      // presumably callers guarantee 0 < s < 64 - confirm against the callers.
      //
      // Registers:
      //   R0     constant 0
      //   R1     word count, later the number of 4-word iterations
      //   R2     s
      //   R3,R4  running pointers into x and z
      //   R5     bits of the previously loaded word shifted right by s, waiting
      //          to be OR-ed with the low bits of the next higher word
      //   R6     64 - s
      //   R7     number of single-word iterations; reused as a temporary in
      //          the 4-word loop
      //   R8,R9  data temporaries
 209  TEXT ·rshVU(SB), NOSPLIT, $0
 210  	MOVD $0, R0
 211  	MOVD z_len+8(FP), R1
 212  	CMPBEQ R1, $0, ret0
 213  	MOVD s+48(FP), R2
 214  	MOVD x_base+24(FP), R3
 215  	MOVD z_base+0(FP), R4
 216  	// shift first word into carry
      	// R5 = x[0], R6 = 64 - s,
      	// R7 = bits shifted out (the result c), R5 = x[0] >> s
 217  	MOVD 0(R3), R5
 218  	MOVD $64, R6
 219  	SUBC R2, R6
 220  	SLD R6, R5, R7
 221  	SRD R2, R5
 222  	MOVD R7, c+56(FP)
 223  	// shift remaining words
      	// R1 = z_len - 1 words still to read
 224  	SUBC $1, R1
 225  	// compute unrolled loop lengths
      	// R7 = remaining & 3 (words handled one at a time), R1 = remaining >> 2
 226  	MOVD R1, R7
 227  	AND $3, R7
 228  	SRD $2, R1
 229  loop1:
 230  	CMPBEQ R7, $0, loop1done
 231  loop1cont:
 232  	// unroll 1X
      	// R8 = next higher word of x; output word = (R8 << (64-s)) | pending bits;
      	// new pending bits = R8 >> s
 233  	MOVD 8(R3), R8
 234  	SLD R6, R8, R9
 235  	OR R5, R9
 236  	SRD R2, R8, R5
 237  	MOVD R9, 0(R4)
 238  	LAY 8(R3), R3	// ADD $8, R3
 239  	LAY 8(R4), R4	// ADD $8, R4
 240  	LAY -1(R7), R7	// ADD $-1, R7
 241  	CMPBNE R7, $0, loop1cont
 242  loop1done:
 243  loop4:
 244  	CMPBEQ R1, $0, loop4done
 245  loop4cont:
 246  	// unroll 4X in batches of 2
      	// same step as the 1X loop, applied to two words per batch;
      	// R7 and R9 alternate as the output temporaries
 247  	MOVD 8(R3), R7
 248  	MOVD 16(R3), R8
 249  	SLD R6, R7, R9
 250  	OR R5, R9
 251  	SRD R2, R7, R5
 252  	SLD R6, R8, R7
 253  	OR R5, R7
 254  	SRD R2, R8, R5
 255  	MOVD R9, 0(R4)
 256  	MOVD R7, 8(R4)
      	// second batch of two words
 257  	MOVD 24(R3), R7
 258  	MOVD 32(R3), R8
 259  	SLD R6, R7, R9
 260  	OR R5, R9
 261  	SRD R2, R7, R5
 262  	SLD R6, R8, R7
 263  	OR R5, R7
 264  	SRD R2, R8, R5
 265  	MOVD R9, 16(R4)
 266  	MOVD R7, 24(R4)
 267  	LAY 32(R3), R3	// ADD $32, R3
 268  	LAY 32(R4), R4	// ADD $32, R4
 269  	LAY -1(R1), R1	// ADD $-1, R1
 270  	CMPBNE R1, $0, loop4cont
 271  loop4done:
 272  	// store final shifted bits
      	// the highest output word has no higher neighbour, so it is just the pending bits
 273  	MOVD R5, 0(R4)
 274  	RET
 275  ret0:
      	// empty input: return c = 0
 276  	MOVD R0, c+56(FP)
 277  	RET
 278  
 279  // func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW computes z = x*m + a over z_len words: for each i the low
      // word of x[i]*m + carry is stored in z[i] and the high word becomes the
      // next carry. The carry starts as a, and its final value is returned in c.
      // With z_len == 0 no memory is written and c = a.
      //
      // Only z_len is read; the length of x is not consulted here, so the
      // caller must ensure x holds at least z_len words.
      //
      // Registers:
      //   R0       constant 0
      //   R1       m
      //   R2       running carry word (initially a)
      //   R3       number of 4-word iterations (z_len >> 2)
      //   R4,R5    running pointers into x and z
      //   R6       number of single-word iterations (z_len & 3)
      //   R10,R11  product of the current word: R11 is loaded with x[i] before
      //            MLGR and is used as the low word afterwards, R10 as the
      //            high word
 280  TEXT ·mulAddVWW(SB), NOSPLIT, $0
 281  	MOVD $0, R0
 282  	MOVD m+48(FP), R1
 283  	MOVD a+56(FP), R2
 284  	MOVD z_len+8(FP), R3
 285  	MOVD x_base+24(FP), R4
 286  	MOVD z_base+0(FP), R5
 287  	// compute unrolled loop lengths
      	// R6 = z_len & 3 (words handled one at a time), R3 = z_len >> 2
 288  	MOVD R3, R6
 289  	AND $3, R6
 290  	SRD $2, R3
 291  loop1:
 292  	CMPBEQ R6, $0, loop1done
 293  loop1cont:
 294  	// unroll 1X in batches of 1
 295  	MOVD 0(R4), R11
 296  	// multiply
      	// low word += carry-in; new carry = high word + 0 + carry flag from that add
 297  	MLGR R1, R10
 298  	ADDC R2, R11
 299  	ADDE R0, R10, R2
 300  	MOVD R11, 0(R5)
 301  	LAY 8(R4), R4	// ADD $8, R4
 302  	LAY 8(R5), R5	// ADD $8, R5
 303  	LAY -1(R6), R6	// ADD $-1, R6
 304  	CMPBNE R6, $0, loop1cont
 305  loop1done:
 306  loop4:
 307  	CMPBEQ R3, $0, loop4done
 308  loop4cont:
 309  	// unroll 4X in batches of 1
      	// four copies of the 1X step at offsets 0, 8, 16 and 24; the carry is
      	// passed from one step to the next in R2
 310  	MOVD 0(R4), R11
 311  	// multiply
 312  	MLGR R1, R10
 313  	ADDC R2, R11
 314  	ADDE R0, R10, R2
 315  	MOVD R11, 0(R5)
 316  	MOVD 8(R4), R11
 317  	// multiply
 318  	MLGR R1, R10
 319  	ADDC R2, R11
 320  	ADDE R0, R10, R2
 321  	MOVD R11, 8(R5)
 322  	MOVD 16(R4), R11
 323  	// multiply
 324  	MLGR R1, R10
 325  	ADDC R2, R11
 326  	ADDE R0, R10, R2
 327  	MOVD R11, 16(R5)
 328  	MOVD 24(R4), R11
 329  	// multiply
 330  	MLGR R1, R10
 331  	ADDC R2, R11
 332  	ADDE R0, R10, R2
 333  	MOVD R11, 24(R5)
 334  	LAY 32(R4), R4	// ADD $32, R4
 335  	LAY 32(R5), R5	// ADD $32, R5
 336  	LAY -1(R3), R3	// ADD $-1, R3
 337  	CMPBNE R3, $0, loop4cont
 338  loop4done:
      	// return the final carry word
 339  	MOVD R2, c+64(FP)
 340  	RET
 341  
 342  // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW computes z = x + y*m + a over z_len words: for each i the
      // low word of x[i] + y[i]*m + carry is stored in z[i] and the overflow
      // becomes the next carry. The carry starts as a, and its final value is
      // returned in c. With z_len == 0 no memory is written and c = a.
      //
      // Only z_len is read; the lengths of x and y are not consulted here, so
      // the caller must ensure both hold at least z_len words.
      //
      // Registers:
      //   R0       constant 0
      //   R1       m
      //   R2       running carry word (initially a)
      //   R3       number of 4-word iterations (z_len >> 2)
      //   R4-R6    running pointers into x, y, z
      //   R7       number of single-word iterations (z_len & 3); reused to hold
      //            x[i] in the 4-word loop
      //   R8       x[i] in the single-word loop
      //   R10,R11  product of the current word: R11 is loaded with y[i] before
      //            MLGR and is used as the low word afterwards, R10 as the
      //            high word
 343  TEXT ·addMulVVWW(SB), NOSPLIT, $0
 344  	MOVD $0, R0
 345  	MOVD m+72(FP), R1
 346  	MOVD a+80(FP), R2
 347  	MOVD z_len+8(FP), R3
 348  	MOVD x_base+24(FP), R4
 349  	MOVD y_base+48(FP), R5
 350  	MOVD z_base+0(FP), R6
 351  	// compute unrolled loop lengths
      	// R7 = z_len & 3 (words handled one at a time), R3 = z_len >> 2
 352  	MOVD R3, R7
 353  	AND $3, R7
 354  	SRD $2, R3
 355  loop1:
 356  	CMPBEQ R7, $0, loop1done
 357  loop1cont:
 358  	// unroll 1X in batches of 1
 359  	MOVD 0(R4), R8
 360  	MOVD 0(R5), R11
 361  	// multiply
      	// low word += carry-in; new carry = high word + 0 + carry flag from that add
 362  	MLGR R1, R10
 363  	ADDC R2, R11
 364  	ADDE R0, R10, R2
 365  	// add
      	// low word += x[i]; fold the resulting carry flag into the carry word
 366  	ADDC R8, R11
 367  	ADDE R0, R2
 368  	MOVD R11, 0(R6)
 369  	LAY 8(R4), R4	// ADD $8, R4
 370  	LAY 8(R5), R5	// ADD $8, R5
 371  	LAY 8(R6), R6	// ADD $8, R6
 372  	LAY -1(R7), R7	// ADD $-1, R7
 373  	CMPBNE R7, $0, loop1cont
 374  loop1done:
 375  loop4:
 376  	CMPBEQ R3, $0, loop4done
 377  loop4cont:
 378  	// unroll 4X in batches of 1
      	// four copies of the 1X step at offsets 0, 8, 16 and 24, with x[i] held
      	// in R7 instead of R8; the carry is passed between steps in R2
 379  	MOVD 0(R4), R7
 380  	MOVD 0(R5), R11
 381  	// multiply
 382  	MLGR R1, R10
 383  	ADDC R2, R11
 384  	ADDE R0, R10, R2
 385  	// add
 386  	ADDC R7, R11
 387  	ADDE R0, R2
 388  	MOVD R11, 0(R6)
 389  	MOVD 8(R4), R7
 390  	MOVD 8(R5), R11
 391  	// multiply
 392  	MLGR R1, R10
 393  	ADDC R2, R11
 394  	ADDE R0, R10, R2
 395  	// add
 396  	ADDC R7, R11
 397  	ADDE R0, R2
 398  	MOVD R11, 8(R6)
 399  	MOVD 16(R4), R7
 400  	MOVD 16(R5), R11
 401  	// multiply
 402  	MLGR R1, R10
 403  	ADDC R2, R11
 404  	ADDE R0, R10, R2
 405  	// add
 406  	ADDC R7, R11
 407  	ADDE R0, R2
 408  	MOVD R11, 16(R6)
 409  	MOVD 24(R4), R7
 410  	MOVD 24(R5), R11
 411  	// multiply
 412  	MLGR R1, R10
 413  	ADDC R2, R11
 414  	ADDE R0, R10, R2
 415  	// add
 416  	ADDC R7, R11
 417  	ADDE R0, R2
 418  	MOVD R11, 24(R6)
 419  	LAY 32(R4), R4	// ADD $32, R4
 420  	LAY 32(R5), R5	// ADD $32, R5
 421  	LAY 32(R6), R6	// ADD $32, R6
 422  	LAY -1(R3), R3	// ADD $-1, R3
 423  	CMPBNE R3, $0, loop4cont
 424  loop4done:
      	// return the final carry word
 425  	MOVD R2, c+88(FP)
 426  	RET
 427