arith_mipsx.s raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
   6  
   7  //go:build !math_big_pure_go && (mips || mipsle)
   8  
   9  #include "textflag.h"
  10  
  11  // func addVV(z, x, y []Word) (c Word)
      //
      // addVV computes z[i] = x[i] + y[i] + carry for i in [0, len(z)),
      // propagating the carry from low index to high, and stores the final
      // carry in c. Only len(z) is read; x and y are indexed with the same
      // count (presumably callers guarantee they are at least that long).
      // Elements are accessed with MOVW and a 4-byte stride.
      //
      // No condition flags are used: each add-with-carry (the "ADCS" named in
      // the comments) is synthesized from ADDU plus SGTU, which detects
      // wraparound by checking whether an addend is unsigned-greater than the
      // resulting sum.
      //
      // Register roles:
      //	R1      remaining groups of four words
      //	R2-R4   cursors into x, y, z
      //	R5      remaining single words (1X loop); reused as data in the 4X loop
      //	R23     wrap bit from the first add of a column
      //	R24     carry passed between columns
  12  TEXT ·addVV(SB), NOSPLIT, $0
  13  	MOVW z_len+4(FP), R1	// R1 = len(z)
  14  	MOVW x_base+12(FP), R2	// R2 = &x[0]
  15  	MOVW y_base+24(FP), R3	// R3 = &y[0]
  16  	MOVW z_base+0(FP), R4	// R4 = &z[0]
  17  	// compute unrolled loop lengths
  18  	AND $3, R1, R5	// R5 = len(z) & 3: words done one at a time
  19  	SRL $2, R1	// R1 = len(z) >> 2: groups of four words
  20  	XOR R24, R24	// clear carry
  21  loop1:
  22  	BEQ R5, loop1done	// no leftover words: go straight to the 4X loop
  23  loop1cont:
  24  	// unroll 1X
  25  	MOVW 0(R2), R6	// R6 = x word
  26  	MOVW 0(R3), R7	// R7 = y word
  27  	ADDU R7, R6	// ADCS R7, R6, R6 (cr=R24): R6 += R7
  28  	SGTU R7, R6, R23	// ... R23 = 1 if R7 > sum, i.e. the add wrapped
  29  	ADDU R24, R6	// ... R6 += incoming carry
  30  	SGTU R24, R6, R24	// ... R24 = 1 if adding the carry wrapped
  31  	ADDU R23, R24	// ... carry out = sum of the two wrap bits
  32  	MOVW R6, 0(R4)	// store z word
  33  	ADDU $4, R2	// advance all three cursors by one word
  34  	ADDU $4, R3
  35  	ADDU $4, R4
  36  	SUBU $1, R5
  37  	BNE R5, loop1cont
  38  loop1done:
  39  loop4:
  40  	BEQ R1, loop4done	// no full groups of four: finish
  41  loop4cont:
  42  	// unroll 4X
  43  	MOVW 0(R2), R5	// R5-R8 = four x words (R5 is free: the 1X count is spent)
  44  	MOVW 4(R2), R6
  45  	MOVW 8(R2), R7
  46  	MOVW 12(R2), R8
  47  	MOVW 0(R3), R9	// R9-R12 = four y words
  48  	MOVW 4(R3), R10
  49  	MOVW 8(R3), R11
  50  	MOVW 12(R3), R12
  51  	ADDU R9, R5	// ADCS R9, R5, R5 (cr=R24)
  52  	SGTU R9, R5, R23	// ...
  53  	ADDU R24, R5	// ...
  54  	SGTU R24, R5, R24	// ...
  55  	ADDU R23, R24	// ...
  56  	ADDU R10, R6	// ADCS R10, R6, R6 (cr=R24)
  57  	SGTU R10, R6, R23	// ...
  58  	ADDU R24, R6	// ...
  59  	SGTU R24, R6, R24	// ...
  60  	ADDU R23, R24	// ...
  61  	ADDU R11, R7	// ADCS R11, R7, R7 (cr=R24)
  62  	SGTU R11, R7, R23	// ...
  63  	ADDU R24, R7	// ...
  64  	SGTU R24, R7, R24	// ...
  65  	ADDU R23, R24	// ...
  66  	ADDU R12, R8	// ADCS R12, R8, R8 (cr=R24)
  67  	SGTU R12, R8, R23	// ...
  68  	ADDU R24, R8	// ...
  69  	SGTU R24, R8, R24	// ...
  70  	ADDU R23, R24	// ...
  71  	MOVW R5, 0(R4)	// store the four sums
  72  	MOVW R6, 4(R4)
  73  	MOVW R7, 8(R4)
  74  	MOVW R8, 12(R4)
  75  	ADDU $16, R2	// advance all three cursors by four words
  76  	ADDU $16, R3
  77  	ADDU $16, R4
  78  	SUBU $1, R1
  79  	BNE R1, loop4cont
  80  loop4done:
  81  	MOVW R24, c+36(FP)	// return the final carry
  82  	RET
  83  
  84  // func subVV(z, x, y []Word) (c Word)
      //
      // subVV computes z[i] = x[i] - y[i] - borrow for i in [0, len(z)),
      // propagating the borrow from low index to high, and stores the final
      // borrow in c. Only len(z) is read; x and y are indexed with the same
      // count (presumably callers guarantee they are at least that long).
      // Elements are accessed with MOVW and a 4-byte stride.
      //
      // Each subtract-with-borrow (the "SBCS" named in the comments) is
      // synthesized from SUBU plus SGTU: before each subtraction, SGTU records
      // whether the subtrahend is unsigned-greater than the current value,
      // i.e. whether the subtraction is about to wrap.
      //
      // Register roles:
      //	R1      remaining groups of four words
      //	R2-R4   cursors into x, y, z
      //	R5      remaining single words (1X loop); reused as data in the 4X loop
      //	R23     wrap bit from subtracting the incoming borrow
      //	R24     borrow passed between columns
  85  TEXT ·subVV(SB), NOSPLIT, $0
  86  	MOVW z_len+4(FP), R1	// R1 = len(z)
  87  	MOVW x_base+12(FP), R2	// R2 = &x[0]
  88  	MOVW y_base+24(FP), R3	// R3 = &y[0]
  89  	MOVW z_base+0(FP), R4	// R4 = &z[0]
  90  	// compute unrolled loop lengths
  91  	AND $3, R1, R5	// R5 = len(z) & 3: words done one at a time
  92  	SRL $2, R1	// R1 = len(z) >> 2: groups of four words
  93  	XOR R24, R24	// clear carry
  94  loop1:
  95  	BEQ R5, loop1done	// no leftover words: go straight to the 4X loop
  96  loop1cont:
  97  	// unroll 1X
  98  	MOVW 0(R2), R6	// R6 = x word (minuend)
  99  	MOVW 0(R3), R7	// R7 = y word (subtrahend)
 100  	SGTU R24, R6, R23	// SBCS R7, R6, R6: R23 = 1 if borrow-in > R6 (next SUBU wraps)
 101  	SUBU R24, R6	// ... R6 -= incoming borrow
 102  	SGTU R7, R6, R24	// ... R24 = 1 if R7 > R6 (next SUBU wraps)
 103  	SUBU R7, R6	// ... R6 -= R7
 104  	ADDU R23, R24	// ... borrow out = sum of the two wrap bits
 105  	MOVW R6, 0(R4)	// store z word
 106  	ADDU $4, R2	// advance all three cursors by one word
 107  	ADDU $4, R3
 108  	ADDU $4, R4
 109  	SUBU $1, R5
 110  	BNE R5, loop1cont
 111  loop1done:
 112  loop4:
 113  	BEQ R1, loop4done	// no full groups of four: finish
 114  loop4cont:
 115  	// unroll 4X
 116  	MOVW 0(R2), R5	// R5-R8 = four x words (R5 is free: the 1X count is spent)
 117  	MOVW 4(R2), R6
 118  	MOVW 8(R2), R7
 119  	MOVW 12(R2), R8
 120  	MOVW 0(R3), R9	// R9-R12 = four y words
 121  	MOVW 4(R3), R10
 122  	MOVW 8(R3), R11
 123  	MOVW 12(R3), R12
 124  	SGTU R24, R5, R23	// SBCS R9, R5, R5
 125  	SUBU R24, R5	// ...
 126  	SGTU R9, R5, R24	// ...
 127  	SUBU R9, R5	// ...
 128  	ADDU R23, R24	// ...
 129  	SGTU R24, R6, R23	// SBCS R10, R6, R6
 130  	SUBU R24, R6	// ...
 131  	SGTU R10, R6, R24	// ...
 132  	SUBU R10, R6	// ...
 133  	ADDU R23, R24	// ...
 134  	SGTU R24, R7, R23	// SBCS R11, R7, R7
 135  	SUBU R24, R7	// ...
 136  	SGTU R11, R7, R24	// ...
 137  	SUBU R11, R7	// ...
 138  	ADDU R23, R24	// ...
 139  	SGTU R24, R8, R23	// SBCS R12, R8, R8
 140  	SUBU R24, R8	// ...
 141  	SGTU R12, R8, R24	// ...
 142  	SUBU R12, R8	// ...
 143  	ADDU R23, R24	// ...
 144  	MOVW R5, 0(R4)	// store the four differences
 145  	MOVW R6, 4(R4)
 146  	MOVW R7, 8(R4)
 147  	MOVW R8, 12(R4)
 148  	ADDU $16, R2	// advance all three cursors by four words
 149  	ADDU $16, R3
 150  	ADDU $16, R4
 151  	SUBU $1, R1
 152  	BNE R1, loop4cont
 153  loop4done:
 154  	MOVW R24, c+36(FP)	// return the final borrow
 155  	RET
 156  
 157  // func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU writes x shifted left by s bits into z, working from the
      // highest-indexed word down to index 0, and stores in c the bits that
      // fall off the top word (x[len(z)-1] >> (32-s)). If len(z) is 0 it only
      // stores 0 in c. Only len(z) is read; x is indexed with the same count.
      //
      // NOTE(review): the complementary shift count is computed as 32-s, so
      // this presumably relies on callers passing 0 < s < 32 — confirm.
      //
      // Register roles:
      //	R1      len(z), then remaining groups of four words
      //	R2      s
      //	R3, R4  cursors into x and z, starting one word past the end
      //	R5      pending high bits (previous input word << s) for the next store
      //	R6      32 - s
      //	R7      remaining single words (1X loop); reused as data in the 4X loop
 158  TEXT ·lshVU(SB), NOSPLIT, $0
 159  	MOVW z_len+4(FP), R1	// R1 = len(z)
 160  	BEQ R1, ret0	// empty vector: just return c = 0
 161  	MOVW s+24(FP), R2	// R2 = s
 162  	MOVW x_base+12(FP), R3	// R3 = &x[0]
 163  	MOVW z_base+0(FP), R4	// R4 = &z[0]
 164  	// run loop backward
 165  	SLL $2, R1, R5	// R5 = len(z) * 4 bytes
 166  	ADDU R5, R3	// R3 = one past the last word of x
 167  	SLL $2, R1, R5	// same value recomputed (R5 is unchanged since the SLL above)
 168  	ADDU R5, R4	// R4 = one past the last word of z
 169  	// shift first word into carry
 170  	MOVW -4(R3), R5	// R5 = top input word
 171  	MOVW $32, R6
 172  	SUBU R2, R6	// R6 = 32 - s
 173  	SRL R6, R5, R7	// R7 = bits shifted out of the top word
 174  	SLL R2, R5	// R5 = top word << s, pending until the word below is read
 175  	MOVW R7, c+28(FP)	// result is stored now; R7 is reused below
 176  	// shift remaining words
 177  	SUBU $1, R1	// one word already consumed
 178  	// compute unrolled loop lengths
 179  	AND $3, R1, R7	// R7 = remaining & 3: words done one at a time
 180  	SRL $2, R1	// R1 = remaining >> 2: groups of four words
 181  loop1:
 182  	BEQ R7, loop1done	// no leftover words: go straight to the 4X loop
 183  loop1cont:
 184  	// unroll 1X
 185  	MOVW -8(R3), R8	// R8 = next lower input word
 186  	SRL R6, R8, R9	// R9 = its top s bits, moved to the bottom
 187  	OR R5, R9	// combine with the pending bits from the word above
 188  	SLL R2, R8, R5	// new pending bits = this word << s
 189  	MOVW R9, -4(R4)	// store the completed output word
 190  	ADDU $-4, R3	// step both cursors down one word
 191  	ADDU $-4, R4
 192  	SUBU $1, R7
 193  	BNE R7, loop1cont
 194  loop1done:
 195  loop4:
 196  	BEQ R1, loop4done	// no full groups of four: finish
 197  loop4cont:
 198  	// unroll 4X
 199  	MOVW -8(R3), R7	// R7-R10 = next four input words, highest first
 200  	MOVW -12(R3), R8
 201  	MOVW -16(R3), R9
 202  	MOVW -20(R3), R10
 203  	SRL R6, R7, R11	// output 0 -> R11 (R7 is still needed for the next SLL)
 204  	OR R5, R11
 205  	SLL R2, R7, R5
 206  	SRL R6, R8, R7	// output 1 -> R7 (its input value is no longer needed)
 207  	OR R5, R7
 208  	SLL R2, R8, R5
 209  	SRL R6, R9, R8	// output 2 -> R8
 210  	OR R5, R8
 211  	SLL R2, R9, R5
 212  	SRL R6, R10, R9	// output 3 -> R9
 213  	OR R5, R9
 214  	SLL R2, R10, R5	// pending bits for the next group / final store
 215  	MOVW R11, -4(R4)	// store the four outputs, highest first
 216  	MOVW R7, -8(R4)
 217  	MOVW R8, -12(R4)
 218  	MOVW R9, -16(R4)
 219  	ADDU $-16, R3	// step both cursors down four words
 220  	ADDU $-16, R4
 221  	SUBU $1, R1
 222  	BNE R1, loop4cont
 223  loop4done:
 224  	// store final shifted bits
 225  	MOVW R5, -4(R4)	// lowest output word has nothing shifted in from below
 226  	RET
 227  ret0:
 228  	MOVW R0, c+28(FP)	// R0 is the hardwired zero register: c = 0
 229  	RET
 230  
 231  // func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU writes x shifted right by s bits into z, working from index 0
      // upward, and stores in c the bits that fall off the bottom word
      // (x[0] << (32-s)). If len(z) is 0 it only stores 0 in c. Only len(z)
      // is read; x is indexed with the same count.
      //
      // NOTE(review): the complementary shift count is computed as 32-s, so
      // this presumably relies on callers passing 0 < s < 32 — confirm.
      //
      // Register roles:
      //	R1      len(z), then remaining groups of four words
      //	R2      s
      //	R3, R4  cursors into x and z
      //	R5      pending low bits (previous input word >> s) for the next store
      //	R6      32 - s
      //	R7      remaining single words (1X loop); reused as data in the 4X loop
 232  TEXT ·rshVU(SB), NOSPLIT, $0
 233  	MOVW z_len+4(FP), R1	// R1 = len(z)
 234  	BEQ R1, ret0	// empty vector: just return c = 0
 235  	MOVW s+24(FP), R2	// R2 = s
 236  	MOVW x_base+12(FP), R3	// R3 = &x[0]
 237  	MOVW z_base+0(FP), R4	// R4 = &z[0]
 238  	// shift first word into carry
 239  	MOVW 0(R3), R5	// R5 = lowest input word
 240  	MOVW $32, R6
 241  	SUBU R2, R6	// R6 = 32 - s
 242  	SLL R6, R5, R7	// R7 = bits shifted out of the bottom word
 243  	SRL R2, R5	// R5 = lowest word >> s, pending until the word above is read
 244  	MOVW R7, c+28(FP)	// result is stored now; R7 is reused below
 245  	// shift remaining words
 246  	SUBU $1, R1	// one word already consumed
 247  	// compute unrolled loop lengths
 248  	AND $3, R1, R7	// R7 = remaining & 3: words done one at a time
 249  	SRL $2, R1	// R1 = remaining >> 2: groups of four words
 250  loop1:
 251  	BEQ R7, loop1done	// no leftover words: go straight to the 4X loop
 252  loop1cont:
 253  	// unroll 1X
 254  	MOVW 4(R3), R8	// R8 = next higher input word
 255  	SLL R6, R8, R9	// R9 = its low s bits, moved to the top
 256  	OR R5, R9	// combine with the pending bits from the word below
 257  	SRL R2, R8, R5	// new pending bits = this word >> s
 258  	MOVW R9, 0(R4)	// store the completed output word
 259  	ADDU $4, R3	// step both cursors up one word
 260  	ADDU $4, R4
 261  	SUBU $1, R7
 262  	BNE R7, loop1cont
 263  loop1done:
 264  loop4:
 265  	BEQ R1, loop4done	// no full groups of four: finish
 266  loop4cont:
 267  	// unroll 4X
 268  	MOVW 4(R3), R7	// R7-R10 = next four input words, lowest first
 269  	MOVW 8(R3), R8
 270  	MOVW 12(R3), R9
 271  	MOVW 16(R3), R10
 272  	SLL R6, R7, R11	// output 0 -> R11 (R7 is still needed for the next SRL)
 273  	OR R5, R11
 274  	SRL R2, R7, R5
 275  	SLL R6, R8, R7	// output 1 -> R7 (its input value is no longer needed)
 276  	OR R5, R7
 277  	SRL R2, R8, R5
 278  	SLL R6, R9, R8	// output 2 -> R8
 279  	OR R5, R8
 280  	SRL R2, R9, R5
 281  	SLL R6, R10, R9	// output 3 -> R9
 282  	OR R5, R9
 283  	SRL R2, R10, R5	// pending bits for the next group / final store
 284  	MOVW R11, 0(R4)	// store the four outputs, lowest first
 285  	MOVW R7, 4(R4)
 286  	MOVW R8, 8(R4)
 287  	MOVW R9, 12(R4)
 288  	ADDU $16, R3	// step both cursors up four words
 289  	ADDU $16, R4
 290  	SUBU $1, R1
 291  	BNE R1, loop4cont
 292  loop4done:
 293  	// store final shifted bits
 294  	MOVW R5, 0(R4)	// highest output word has nothing shifted in from above
 295  	RET
 296  ret0:
 297  	MOVW R0, c+28(FP)	// R0 is the hardwired zero register: c = 0
 298  	RET
 299  
 300  // func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW walks i over [0, len(z)) with a running carry that starts
      // as a. For each word it forms the two-word product x[i]*m (MULU, read
      // back from HI:LO), adds the carry to the low word, stores that in z[i],
      // and makes the next carry the high word plus the wrap bit from the
      // addition. The final carry is stored in c. Only len(z) is read; x is
      // indexed with the same count.
      //
      // Register roles:
      //	R1      m
      //	R2      running carry (initially a)
      //	R3      len(z), then remaining groups of four words
      //	R4, R5  cursors into x and z
      //	R6      remaining single words (1X loop); reused as data in the 4X loop
      //	R24     wrap bit from adding the carry to the low product word
 301  TEXT ·mulAddVWW(SB), NOSPLIT, $0
 302  	MOVW m+24(FP), R1	// R1 = m
 303  	MOVW a+28(FP), R2	// R2 = a, the initial carry
 304  	MOVW z_len+4(FP), R3	// R3 = len(z)
 305  	MOVW x_base+12(FP), R4	// R4 = &x[0]
 306  	MOVW z_base+0(FP), R5	// R5 = &z[0]
 307  	// compute unrolled loop lengths
 308  	AND $3, R3, R6	// R6 = len(z) & 3: words done one at a time
 309  	SRL $2, R3	// R3 = len(z) >> 2: groups of four words
 310  loop1:
 311  	BEQ R6, loop1done	// no leftover words: go straight to the 4X loop
 312  loop1cont:
 313  	// unroll 1X
 314  	MOVW 0(R4), R7	// R7 = x word
 315  	// synthetic carry, one column at a time
 316  	MULU R1, R7	// HI:LO = m * x word
 317  	MOVW LO, R8	// R8 = low product word
 318  	MOVW HI, R9	// R9 = high product word
 319  	ADDU R2, R8, R7	// ADDS R2, R8, R7 (cr=R24): R7 = low word + carry in
 320  	SGTU R2, R7, R24	// ... R24 = 1 if carry in > sum, i.e. the add wrapped
 321  	ADDU R24, R9, R2	// ADC $0, R9, R2: carry out = high word + wrap bit
 322  	MOVW R7, 0(R5)	// store z word
 323  	ADDU $4, R4	// advance both cursors by one word
 324  	ADDU $4, R5
 325  	SUBU $1, R6
 326  	BNE R6, loop1cont
 327  loop1done:
 328  loop4:
 329  	BEQ R3, loop4done	// no full groups of four: finish
 330  loop4cont:
 331  	// unroll 4X
 332  	MOVW 0(R4), R6	// R6-R9 = four x words (R6 is free: the 1X count is spent)
 333  	MOVW 4(R4), R7
 334  	MOVW 8(R4), R8
 335  	MOVW 12(R4), R9
 336  	// synthetic carry, one column at a time
 337  	MULU R1, R6	// column 0; R10/R11 hold LO/HI for every column
 338  	MOVW LO, R10
 339  	MOVW HI, R11
 340  	ADDU R2, R10, R6	// ADDS R2, R10, R6 (cr=R24)
 341  	SGTU R2, R6, R24	// ...
 342  	ADDU R24, R11, R2	// ADC $0, R11, R2
 343  	MULU R1, R7	// column 1
 344  	MOVW LO, R10
 345  	MOVW HI, R11
 346  	ADDU R2, R10, R7	// ADDS R2, R10, R7 (cr=R24)
 347  	SGTU R2, R7, R24	// ...
 348  	ADDU R24, R11, R2	// ADC $0, R11, R2
 349  	MULU R1, R8	// column 2
 350  	MOVW LO, R10
 351  	MOVW HI, R11
 352  	ADDU R2, R10, R8	// ADDS R2, R10, R8 (cr=R24)
 353  	SGTU R2, R8, R24	// ...
 354  	ADDU R24, R11, R2	// ADC $0, R11, R2
 355  	MULU R1, R9	// column 3
 356  	MOVW LO, R10
 357  	MOVW HI, R11
 358  	ADDU R2, R10, R9	// ADDS R2, R10, R9 (cr=R24)
 359  	SGTU R2, R9, R24	// ...
 360  	ADDU R24, R11, R2	// ADC $0, R11, R2
 361  	MOVW R6, 0(R5)	// store the four result words
 362  	MOVW R7, 4(R5)
 363  	MOVW R8, 8(R5)
 364  	MOVW R9, 12(R5)
 365  	ADDU $16, R4	// advance both cursors by four words
 366  	ADDU $16, R5
 367  	SUBU $1, R3
 368  	BNE R3, loop4cont
 369  loop4done:
 370  	MOVW R2, c+32(FP)	// return the final carry
 371  	RET
 372  
 373  // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW walks i over [0, len(z)) with a running carry that starts
      // as a. For each word it forms the two-word product y[i]*m (MULU, read
      // back from HI:LO), adds x[i] and then the carry to the low word, stores
      // that in z[i], and makes the next carry the high word plus the wrap
      // bits from the two additions. The final carry is stored in c. Only
      // len(z) is read; x and y are indexed with the same count.
      //
      // Register roles:
      //	R1      m
      //	R2      running carry (initially a)
      //	R3      len(z), then remaining groups of four words
      //	R4-R6   cursors into x, y, z
      //	R7      remaining single words (1X loop); reused as data in the 4X loop
      //	R24     wrap bit from each addition into the low product word
 374  TEXT ·addMulVVWW(SB), NOSPLIT, $0
 375  	MOVW m+36(FP), R1	// R1 = m
 376  	MOVW a+40(FP), R2	// R2 = a, the initial carry
 377  	MOVW z_len+4(FP), R3	// R3 = len(z)
 378  	MOVW x_base+12(FP), R4	// R4 = &x[0]
 379  	MOVW y_base+24(FP), R5	// R5 = &y[0]
 380  	MOVW z_base+0(FP), R6	// R6 = &z[0]
 381  	// compute unrolled loop lengths
 382  	AND $3, R3, R7	// R7 = len(z) & 3: words done one at a time
 383  	SRL $2, R3	// R3 = len(z) >> 2: groups of four words
 384  loop1:
 385  	BEQ R7, loop1done	// no leftover words: go straight to the 4X loop
 386  loop1cont:
 387  	// unroll 1X
 388  	MOVW 0(R4), R8	// R8 = x word (addend)
 389  	MOVW 0(R5), R9	// R9 = y word (multiplicand)
 390  	// synthetic carry, one column at a time
 391  	MULU R1, R9	// HI:LO = m * y word
 392  	MOVW LO, R10	// R10 = low product word
 393  	MOVW HI, R11	// R11 = high product word
 394  	ADDU R8, R10	// ADDS R8, R10, R10 (cr=R24): low word += x word
 395  	SGTU R8, R10, R24	// ... R24 = 1 if that add wrapped
 396  	ADDU R24, R11	// ADC $0, R11, R11: fold the wrap bit into the high word
 397  	ADDU R2, R10, R9	// ADDS R2, R10, R9 (cr=R24): R9 = low word + carry in
 398  	SGTU R2, R9, R24	// ... R24 = 1 if that add wrapped
 399  	ADDU R24, R11, R2	// ADC $0, R11, R2: carry out = high word + wrap bit
 400  	MOVW R9, 0(R6)	// store z word
 401  	ADDU $4, R4	// advance all three cursors by one word
 402  	ADDU $4, R5
 403  	ADDU $4, R6
 404  	SUBU $1, R7
 405  	BNE R7, loop1cont
 406  loop1done:
 407  loop4:
 408  	BEQ R3, loop4done	// no full groups of four: finish
 409  loop4cont:
 410  	// unroll 4X
 411  	MOVW 0(R4), R7	// R7-R10 = four x words (R7 is free: the 1X count is spent)
 412  	MOVW 4(R4), R8
 413  	MOVW 8(R4), R9
 414  	MOVW 12(R4), R10
 415  	MOVW 0(R5), R11	// R11-R14 = four y words; each is overwritten by its result
 416  	MOVW 4(R5), R12
 417  	MOVW 8(R5), R13
 418  	MOVW 12(R5), R14
 419  	// synthetic carry, one column at a time
 420  	MULU R1, R11	// column 0; R15/R16 hold LO/HI for every column
 421  	MOVW LO, R15
 422  	MOVW HI, R16
 423  	ADDU R7, R15	// ADDS R7, R15, R15 (cr=R24)
 424  	SGTU R7, R15, R24	// ...
 425  	ADDU R24, R16	// ADC $0, R16, R16
 426  	ADDU R2, R15, R11	// ADDS R2, R15, R11 (cr=R24)
 427  	SGTU R2, R11, R24	// ...
 428  	ADDU R24, R16, R2	// ADC $0, R16, R2
 429  	MULU R1, R12	// column 1
 430  	MOVW LO, R15
 431  	MOVW HI, R16
 432  	ADDU R8, R15	// ADDS R8, R15, R15 (cr=R24)
 433  	SGTU R8, R15, R24	// ...
 434  	ADDU R24, R16	// ADC $0, R16, R16
 435  	ADDU R2, R15, R12	// ADDS R2, R15, R12 (cr=R24)
 436  	SGTU R2, R12, R24	// ...
 437  	ADDU R24, R16, R2	// ADC $0, R16, R2
 438  	MULU R1, R13	// column 2
 439  	MOVW LO, R15
 440  	MOVW HI, R16
 441  	ADDU R9, R15	// ADDS R9, R15, R15 (cr=R24)
 442  	SGTU R9, R15, R24	// ...
 443  	ADDU R24, R16	// ADC $0, R16, R16
 444  	ADDU R2, R15, R13	// ADDS R2, R15, R13 (cr=R24)
 445  	SGTU R2, R13, R24	// ...
 446  	ADDU R24, R16, R2	// ADC $0, R16, R2
 447  	MULU R1, R14	// column 3
 448  	MOVW LO, R15
 449  	MOVW HI, R16
 450  	ADDU R10, R15	// ADDS R10, R15, R15 (cr=R24)
 451  	SGTU R10, R15, R24	// ...
 452  	ADDU R24, R16	// ADC $0, R16, R16
 453  	ADDU R2, R15, R14	// ADDS R2, R15, R14 (cr=R24)
 454  	SGTU R2, R14, R24	// ...
 455  	ADDU R24, R16, R2	// ADC $0, R16, R2
 456  	MOVW R11, 0(R6)	// store the four result words
 457  	MOVW R12, 4(R6)
 458  	MOVW R13, 8(R6)
 459  	MOVW R14, 12(R6)
 460  	ADDU $16, R4	// advance all three cursors by four words
 461  	ADDU $16, R5
 462  	ADDU $16, R6
 463  	SUBU $1, R3
 464  	BNE R3, loop4cont
 465  loop4done:
 466  	MOVW R2, c+44(FP)	// return the final carry
 467  	RET
 468