arith_arm.s raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
   6  
   7  //go:build !math_big_pure_go
   8  
   9  #include "textflag.h"
  10  
  11  // func addVV(z, x, y []Word) (c Word)
      //
      // addVV writes the multi-word sum x + y into z, one 32-bit word at a time
      // with the carry propagated from each word to the next, and returns the
      // final carry (0 or 1) in c. Only z_len is loaded; x and y are walked for
      // that same number of words.
      // NOTE(review): presumably callers guarantee len(x), len(y) >= len(z) - confirm.
      //
      // Registers: R1/R2/R3 = running pointers into x/y/z, R0 = number of
      // four-word groups, R4 = leftover word count for the 1X loop (reused as a
      // data register in the 4X loop), R5-R9, R11, R12 = data words.
      //
      // The carry is kept in the CPU carry flag for the whole function, so the
      // loop bookkeeping uses only SUB (no .S suffix) and TEQ $0, which this
      // code relies on to leave the carry flag untouched between ADC.S steps.
  12  TEXT ·addVV(SB), NOSPLIT, $0
  13  	MOVW z_len+4(FP), R0	// R0 = len(z), the number of words to process
  14  	MOVW x_base+12(FP), R1	// R1 = read pointer into x
  15  	MOVW y_base+24(FP), R2	// R2 = read pointer into y
  16  	MOVW z_base+0(FP), R3	// R3 = write pointer into z
  17  	// compute unrolled loop lengths
  18  	AND $3, R0, R4	// R4 = len(z) & 3: words handled one at a time
  19  	MOVW R0>>2, R0	// R0 = len(z) >> 2: groups handled four at a time
  20  	ADD.S $0, R0	// adding 0 cannot carry out, so this clears the carry flag
  21  loop1:
  22  	TEQ $0, R4; BEQ loop1done	// no leftover words: skip the 1X loop
  23  loop1cont:
  24  	// unroll 1X
  25  	MOVW.P 4(R1), R5	// R5 = next x word; post-increment R1 by 4
  26  	MOVW.P 4(R2), R6	// R6 = next y word; post-increment R2 by 4
  27  	ADC.S R6, R5	// R5 += R6 + carry-in; carry-out stays in the flag
  28  	MOVW.P R5, 4(R3)	// store the sum word; post-increment R3 by 4
  29  	SUB $1, R4	// plain SUB: counts down without setting flags
  30  	TEQ $0, R4; BNE loop1cont
  31  loop1done:
  32  loop4:
  33  	TEQ $0, R0; BEQ loop4done	// no four-word groups: go straight to the exit
  34  loop4cont:
  35  	// unroll 4X
  36  	MOVW.P 4(R1), R4	// load four consecutive x words into R4-R7
  37  	MOVW.P 4(R1), R5
  38  	MOVW.P 4(R1), R6
  39  	MOVW.P 4(R1), R7
  40  	MOVW.P 4(R2), R8	// load four consecutive y words into R8, R9, R11, R12
  41  	MOVW.P 4(R2), R9
  42  	MOVW.P 4(R2), R11
  43  	MOVW.P 4(R2), R12
  44  	ADC.S R8, R4	// four chained adds, lowest word first, carry rippling through
  45  	ADC.S R9, R5
  46  	ADC.S R11, R6
  47  	ADC.S R12, R7
  48  	MOVW.P R4, 4(R3)	// store the four sum words in order
  49  	MOVW.P R5, 4(R3)
  50  	MOVW.P R6, 4(R3)
  51  	MOVW.P R7, 4(R3)
  52  	SUB $1, R0	// plain SUB: counts down without setting flags
  53  	TEQ $0, R0; BNE loop4cont
  54  loop4done:
  55  	SBC R1, R1	// R1 = R1 - R1 - (1 - C): 0 if the carry flag is set, -1 if clear
  56  	ADD $1, R1	// R1 = 1 if the carry flag was set, 0 otherwise
  57  	MOVW R1, c+36(FP)	// return the final carry in c
  58  	RET
  59  
  60  // func subVV(z, x, y []Word) (c Word)
      //
      // subVV writes the multi-word difference x - y into z, one 32-bit word at
      // a time with the borrow propagated from each word to the next, and
      // returns the final borrow (0 or 1) in c. Only z_len is loaded; x and y
      // are walked for that same number of words.
      // NOTE(review): presumably callers guarantee len(x), len(y) >= len(z) - confirm.
      //
      // Registers: R1/R2/R3 = running pointers into x/y/z, R0 = number of
      // four-word groups, R4 = leftover word count for the 1X loop (reused as a
      // data register in the 4X loop), R5-R9, R11, R12 = data words.
      //
      // The borrow is kept in the CPU carry flag using the ARM convention that
      // SBC subtracts an extra 1 when the flag is clear (flag set = no borrow).
      // The loop bookkeeping uses only SUB (no .S suffix) and TEQ $0, which this
      // code relies on to leave the carry flag untouched between SBC.S steps.
  61  TEXT ·subVV(SB), NOSPLIT, $0
  62  	MOVW z_len+4(FP), R0	// R0 = len(z), the number of words to process
  63  	MOVW x_base+12(FP), R1	// R1 = read pointer into x
  64  	MOVW y_base+24(FP), R2	// R2 = read pointer into y
  65  	MOVW z_base+0(FP), R3	// R3 = write pointer into z
  66  	// compute unrolled loop lengths
  67  	AND $3, R0, R4	// R4 = len(z) & 3: words handled one at a time
  68  	MOVW R0>>2, R0	// R0 = len(z) >> 2: groups handled four at a time
  69  	SUB.S $0, R0	// subtracting 0 never borrows: sets the carry flag, i.e. "no borrow"
  70  loop1:
  71  	TEQ $0, R4; BEQ loop1done	// no leftover words: skip the 1X loop
  72  loop1cont:
  73  	// unroll 1X
  74  	MOVW.P 4(R1), R5	// R5 = next x word; post-increment R1 by 4
  75  	MOVW.P 4(R2), R6	// R6 = next y word; post-increment R2 by 4
  76  	SBC.S R6, R5	// R5 -= R6 + borrow-in; borrow-out stays in the flag
  77  	MOVW.P R5, 4(R3)	// store the difference word; post-increment R3 by 4
  78  	SUB $1, R4	// plain SUB: counts down without setting flags
  79  	TEQ $0, R4; BNE loop1cont
  80  loop1done:
  81  loop4:
  82  	TEQ $0, R0; BEQ loop4done	// no four-word groups: go straight to the exit
  83  loop4cont:
  84  	// unroll 4X
  85  	MOVW.P 4(R1), R4	// load four consecutive x words into R4-R7
  86  	MOVW.P 4(R1), R5
  87  	MOVW.P 4(R1), R6
  88  	MOVW.P 4(R1), R7
  89  	MOVW.P 4(R2), R8	// load four consecutive y words into R8, R9, R11, R12
  90  	MOVW.P 4(R2), R9
  91  	MOVW.P 4(R2), R11
  92  	MOVW.P 4(R2), R12
  93  	SBC.S R8, R4	// four chained subtracts, lowest word first, borrow rippling through
  94  	SBC.S R9, R5
  95  	SBC.S R11, R6
  96  	SBC.S R12, R7
  97  	MOVW.P R4, 4(R3)	// store the four difference words in order
  98  	MOVW.P R5, 4(R3)
  99  	MOVW.P R6, 4(R3)
 100  	MOVW.P R7, 4(R3)
 101  	SUB $1, R0	// plain SUB: counts down without setting flags
 102  	TEQ $0, R0; BNE loop4cont
 103  loop4done:
 104  	SBC R1, R1	// R1 = R1 - R1 - (1 - C): 0 if no borrow, -1 if a borrow is pending
 105  	RSB $0, R1, R1	// R1 = 0 - R1: 1 if a borrow is pending, 0 otherwise
 106  	MOVW R1, c+36(FP)	// return the final borrow in c
 107  	RET
 108  
 109  // func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU shifts the len(z)-word vector x left by s bits, writes the result
      // to z, and returns in c the bits shifted out of the most significant
      // word (x[len(z)-1] >> (32-s)). When len(z) is 0 it stores c = 0 and
      // returns without touching z.
      //
      // The vectors are walked from the highest word down to the lowest. R4
      // always holds the pending output word: the current x word shifted left
      // by s, still waiting for the top bits of the next lower x word to be
      // ORed into its low end before it is stored.
      //
      // Registers: R1 = s, R5 = 32 - s, R2/R3 = running pointers into x/z
      // (both start one word past the end), R0 = number of four-word groups,
      // R6 = leftover word count for the 1X loop (reused as a data register in
      // the 4X loop), R7-R9 = data words.
      //
      // NOTE(review): s and 32-s are used directly as register shift counts;
      // callers presumably pass 0 < s < 32 - confirm against the Go-side caller.
 110  TEXT ·lshVU(SB), NOSPLIT, $0
 111  	MOVW z_len+4(FP), R0	// R0 = len(z)
 112  	TEQ $0, R0; BEQ ret0	// empty vector: just return c = 0
 113  	MOVW s+24(FP), R1	// R1 = shift count s
 114  	MOVW x_base+12(FP), R2
 115  	MOVW z_base+0(FP), R3
 116  	// run loop backward
 117  	ADD R0<<2, R2, R2	// R2 = x_base + 4*len(z): one past the last x word
 118  	ADD R0<<2, R3, R3	// R3 = z_base + 4*len(z): one past the last z word
 119  	// shift first word into carry
 120  	MOVW.W -4(R2), R4	// pre-decrement R2 by 4, then R4 = highest x word
 121  	MOVW $32, R5
 122  	SUB R1, R5	// R5 = 32 - s, the complementary shift count
 123  	MOVW R4>>R5, R6	// R6 = bits that fall off the top of the highest word
 124  	MOVW R4<<R1, R4	// R4 = pending output word for the highest position
 125  	MOVW R6, c+28(FP)	// the shifted-out bits are the return value c
 126  	// shift remaining words
 127  	SUB $1, R0	// one word already consumed; len(z)-1 remain
 128  	// compute unrolled loop lengths
 129  	AND $3, R0, R6	// R6 = remaining & 3: words handled one at a time
 130  	MOVW R0>>2, R0	// R0 = remaining >> 2: groups handled four at a time
 131  loop1:
 132  	TEQ $0, R6; BEQ loop1done	// no leftover words: skip the 1X loop
 133  loop1cont:
 134  	// unroll 1X
 135  	MOVW.W -4(R2), R7	// pre-decrement R2, load the next lower x word
 136  	ORR R7>>R5, R4	// complete the pending word with this word's top bits
 137  	MOVW.W R4, -4(R3)	// pre-decrement R3, store the completed word
 138  	MOVW R7<<R1, R4	// start the next pending word
 139  	SUB $1, R6
 140  	TEQ $0, R6; BNE loop1cont
 141  loop1done:
 142  loop4:
 143  	TEQ $0, R0; BEQ loop4done	// no four-word groups: go store the last word
 144  loop4cont:
 145  	// unroll 4X
 146  	MOVW.W -4(R2), R6	// load four x words, highest first, into R6-R9
 147  	MOVW.W -4(R2), R7
 148  	MOVW.W -4(R2), R8
 149  	MOVW.W -4(R2), R9
 150  	ORR R6>>R5, R4	// same complete / store / restart step as the 1X loop, four times
 151  	MOVW.W R4, -4(R3)
 152  	MOVW R6<<R1, R4
 153  	ORR R7>>R5, R4
 154  	MOVW.W R4, -4(R3)
 155  	MOVW R7<<R1, R4
 156  	ORR R8>>R5, R4
 157  	MOVW.W R4, -4(R3)
 158  	MOVW R8<<R1, R4
 159  	ORR R9>>R5, R4
 160  	MOVW.W R4, -4(R3)
 161  	MOVW R9<<R1, R4
 162  	SUB $1, R0
 163  	TEQ $0, R0; BNE loop4cont
 164  loop4done:
 165  	// store final shifted bits
 166  	MOVW.W R4, -4(R3)	// lowest output word: lowest x word << s, nothing below to OR in
 167  	RET
 168  ret0:
 169  	MOVW $0, R1
 170  	MOVW R1, c+28(FP)	// len(z) == 0: return c = 0
 171  	RET
 172  
 173  // func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU shifts the len(z)-word vector x right by s bits, writes the
      // result to z, and returns in c the bits shifted out of the least
      // significant word (x[0] << (32-s)). When len(z) is 0 it stores c = 0
      // and returns without touching z.
      //
      // The vectors are walked from the lowest word up to the highest. R4
      // always holds the pending output word: the current x word shifted right
      // by s, still waiting for the low bits of the next higher x word to be
      // ORed into its high end before it is stored.
      //
      // Registers: R1 = s, R5 = 32 - s, R2/R3 = running pointers into x/z,
      // R0 = number of four-word groups, R6 = leftover word count for the 1X
      // loop (reused as a data register in the 4X loop), R7-R9 = data words.
      //
      // NOTE(review): s and 32-s are used directly as register shift counts;
      // callers presumably pass 0 < s < 32 - confirm against the Go-side caller.
 174  TEXT ·rshVU(SB), NOSPLIT, $0
 175  	MOVW z_len+4(FP), R0	// R0 = len(z)
 176  	TEQ $0, R0; BEQ ret0	// empty vector: just return c = 0
 177  	MOVW s+24(FP), R1	// R1 = shift count s
 178  	MOVW x_base+12(FP), R2	// R2 = read pointer into x
 179  	MOVW z_base+0(FP), R3	// R3 = write pointer into z
 180  	// shift first word into carry
 181  	MOVW.P 4(R2), R4	// R4 = lowest x word; post-increment R2 by 4
 182  	MOVW $32, R5
 183  	SUB R1, R5	// R5 = 32 - s, the complementary shift count
 184  	MOVW R4<<R5, R6	// R6 = bits that fall off the bottom of the lowest word
 185  	MOVW R4>>R1, R4	// R4 = pending output word for the lowest position
 186  	MOVW R6, c+28(FP)	// the shifted-out bits are the return value c
 187  	// shift remaining words
 188  	SUB $1, R0	// one word already consumed; len(z)-1 remain
 189  	// compute unrolled loop lengths
 190  	AND $3, R0, R6	// R6 = remaining & 3: words handled one at a time
 191  	MOVW R0>>2, R0	// R0 = remaining >> 2: groups handled four at a time
 192  loop1:
 193  	TEQ $0, R6; BEQ loop1done	// no leftover words: skip the 1X loop
 194  loop1cont:
 195  	// unroll 1X
 196  	MOVW.P 4(R2), R7	// load the next higher x word; post-increment R2
 197  	ORR R7<<R5, R4	// complete the pending word with this word's low bits
 198  	MOVW.P R4, 4(R3)	// store the completed word; post-increment R3
 199  	MOVW R7>>R1, R4	// start the next pending word
 200  	SUB $1, R6
 201  	TEQ $0, R6; BNE loop1cont
 202  loop1done:
 203  loop4:
 204  	TEQ $0, R0; BEQ loop4done	// no four-word groups: go store the last word
 205  loop4cont:
 206  	// unroll 4X
 207  	MOVW.P 4(R2), R6	// load four x words, lowest first, into R6-R9
 208  	MOVW.P 4(R2), R7
 209  	MOVW.P 4(R2), R8
 210  	MOVW.P 4(R2), R9
 211  	ORR R6<<R5, R4	// same complete / store / restart step as the 1X loop, four times
 212  	MOVW.P R4, 4(R3)
 213  	MOVW R6>>R1, R4
 214  	ORR R7<<R5, R4
 215  	MOVW.P R4, 4(R3)
 216  	MOVW R7>>R1, R4
 217  	ORR R8<<R5, R4
 218  	MOVW.P R4, 4(R3)
 219  	MOVW R8>>R1, R4
 220  	ORR R9<<R5, R4
 221  	MOVW.P R4, 4(R3)
 222  	MOVW R9>>R1, R4
 223  	SUB $1, R0
 224  	TEQ $0, R0; BNE loop4cont
 225  loop4done:
 226  	// store final shifted bits
 227  	MOVW.P R4, 4(R3)	// highest output word: highest x word >> s, nothing above to OR in
 228  	RET
 229  ret0:
 230  	MOVW $0, R1
 231  	MOVW R1, c+28(FP)	// len(z) == 0: return c = 0
 232  	RET
 233  
 234  // func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW computes z = x*m + a over len(z) words: for each word, from
      // lowest to highest, it forms the 64-bit product m * x[i], adds the
      // running carry word (initially a), stores the low 32 bits in z[i] and
      // keeps the high 32 bits (plus any carry out of the low add) as the
      // carry word for the next position. The final carry word is returned in
      // c. When len(z) is 0 both loops are skipped and c = a.
      //
      // Registers: R0 = m, R1 = running carry word, R3/R4 = running pointers
      // into x/z, R2 = number of four-word groups, R5 = leftover word count
      // for the 1X loop (reused as a data register in the 4X loop), R6-R8 =
      // product halves.
      //
      // In the 4X loop a MULLU sits between ADD.S and ADC.S; the code relies
      // on MULLU (no .S suffix) leaving the carry flag unchanged.
 235  TEXT ·mulAddVWW(SB), NOSPLIT, $0
 236  	MOVW m+24(FP), R0	// R0 = multiplier m
 237  	MOVW a+28(FP), R1	// R1 = running carry word, seeded with a
 238  	MOVW z_len+4(FP), R2	// R2 = len(z)
 239  	MOVW x_base+12(FP), R3	// R3 = read pointer into x
 240  	MOVW z_base+0(FP), R4	// R4 = write pointer into z
 241  	// compute unrolled loop lengths
 242  	AND $3, R2, R5	// R5 = len(z) & 3: words handled one at a time
 243  	MOVW R2>>2, R2	// R2 = len(z) >> 2: groups handled four at a time
 244  loop1:
 245  	TEQ $0, R5; BEQ loop1done	// no leftover words: skip the 1X loop
 246  loop1cont:
 247  	// unroll 1X
 248  	MOVW.P 4(R3), R6	// R6 = next x word; post-increment R3 by 4
 249  	// multiply
 250  	MULLU R0, R6, (R7, R6)	// R7:R6 = m * x word (R7 = high half, R6 = low half)
 251  	ADD.S R1, R6	// low half += carry-in, setting the carry flag
 252  	ADC $0, R7, R1	// new carry word = high half + carry flag
 253  	MOVW.P R6, 4(R4)	// store the result word; post-increment R4 by 4
 254  	SUB $1, R5
 255  	TEQ $0, R5; BNE loop1cont
 256  loop1done:
 257  loop4:
 258  	TEQ $0, R2; BEQ loop4done	// no four-word groups: go straight to the exit
 259  loop4cont:
 260  	// unroll 4X in batches of 2
 261  	MOVW.P 4(R3), R5	// first batch: two x words into R5, R6
 262  	MOVW.P 4(R3), R6
 263  	// multiply
 264  	MULLU R0, R5, (R7, R5)	// R7:R5 = m * first word
 265  	ADD.S R1, R5	// low0 += carry-in
 266  	MULLU R0, R6, (R8, R6)	// R8:R6 = m * second word; carry flag from ADD.S still pending
 267  	ADC.S R7, R6	// low1 += high0 + carry flag
 268  	ADC $0, R8, R1	// new carry word = high1 + carry flag
 269  	MOVW.P R5, 4(R4)
 270  	MOVW.P R6, 4(R4)
 271  	MOVW.P 4(R3), R5	// second batch: same sequence on the next two x words
 272  	MOVW.P 4(R3), R6
 273  	// multiply
 274  	MULLU R0, R5, (R7, R5)
 275  	ADD.S R1, R5
 276  	MULLU R0, R6, (R8, R6)
 277  	ADC.S R7, R6
 278  	ADC $0, R8, R1
 279  	MOVW.P R5, 4(R4)
 280  	MOVW.P R6, 4(R4)
 281  	SUB $1, R2
 282  	TEQ $0, R2; BNE loop4cont
 283  loop4done:
 284  	MOVW R1, c+32(FP)	// return the final carry word in c
 285  	RET
 286  
 287  // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW computes z = x + y*m + a over len(z) words: for each word,
      // from lowest to highest, it forms the 64-bit product m * y[i], adds the
      // running carry word (initially a) and x[i], stores the low 32 bits in
      // z[i] and keeps everything above bit 31 as the carry word for the next
      // position. The final carry word is returned in c. When len(z) is 0
      // both loops are skipped and c = a.
      // NOTE(review): only z_len is loaded; presumably callers guarantee
      // len(x), len(y) >= len(z) - confirm.
      //
      // Registers: R0 = m, R1 = running carry word, R3/R4/R5 = running
      // pointers into x/y/z, R2 = number of four-word groups, R6 = leftover
      // word count for the 1X loop (reused as a data register in the 4X
      // loop), R7-R9, R11, R12 = data words and product halves.
      //
      // In the 4X loop a MULLU sits between ADD.S and ADC.S; the code relies
      // on MULLU (no .S suffix) leaving the carry flag unchanged.
 288  TEXT ·addMulVVWW(SB), NOSPLIT, $0
 289  	MOVW m+36(FP), R0	// R0 = multiplier m
 290  	MOVW a+40(FP), R1	// R1 = running carry word, seeded with a
 291  	MOVW z_len+4(FP), R2	// R2 = len(z)
 292  	MOVW x_base+12(FP), R3	// R3 = read pointer into x
 293  	MOVW y_base+24(FP), R4	// R4 = read pointer into y
 294  	MOVW z_base+0(FP), R5	// R5 = write pointer into z
 295  	// compute unrolled loop lengths
 296  	AND $3, R2, R6	// R6 = len(z) & 3: words handled one at a time
 297  	MOVW R2>>2, R2	// R2 = len(z) >> 2: groups handled four at a time
 298  loop1:
 299  	TEQ $0, R6; BEQ loop1done	// no leftover words: skip the 1X loop
 300  loop1cont:
 301  	// unroll 1X
 302  	MOVW.P 4(R3), R7	// R7 = next x word; post-increment R3 by 4
 303  	MOVW.P 4(R4), R8	// R8 = next y word; post-increment R4 by 4
 304  	// multiply
 305  	MULLU R0, R8, (R9, R8)	// R9:R8 = m * y word (R9 = high half, R8 = low half)
 306  	ADD.S R1, R8	// low half += carry-in, setting the carry flag
 307  	ADC $0, R9, R1	// new carry word = high half + carry flag
 308  	// add
 309  	ADD.S R7, R8	// low half += x word, setting the carry flag
 310  	ADC $0, R1	// fold that carry into the carry word
 311  	MOVW.P R8, 4(R5)	// store the result word; post-increment R5 by 4
 312  	SUB $1, R6
 313  	TEQ $0, R6; BNE loop1cont
 314  loop1done:
 315  loop4:
 316  	TEQ $0, R2; BEQ loop4done	// no four-word groups: go straight to the exit
 317  loop4cont:
 318  	// unroll 4X in batches of 2
 319  	MOVW.P 4(R3), R6	// first batch: two x words into R6, R7
 320  	MOVW.P 4(R3), R7
 321  	MOVW.P 4(R4), R8	// and two y words into R8, R9
 322  	MOVW.P 4(R4), R9
 323  	// multiply
 324  	MULLU R0, R8, (R11, R8)	// R11:R8 = m * first y word
 325  	ADD.S R1, R8	// low0 += carry-in
 326  	MULLU R0, R9, (R12, R9)	// R12:R9 = m * second y word; carry flag from ADD.S still pending
 327  	ADC.S R11, R9	// low1 += high0 + carry flag
 328  	ADC $0, R12, R1	// new carry word = high1 + carry flag
 329  	// add
 330  	ADD.S R6, R8	// low0 += first x word
 331  	ADC.S R7, R9	// low1 += second x word + carry flag
 332  	ADC $0, R1	// fold the final carry into the carry word
 333  	MOVW.P R8, 4(R5)
 334  	MOVW.P R9, 4(R5)
 335  	MOVW.P 4(R3), R6	// second batch: same sequence on the next two x and y words
 336  	MOVW.P 4(R3), R7
 337  	MOVW.P 4(R4), R8
 338  	MOVW.P 4(R4), R9
 339  	// multiply
 340  	MULLU R0, R8, (R11, R8)
 341  	ADD.S R1, R8
 342  	MULLU R0, R9, (R12, R9)
 343  	ADC.S R11, R9
 344  	ADC $0, R12, R1
 345  	// add
 346  	ADD.S R6, R8
 347  	ADC.S R7, R9
 348  	ADC $0, R1
 349  	MOVW.P R8, 4(R5)
 350  	MOVW.P R9, 4(R5)
 351  	SUB $1, R2
 352  	TEQ $0, R2; BNE loop4cont
 353  loop4done:
 354  	MOVW R1, c+44(FP)	// return the final carry word in c
 355  	RET
 356