nat_riscv64.s raw

   1  // Copyright 2023 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build !purego
   6  
   7  #include "textflag.h"
   8  
   9  // func addMulVVW1024(z, x *uint, y uint) (c uint)
      //
      // addMulVVW1024 computes z += x * y over 16 64-bit words (1024 bits) and
      // returns the carry out of the top word in c. The entry point only picks
      // the word count; the arithmetic, the argument loads and the store of c
      // are all done by addMulVVWx, which expects the count in X30.
      //
      // $0-32: no local frame, 32 bytes of arguments plus result.
  10  TEXT ·addMulVVW1024(SB),$0-32
  11  	MOV	$16, X30	// X30 = word count: 16 * 64 bits = 1024 bits
  12  	JMP	addMulVVWx(SB)	// jump (not call) into the shared loop; it executes the RET
  13  
  14  // func addMulVVW1536(z, x *uint, y uint) (c uint)
      //
      // addMulVVW1536 computes z += x * y over 24 64-bit words (1536 bits) and
      // returns the carry out of the top word in c. The entry point only picks
      // the word count; the arithmetic, the argument loads and the store of c
      // are all done by addMulVVWx, which expects the count in X30.
      //
      // $0-32: no local frame, 32 bytes of arguments plus result.
  15  TEXT ·addMulVVW1536(SB),$0-32
  16  	MOV	$24, X30	// X30 = word count: 24 * 64 bits = 1536 bits
  17  	JMP	addMulVVWx(SB)	// jump (not call) into the shared loop; it executes the RET
  18  
  19  // func addMulVVW2048(z, x *uint, y uint) (c uint)
      //
      // addMulVVW2048 computes z += x * y over 32 64-bit words (2048 bits) and
      // returns the carry out of the top word in c. The entry point only picks
      // the word count; the arithmetic, the argument loads and the store of c
      // are all done by addMulVVWx, which expects the count in X30.
      //
      // $0-32: no local frame, 32 bytes of arguments plus result.
  20  TEXT ·addMulVVW2048(SB),$0-32
  21  	MOV	$32, X30	// X30 = word count: 32 * 64 bits = 2048 bits
  22  	JMP	addMulVVWx(SB)	// jump (not call) into the shared loop; it executes the RET
  23  
  24  TEXT addMulVVWx(SB),NOFRAME|NOSPLIT,$0
      	// Shared loop behind the addMulVVW1024/1536/2048 entry points. For X30
      	// consecutive 64-bit words it computes z[i] += x[i] * y, carrying one
      	// word from each position into the next, and stores the final carry
      	// into the result slot c.
      	//
      	// Inputs:
      	//   X30        words to process, set by the entry point that jumped
      	//              here. The loop subtracts 4 per pass and exits only on
      	//              exactly zero, so the count must be a multiple of 4.
      	//   z+0(FP)    pointer to z (read and written)
      	//   x+8(FP)    pointer to x (read only)
      	//   y+16(FP)   multiplier
      	// Output:
      	//   c+24(FP)   carry out of the last word processed
      	//
      	// Register roles:
      	//   X5, X7            cursors into z and x, advanced 32 bytes per pass
      	//   X6                y
      	//   X29               running carry c
      	//   X8/X11/X14/X17    x[0..3], then the low half of x[i] * y
      	//   X9/X12/X15/X18    high half of x[i] * y, plus the first wrap bit
      	//   X10/X13/X16/X19   z[0..3] as loaded, then the updated z[0..3]
      	//   X21, X22          scratch: partial sum and 0/1 wrap bit
      	//
      	// NOTE(review): this routine is entered by JMP and declares no frame
      	// (NOFRAME, $0), so the FP-relative accesses presumably address the
      	// argument block of the entry point's caller. That relies on the entry
      	// points keeping a $0 frame - confirm before changing either side.
  25  	MOV	z+0(FP), X5	// X5 = z cursor
  26  	MOV	x+8(FP), X7	// X7 = x cursor
  27  	MOV	y+16(FP), X6	// X6 = y
  28  	MOV	$0, X29	// c = 0
  29  
  30  	BEQZ	X30, done	// zero words: store c = 0 and return without touching z
  31  loop:
      	// One pass handles four words. All eight loads are issued first, then
      	// the four multiply-accumulate steps run in order because each one
      	// consumes the carry (X29) produced by the previous one.
  32  	MOV	0*8(X5), X10	// z[0]
  33  	MOV	1*8(X5), X13	// z[1]
  34  	MOV	2*8(X5), X16	// z[2]
  35  	MOV	3*8(X5), X19	// z[3]
  36  
  37  	MOV	0*8(X7), X8	// x[0]
  38  	MOV	1*8(X7), X11	// x[1]
  39  	MOV	2*8(X7), X14	// x[2]
  40  	MOV	3*8(X7), X17	// x[3]
  41  
      	// Word 0: (c, z[0]) = x[0] * y + z[0] + c.
      	// SLTU a, b, d sets d = (b < a) unsigned; comparing a sum with one of
      	// its addends detects that the 64-bit addition wrapped. The new carry
      	// cannot overflow: x*y + z + c <= (2^64-1)^2 + 2*(2^64-1) = 2^128 - 1.
  42  	MULHU	X8, X6, X9	// X9  = high 64 bits of x[0] * y
  43  	MUL	X8, X6, X8	// X8  = low 64 bits of x[0] * y
  44  	ADD	X8, X10, X21	// X21 = lo + z[0] (may wrap)
  45  	SLTU	X8, X21, X22	// X22 = 1 if that addition wrapped (X21 < X8)
  46  	ADD	X9, X22, X9	// hi += wrap bit
  47  	ADD	X21, X29, X10	// X10 = lo + z[0] + c (may wrap): new z[0]
  48  	SLTU	X21, X10, X22	// X22 = 1 if adding c wrapped (X10 < X21)
  49  	ADD	X9, X22, X29	// c = hi + wrap bit: carry into word 1
  50  
      	// Word 1: same sequence on x[1], z[1].
  51  	MULHU	X11, X6, X12	// X12 = high 64 bits of x[1] * y
  52  	MUL	X11, X6, X11	// X11 = low 64 bits of x[1] * y
  53  	ADD	X11, X13, X21	// X21 = lo + z[1] (may wrap)
  54  	SLTU	X11, X21, X22	// X22 = wrap bit of lo + z[1]
  55  	ADD	X12, X22, X12	// hi += wrap bit
  56  	ADD	X21, X29, X13	// X13 = lo + z[1] + c: new z[1]
  57  	SLTU	X21, X13, X22	// X22 = wrap bit of adding c
  58  	ADD	X12, X22, X29	// c = carry into word 2
  59  
      	// Word 2: same sequence on x[2], z[2].
  60  	MULHU	X14, X6, X15	// X15 = high 64 bits of x[2] * y
  61  	MUL	X14, X6, X14	// X14 = low 64 bits of x[2] * y
  62  	ADD	X14, X16, X21	// X21 = lo + z[2] (may wrap)
  63  	SLTU	X14, X21, X22	// X22 = wrap bit of lo + z[2]
  64  	ADD	X15, X22, X15	// hi += wrap bit
  65  	ADD	X21, X29, X16	// X16 = lo + z[2] + c: new z[2]
  66  	SLTU	X21, X16, X22	// X22 = wrap bit of adding c
  67  	ADD	X15, X22, X29	// c = carry into word 3
  68  
      	// Word 3: same sequence on x[3], z[3].
  69  	MULHU	X17, X6, X18	// X18 = high 64 bits of x[3] * y
  70  	MUL	X17, X6, X17	// X17 = low 64 bits of x[3] * y
  71  	ADD	X17, X19, X21	// X21 = lo + z[3] (may wrap)
  72  	SLTU	X17, X21, X22	// X22 = wrap bit of lo + z[3]
  73  	ADD	X18, X22, X18	// hi += wrap bit
  74  	ADD	X21, X29, X19	// X19 = lo + z[3] + c: new z[3]
  75  	SLTU	X21, X19, X22	// X22 = wrap bit of adding c
  76  	ADD	X18, X22, X29	// c = carry into the next pass (or the result)
  77  
      	// Write the four updated words back to z.
  78  	MOV	X10, 0*8(X5)	// z[0]
  79  	MOV	X13, 1*8(X5)	// z[1]
  80  	MOV	X16, 2*8(X5)	// z[2]
  81  	MOV	X19, 3*8(X5)	// z[3]
  82  
  83  	ADD	$32, X5	// advance z cursor by 4 words (4 * 8 bytes)
  84  	ADD	$32, X7	// advance x cursor by 4 words (4 * 8 bytes)
  85  
  86  	SUB	$4, X30	// four fewer words remaining
  87  	BNEZ	X30, loop	// repeat until the count reaches exactly zero
  88  
  89  done:
  90  	MOV	X29, c+24(FP)	// return the final carry in c
  91  	RET
  92