nat_loong64.s raw

   1  // Copyright 2024 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // derived from crypto/internal/fips140/bigmod/nat_riscv64.s
   6  
   7  //go:build !purego
   8  
   9  #include "textflag.h"
  10  
  11  // func addMulVVW1024(z, x *uint, y uint) (c uint)
      //
      // Entry point for 1024-bit operands. It loads the word count 16
      // (16 words of 8 bytes = 1024 bits) into R8 and jumps to the shared
      // loop in addMulVVWx, which reads z, x and y through FP and stores
      // the result c before returning.
  12  TEXT ·addMulVVW1024(SB),$0-32
  13  	MOVV	$16, R8	// R8 = number of 64-bit words for addMulVVWx to process
  14  	JMP	addMulVVWx(SB)	// jump, not call: addMulVVWx executes the RET
  15  
  16  // func addMulVVW1536(z, x *uint, y uint) (c uint)
      //
      // Entry point for 1536-bit operands. It loads the word count 24
      // (24 words of 8 bytes = 1536 bits) into R8 and jumps to the shared
      // loop in addMulVVWx, which reads z, x and y through FP and stores
      // the result c before returning.
  17  TEXT ·addMulVVW1536(SB),$0-32
  18  	MOVV	$24, R8	// R8 = number of 64-bit words for addMulVVWx to process
  19  	JMP	addMulVVWx(SB)	// jump, not call: addMulVVWx executes the RET
  20  
  21  // func addMulVVW2048(z, x *uint, y uint) (c uint)
      //
      // Entry point for 2048-bit operands. It loads the word count 32
      // (32 words of 8 bytes = 2048 bits) into R8 and jumps to the shared
      // loop in addMulVVWx, which reads z, x and y through FP and stores
      // the result c before returning.
  22  TEXT ·addMulVVW2048(SB),$0-32
  23  	MOVV	$32, R8	// R8 = number of 64-bit words for addMulVVWx to process
  24  	JMP	addMulVVWx(SB)	// jump, not call: addMulVVWx executes the RET
  25  
  26  TEXT addMulVVWx(SB),NOFRAME|NOSPLIT,$0
      	// Shared body of the addMulVVW* entry points above. They jump here
      	// with the word count already in R8; this routine has no Go
      	// declaration of its own and takes z, x, y and the result slot c
      	// from the FP offsets of the entry point's signature.
      	//
      	// For each of the R8 words it computes z[i] += x[i]*y + carry and
      	// finally stores the last carry word as c.
      	//
      	// Register use:
      	//   R4       pointer into z (advanced by 32 bytes per iteration)
      	//   R6       pointer into x (advanced by 32 bytes per iteration)
      	//   R5       multiplier y
      	//   R7       carry word c passed from one word to the next
      	//   R8       words remaining, decremented by 4 per iteration
      	//   R9-R12   z[0..3] of the current group
      	//   R13-R16  x[0..3], then overwritten with the low product words
      	//   R17, R24, R25, R26  high product words
      	//   R18      intermediate sum, R19 carry bit produced by SGTU
      	//
      	// The loop is unrolled four words at a time and exits only when R8
      	// reaches exactly zero, so R8 must be a multiple of 4 (the entry
      	// points pass 16, 24 and 32). The only branches test R8.
  27  	MOVV	z+0(FP), R4
  28  	MOVV	x+8(FP), R6
  29  	MOVV	y+16(FP), R5
  30  	MOVV	$0, R7	// initial carry c = 0
  31  
  32  	BEQ	R8, R0, done	// nothing to do for a zero word count
  33  loop:
  34  	MOVV	0*8(R4), R9	// z[0]
  35  	MOVV	1*8(R4), R10	// z[1]
  36  	MOVV	2*8(R4), R11	// z[2]
  37  	MOVV	3*8(R4), R12	// z[3]
  38  
  39  	MOVV	0*8(R6), R13	// x[0]
  40  	MOVV	1*8(R6), R14	// x[1]
  41  	MOVV	2*8(R6), R15	// x[2]
  42  	MOVV	3*8(R6), R16	// x[3]
  43  
      	// Word 0: (next c, z[0]) = x[0]*y + z[0] + c
  44  	MULHVU	R13, R5, R17	// z_hi[0] = high 64 bits of x[0] * y (unsigned)
  45  	MULV	R13, R5, R13	// z_lo[0] = low 64 bits of x[0] * y
  46  	ADDV	R13, R9, R18	// z_lo[0] = x[0] * y + z[0]
  47  	SGTU	R13, R18, R19	// R19 = 1 if that add wrapped (addend > sum)
  48  	ADDV	R17, R19, R17	// z_hi[0] = x[0] * y + z[0]
  49  	ADDV	R18, R7, R9	// z_lo[0] = x[0] * y + z[0] + c
  50  	SGTU	R18, R9, R19	// R19 = 1 if adding c wrapped
  51  	ADDV	R17, R19, R7	// next c
  52  
      	// Word 1: same sequence, consuming the carry produced by word 0.
  53  	MULHVU	R14, R5, R24	// z_hi[1] = high 64 bits of x[1] * y (unsigned)
  54  	MULV	R14, R5, R14	// z_lo[1] = low 64 bits of x[1] * y
  55  	ADDV	R14, R10, R18	// z_lo[1] = x[1] * y + z[1]
  56  	SGTU	R14, R18, R19	// R19 = 1 if that add wrapped (addend > sum)
  57  	ADDV	R24, R19, R24	// z_hi[1] = x[1] * y + z[1]
  58  	ADDV	R18, R7, R10	// z_lo[1] = x[1] * y + z[1] + c
  59  	SGTU	R18, R10, R19	// R19 = 1 if adding c wrapped
  60  	ADDV	R24, R19, R7	// next c
  61  
      	// Word 2: same sequence, consuming the carry produced by word 1.
  62  	MULHVU	R15, R5, R25	// z_hi[2] = high 64 bits of x[2] * y (unsigned)
  63  	MULV	R15, R5, R15	// z_lo[2] = low 64 bits of x[2] * y
  64  	ADDV	R15, R11, R18	// z_lo[2] = x[2] * y + z[2]
  65  	SGTU	R15, R18, R19	// R19 = 1 if that add wrapped (addend > sum)
  66  	ADDV	R25, R19, R25	// z_hi[2] = x[2] * y + z[2]
  67  	ADDV	R18, R7, R11	// z_lo[2] = x[2] * y + z[2] + c
  68  	SGTU	R18, R11, R19	// R19 = 1 if adding c wrapped
  69  	ADDV	R25, R19, R7	// next c
  70  
      	// Word 3: same sequence, consuming the carry produced by word 2.
  71  	MULHVU	R16, R5, R26	// z_hi[3] = high 64 bits of x[3] * y (unsigned)
  72  	MULV	R16, R5, R16	// z_lo[3] = low 64 bits of x[3] * y
  73  	ADDV	R16, R12, R18	// z_lo[3] = x[3] * y + z[3]
  74  	SGTU	R16, R18, R19	// R19 = 1 if that add wrapped (addend > sum)
  75  	ADDV	R26, R19, R26	// z_hi[3] = x[3] * y + z[3]
  76  	ADDV	R18, R7, R12	// z_lo[3] = x[3] * y + z[3] + c
  77  	SGTU	R18, R12, R19	// R19 = 1 if adding c wrapped
  78  	ADDV	R26, R19, R7	// next c
  79  
      	// Write the four updated words back to z.
  80  	MOVV	R9, 0*8(R4)	// z[0]
  81  	MOVV	R10, 1*8(R4)	// z[1]
  82  	MOVV	R11, 2*8(R4)	// z[2]
  83  	MOVV	R12, 3*8(R4)	// z[3]
  84  
  85  	ADDV	$32, R4	// advance z by four 8-byte words
  86  	ADDV	$32, R6	// advance x by four 8-byte words
  87  
  88  	SUBV	$4, R8	// four words consumed
  89  	BNE	R8, R0, loop	// repeat until the word count reaches zero
  90  
  91  done:
  92  	MOVV	R7, c+24(FP)	// return the final carry word as c
  93  	RET
  94