sum_loong64.s raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build gc && !purego
   6  
   7  // func update(state *macState, msg []byte)
      //
      // update folds msg into the three-word accumulator h kept at the start of
      // *state. For every block it computes h = (h + block) * r and then partially
      // reduces the product: everything at or above bit 130 is folded back in
      // multiplied by 5, which is arithmetic modulo 2^130 - 5.
      //
      // Words accessed through state (byte offsets):
      //    0: h0,  8: h1,  16: h2   loaded on entry, written back at done
      //   24: r0, 32: r1            loaded on entry, never written
      //
      // Block handling:
      //   - While at least 16 bytes remain, a block is read with two 64-bit loads
      //     and an extra 1 is added into h2 (the bit just above the 128-bit block).
      //   - A final block of 1..15 bytes is assembled one byte at a time with a
      //     single 0x01 byte placed directly after the last message byte; no
      //     extra 1 is added into h2 for it.
      //
      // Register use:
      //   R4  state            R5  msg cursor      R6  bytes remaining
      //   R7  constant 16      R8/R9/R10  h0/h1/h2 R11/R12  r0/r1
      //   R13-R17, R24-R28     scratch
      //
      // Reading aid: the last operand is the destination, and SGTU a, b, c sets
      // c = 1 when a > b (unsigned), else 0. After "ADDV x, y, sum" the test
      // "SGTU x, sum, c" therefore yields the carry out of that addition.
   8  TEXT ·update(SB), $0-32	// $0-32: no local frame, 32 bytes of arguments
   9  	MOVV	state+0(FP), R4
  10  	MOVV	msg_base+8(FP), R5
  11  	MOVV	msg_len+16(FP), R6
  12  
  13  	MOVV	$0x10, R7	// block size, used as the loop bound
  14  
  15  	MOVV	(R4), R8	// h0
  16  	MOVV	8(R4), R9	// h1
  17  	MOVV	16(R4), R10	// h2
  18  	MOVV	24(R4), R11	// r0
  19  	MOVV	32(R4), R12	// r1
  20  
  21  	BLT	R6, R7, bytes_between_0_and_15	// fewer than 16 bytes: skip the full-block loop
  22  
  23  loop:
      	// h += block + 2^128, for one full 16-byte block.
  24  	MOVV	(R5), R14	// msg[0:8]
  25  	MOVV	8(R5), R16	// msg[8:16]
  26  	ADDV	R14, R8, R8	// h0 (x1 + y1 = z1', if z1' < x1 then z1' overflow)
  27  	ADDV	R16, R9, R27	// h1 + msg[8:16], before the h0 carry is applied
  28  	SGTU	R14, R8, R24	// h0.carry
  29  	SGTU	R9, R27, R28	// carry out of h1 + msg[8:16]
  30  	ADDV	R27, R24, R9	// h1
  31  	SGTU	R27, R9, R24	// carry out of adding h0.carry
  32  	OR	R24, R28, R24	// h1.carry
  33  	ADDV	$0x01, R24, R24	// plus the 1 that sits just above the 128-bit block
  34  	ADDV	R10, R24, R10	// h2
  35  
  36  	ADDV	$16, R5, R5	// msg = msg[16:]
  37  
  38  multiply:
      	// Multiply (h2:h1:h0) by (r1:r0). The partial products are summed into
      	// four words t0..t3 held in R14, R15, R25, R26. Only the low 64 bits of
      	// the h2 products are kept.
  39  	MULV	R8, R11, R14	// h0r0.lo
  40  	MULHVU	R8, R11, R15	// h0r0.hi
  41  	MULV	R9, R11, R13	// h1r0.lo
  42  	MULHVU	R9, R11, R16	// h1r0.hi
  43  	ADDV	R13, R15, R15	// t1 (partial) = h0r0.hi + h1r0.lo
  44  	SGTU	R13, R15, R24	// carry out of that addition
  45  	ADDV	R24, R16, R16	// h1r0.hi + carry
  46  	MULV	R10, R11, R25	// h2r0
  47  	ADDV	R16, R25, R25	// t2 (partial) = h2r0 + h1r0.hi + carry; no carry-out is tracked here
  48  	MULV	R8, R12, R13	// h0r1.lo
  49  	MULHVU	R8, R12, R16	// h0r1.hi
  50  	ADDV	R13, R15, R15	// t1 = t1 (partial) + h0r1.lo
  51  	SGTU	R13, R15, R24	// carry out of t1
  52  	ADDV	R24, R16, R16	// h0r1.hi + carry
  53  	MOVV	R16, R8	// park it in R8; h0 is not read again before it is rebuilt below
  54  	MULV	R10, R12, R26	// h2r1
  55  	MULV	R9, R12, R13	// h1r1.lo
  56  	MULHVU	R9, R12, R16	// h1r1.hi
  57  	ADDV	R13, R25, R25	// t2 (partial) += h1r1.lo
  58  	ADDV	R16, R26, R27	// h2r1 + h1r1.hi
  59  	SGTU	R13, R25, R24	// carry out of the t2 addition two lines above
  60  	ADDV	R27, R24, R26	// t3 (partial) = h2r1 + h1r1.hi + carry
  61  	ADDV	R8, R25, R25	// t2 += parked (h0r1.hi + carry)
  62  	SGTU	R8, R25, R24	// carry out of t2
  63  	ADDV	R24, R26, R26	// t3
      	// Partial reduction. h = (t2 & 3 : t1 : t0). Let cc = (t3 : t2 & ~3),
      	// i.e. the bits at or above 130 still scaled by 4. Adding cc and then
      	// cc >> 2 adds 5 times the bits above 130.
  64  	AND	$3, R25, R10	// h2 = t2 & 3
  65  	AND	$-4, R25, R17	// cc.lo = t2 with its low two bits cleared; cc.hi is t3 (R26)
  66  	ADDV	R17, R14, R8	// h0 = t0 + cc.lo
  67  	ADDV	R26, R15, R27	// t1 + cc.hi
  68  	SGTU	R17, R8, R24	// carry out of h0
  69  	SGTU	R26, R27, R28	// carry out of t1 + cc.hi
  70  	ADDV	R27, R24, R9	// h1 = t1 + cc.hi + h0 carry
  71  	SGTU	R27, R9, R24	// carry out of adding the h0 carry
  72  	OR	R24, R28, R24	// h1.carry
  73  	ADDV	R24, R10, R10	// h2 += h1.carry
  74  	SLLV	$62, R26, R27	// low two bits of t3 moved to the top of a word
  75  	SRLV	$2, R25, R28	// t2 >> 2
  76  	SRLV	$2, R26, R26	// (cc >> 2).hi
  77  	OR	R27, R28, R25	// (cc >> 2).lo
  78  	ADDV	R25, R8, R8	// h0 += (cc >> 2).lo
  79  	ADDV	R26, R9, R27	// h1 + (cc >> 2).hi
  80  	SGTU	R25, R8, R24	// carry out of h0
  81  	SGTU	R26, R27, R28	// carry out of h1 + (cc >> 2).hi
  82  	ADDV	R27, R24, R9	// h1
  83  	SGTU	R27, R9, R24	// carry out of adding the h0 carry
  84  	OR	R24, R28, R24	// h1.carry
  85  	ADDV	R24, R10, R10	// h2 += h1.carry
  86  
  87  	SUBV	$16, R6, R6	// one block consumed
  88  	BGE	R6, R7, loop	// at least 16 bytes still remain
  89  
  90  bytes_between_0_and_15:
  91  	BEQ	R6, R0, done	// nothing left over
  92  	MOVV	$1, R14	// R15:R14 starts as 1; it ends up as the 0x01 byte after the last message byte
  93  	XOR	R15, R15	// R15 = 0
  94  	ADDV	R6, R5, R5	// R5 = one past the last remaining byte
  95  
  96  flush_buffer:
      	// Walk the remaining bytes from last to first. Each pass shifts the
      	// 128-bit value R15:R14 left by 8 and inserts one byte at the bottom,
      	// so the first remaining byte ends up in the least significant position.
  97  	MOVBU	-1(R5), R25	// next byte, taken from the end, zero-extended
  98  	SRLV	$56, R14, R24	// top byte of the low word, about to be shifted out
  99  	SLLV	$8, R15, R28
 100  	SLLV	$8, R14, R14
 101  	OR	R24, R28, R15	// high word = (high << 8) | byte shifted out of the low word
 102  	XOR	R25, R14, R14	// low 8 bits of R14 are zero after the shift, so this inserts the byte
 103  	SUBV	$1, R6, R6
 104  	SUBV	$1, R5, R5
 105  	BNE	R6, R0, flush_buffer
 106  
      	// h += padded block (R15:R14). Unlike the full-block path, only the
      	// carry is added into h2.
 107  	ADDV	R14, R8, R8	// h0
 108  	SGTU	R14, R8, R24	// h0.carry
 109  	ADDV	R15, R9, R27	// h1 + high word, before the h0 carry is applied
 110  	SGTU	R15, R27, R28	// carry out of that addition
 111  	ADDV	R27, R24, R9	// h1
 112  	SGTU	R27, R9, R24	// carry out of adding h0.carry
 113  	OR	R24, R28, R24	// h1.carry
 114  	ADDV	R10, R24, R10	// h2
 115  
 116  	MOVV	$16, R6	// the SUBV $16 after multiply then leaves 0, so control falls through to done
 117  	JMP	multiply
 118  
 119  done:
      	// Write the accumulator back; r0 and r1 are left as they were.
 120  	MOVV	R8, (R4)
 121  	MOVV	R9, 8(R4)
 122  	MOVV	R10, 16(R4)
 123  	RET
 124