crc32_loong64.s raw

   1  // Copyright 2024 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  #include "textflag.h"
   6  
   7  // castagnoliUpdate updates the non-inverted crc with the given data.
   8  
   9  // func castagnoliUpdate(crc uint32, p []byte) uint32
  10  TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36
  11  	MOVWU	crc+0(FP), R4		// a0 = CRC value
  12  	MOVV	p+8(FP), R5		// a1 = data pointer
  13  	MOVV	p_len+16(FP), R6	// a2 = len(p)
  14  
  15  	SGT	$8, R6, R12
  16  	BNE	R12, less_than_8
  17  	AND	$7, R5, R12
  18  	BEQ	R12, aligned
  19  
  20  	// Process the first few bytes to 8-byte align the input.
  21  	// t0 = 8 - t0. We need to process this many bytes to align.
  22  	SUB	$1, R12
  23  	XOR	$7, R12
  24  
  25  	AND	$1, R12, R13
  26  	BEQ	R13, align_2
  27  	MOVB	(R5), R13
  28  	CRCCWBW	R4, R13, R4
  29  	ADDV	$1, R5
  30  	ADDV	$-1, R6
  31  
  32  align_2:
  33  	AND	$2, R12, R13
  34  	BEQ	R13, align_4
  35  	MOVH	(R5), R13
  36  	CRCCWHW	R4, R13, R4
  37  	ADDV	$2, R5
  38  	ADDV	$-2, R6
  39  
  40  align_4:
  41  	AND	$4, R12, R13
  42  	BEQ	R13, aligned
  43  	MOVW	(R5), R13
  44  	CRCCWWW	R4, R13, R4
  45  	ADDV	$4, R5
  46  	ADDV	$-4, R6
  47  
  48  aligned:
  49  	// The input is now 8-byte aligned and we can process 8-byte chunks.
  50  	SGT	$8, R6, R12
  51  	BNE	R12, less_than_8
  52  	MOVV	(R5), R13
  53  	CRCCWVW	R4, R13, R4
  54  	ADDV	$8, R5
  55  	ADDV	$-8, R6
  56  	JMP	aligned
  57  
  58  less_than_8:
  59  	// We may have some bytes left over; process 4 bytes, then 2, then 1.
  60  	AND	$4, R6, R12
  61  	BEQ	R12, less_than_4
  62  	MOVW	(R5), R13
  63  	CRCCWWW	R4, R13, R4
  64  	ADDV	$4, R5
  65  	ADDV	$-4, R6
  66  
  67  less_than_4:
  68  	AND	$2, R6, R12
  69  	BEQ	R12, less_than_2
  70  	MOVH	(R5), R13
  71  	CRCCWHW	R4, R13, R4
  72  	ADDV	$2, R5
  73  	ADDV	$-2, R6
  74  
  75  less_than_2:
  76  	BEQ	R6, done
  77  	MOVB	(R5), R13
  78  	CRCCWBW	R4, R13, R4
  79  
  80  done:
  81  	MOVW	R4, ret+32(FP)
  82  	RET
  83  
  84  // ieeeUpdate updates the non-inverted crc with the given data.
  85  
  86  // func ieeeUpdate(crc uint32, p []byte) uint32
  87  TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36
  88  	MOVWU	crc+0(FP), R4		// a0 = CRC value
  89  	MOVV	p+8(FP), R5		// a1 = data pointer
  90  	MOVV	p_len+16(FP), R6	// a2 = len(p)
  91  
  92  	SGT	$8, R6, R12
  93  	BNE	R12, less_than_8
  94  	AND	$7, R5, R12
  95  	BEQ	R12, aligned
  96  
  97  	// Process the first few bytes to 8-byte align the input.
  98  	// t0 = 8 - t0. We need to process this many bytes to align.
  99  	SUB	$1, R12
 100  	XOR	$7, R12
 101  
 102  	AND	$1, R12, R13
 103  	BEQ	R13, align_2
 104  	MOVB	(R5), R13
 105  	CRCWBW	R4, R13, R4
 106  	ADDV	$1, R5
 107  	ADDV	$-1, R6
 108  
 109  align_2:
 110  	AND	$2, R12, R13
 111  	BEQ	R13, align_4
 112  	MOVH	(R5), R13
 113  	CRCWHW	R4, R13, R4
 114  	ADDV	$2, R5
 115  	ADDV	$-2, R6
 116  
 117  align_4:
 118  	AND	$4, R12, R13
 119  	BEQ	R13, aligned
 120  	MOVW	(R5), R13
 121  	CRCWWW	R4, R13, R4
 122  	ADDV	$4, R5
 123  	ADDV	$-4, R6
 124  
 125  aligned:
 126  	// The input is now 8-byte aligned and we can process 8-byte chunks.
 127  	SGT	$8, R6, R12
 128  	BNE	R12, less_than_8
 129  	MOVV	(R5), R13
 130  	CRCWVW	R4, R13, R4
 131  	ADDV	$8, R5
 132  	ADDV	$-8, R6
 133  	JMP	aligned
 134  
 135  less_than_8:
 136  	// We may have some bytes left over; process 4 bytes, then 2, then 1.
 137  	AND	$4, R6, R12
 138  	BEQ	R12, less_than_4
 139  	MOVW	(R5), R13
 140  	CRCWWW	R4, R13, R4
 141  	ADDV	$4, R5
 142  	ADDV	$-4, R6
 143  
 144  less_than_4:
 145  	AND	$2, R6, R12
 146  	BEQ	R12, less_than_2
 147  	MOVH	(R5), R13
 148  	CRCWHW	R4, R13, R4
 149  	ADDV	$2, R5
 150  	ADDV	$-2, R6
 151  
 152  less_than_2:
 153  	BEQ	R6, done
 154  	MOVB	(R5), R13
 155  	CRCWBW	R4, R13, R4
 156  
 157  done:
 158  	MOVW	R4, ret+32(FP)
 159  	RET
 160  
 161