xor_riscv64.s raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build !purego
   6  
   7  #include "textflag.h"
   8  
   9  // func xorBytes(dst, a, b *byte, n int) - stores a[i] ^ b[i] into dst[i] for every i in [0, n).
  10  TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0	// NOSPLIT|NOFRAME with $0: no stack-split check, no frame
  11  	MOV	dst+0(FP), X10	// X10 = dst, the output cursor
  12  	MOV	a+8(FP), X11	// X11 = a, first input cursor
  13  	MOV	b+16(FP), X12	// X12 = b, second input cursor
  14  	MOV	n+24(FP), X13	// X13 = n, bytes still to process
  15  // NOTE(review): every size comparison on X13 below is signed and loop1 exits only at zero - presumably callers never pass n < 0; confirm.
  16  	MOV	$32, X15
  17  	BLT	X13, X15, loop4_check	// fewer than 32 bytes: skip the alignment work, use byte accesses only
  18  
  19  	// Check alignment - if the three pointers do not share the same offset within an 8 byte word, we have to use byte-sized accesses throughout.
  20  	AND	$7, X10, X5	// X5 = dst & 7
  21  	AND	$7, X11, X6	// X6 = a & 7
  22  	AND	$7, X12, X7	// X7 = b & 7
  23  	BNE	X5, X6, loop4_check
  24  	BNE	X5, X7, loop4_check
  25  	BEQZ	X5, loop64_check	// offset 0: already 8 byte aligned, no head bytes to peel off
  26  
  27  	// XOR one byte at a time until all three pointers reach 8 byte alignment.
  28  	MOV	$8, X8
  29  	SUB	X5, X8	// X8 = 8 - (dst & 7): number of head bytes, in 1..7
  30  	SUB	X8, X13	// take the head bytes out of the remaining count up front (n >= 32 here)
  31  align:
  32  	MOVBU	0(X11), X16
  33  	MOVBU	0(X12), X17
  34  	XOR	X16, X17	// X17 = a byte ^ b byte
  35  	MOVB	X17, 0(X10)
  36  	ADD	$1, X10
  37  	ADD	$1, X11
  38  	ADD	$1, X12
  39  	SUB	$1, X8
  40  	BNEZ	X8, align	// repeat until the head bytes are consumed
  41  
  42  loop64_check:	// all three pointers are 8 byte aligned when control reaches this label
  43  	MOV	$64, X15
  44  	BLT	X13, X15, tail32_check
  45  	PCALIGN	$16	// align the loop entry to a 16 byte boundary
  46  loop64:	// 64 bytes per iteration: eight 8 byte words, handled two words at a time
  47  	MOV	0(X11), X16	// words at offsets 0 and 8
  48  	MOV	0(X12), X17
  49  	MOV	8(X11), X18
  50  	MOV	8(X12), X19
  51  	XOR	X16, X17
  52  	XOR	X18, X19
  53  	MOV	X17, 0(X10)
  54  	MOV	X19, 8(X10)
  55  	MOV	16(X11), X20	// words at offsets 16 and 24
  56  	MOV	16(X12), X21
  57  	MOV	24(X11), X22
  58  	MOV	24(X12), X23
  59  	XOR	X20, X21
  60  	XOR	X22, X23
  61  	MOV	X21, 16(X10)
  62  	MOV	X23, 24(X10)
  63  	MOV	32(X11), X16	// words at offsets 32 and 40 (X16-X19 reused)
  64  	MOV	32(X12), X17
  65  	MOV	40(X11), X18
  66  	MOV	40(X12), X19
  67  	XOR	X16, X17
  68  	XOR	X18, X19
  69  	MOV	X17, 32(X10)
  70  	MOV	X19, 40(X10)
  71  	MOV	48(X11), X20	// words at offsets 48 and 56 (X20-X23 reused)
  72  	MOV	48(X12), X21
  73  	MOV	56(X11), X22
  74  	MOV	56(X12), X23
  75  	XOR	X20, X21
  76  	XOR	X22, X23
  77  	MOV	X21, 48(X10)
  78  	MOV	X23, 56(X10)
  79  	ADD	$64, X10
  80  	ADD	$64, X11
  81  	ADD	$64, X12
  82  	SUB	$64, X13
  83  	BGE	X13, X15, loop64	// X15 still holds 64: loop while a full 64 byte block remains
  84  	BEQZ	X13, done
  85  
  86  tail32_check:	// fewer than 64 bytes remain, so at most one 32 byte chunk is handled here
  87  	MOV	$32, X15
  88  	BLT	X13, X15, tail16_check
  89  	MOV	0(X11), X16	// words at offsets 0 and 8
  90  	MOV	0(X12), X17
  91  	MOV	8(X11), X18
  92  	MOV	8(X12), X19
  93  	XOR	X16, X17
  94  	XOR	X18, X19
  95  	MOV	X17, 0(X10)
  96  	MOV	X19, 8(X10)
  97  	MOV	16(X11), X20	// words at offsets 16 and 24
  98  	MOV	16(X12), X21
  99  	MOV	24(X11), X22
 100  	MOV	24(X12), X23
 101  	XOR	X20, X21
 102  	XOR	X22, X23
 103  	MOV	X21, 16(X10)
 104  	MOV	X23, 24(X10)
 105  	ADD	$32, X10
 106  	ADD	$32, X11
 107  	ADD	$32, X12
 108  	SUB	$32, X13
 109  	BEQZ	X13, done
 110  
 111  tail16_check:	// fewer than 32 bytes remain, so at most one 16 byte chunk is handled here
 112  	MOV	$16, X15
 113  	BLT	X13, X15, loop4_check
 114  	MOV	0(X11), X16
 115  	MOV	0(X12), X17
 116  	MOV	8(X11), X18
 117  	MOV	8(X12), X19
 118  	XOR	X16, X17
 119  	XOR	X18, X19
 120  	MOV	X17, 0(X10)
 121  	MOV	X19, 8(X10)
 122  	ADD	$16, X10
 123  	ADD	$16, X11
 124  	ADD	$16, X12
 125  	SUB	$16, X13
 126  	BEQZ	X13, done
 127  
 128  loop4_check:	// byte-access path: inputs under 32 bytes, mismatched alignment, and the sub-16 byte tail all arrive here
 129  	MOV	$4, X15
 130  	BLT	X13, X15, loop1
 131  	PCALIGN	$16	// align the loop entry to a 16 byte boundary
 132  loop4:	// 4 bytes per iteration using byte loads and stores only
 133  	MOVBU	0(X11), X16	// bytes at offsets 0 and 1
 134  	MOVBU	0(X12), X17
 135  	MOVBU	1(X11), X18
 136  	MOVBU	1(X12), X19
 137  	XOR	X16, X17
 138  	XOR	X18, X19
 139  	MOVB	X17, 0(X10)
 140  	MOVB	X19, 1(X10)
 141  	MOVBU	2(X11), X20	// bytes at offsets 2 and 3
 142  	MOVBU	2(X12), X21
 143  	MOVBU	3(X11), X22
 144  	MOVBU	3(X12), X23
 145  	XOR	X20, X21
 146  	XOR	X22, X23
 147  	MOVB	X21, 2(X10)
 148  	MOVB	X23, 3(X10)
 149  	ADD	$4, X10
 150  	ADD	$4, X11
 151  	ADD	$4, X12
 152  	SUB	$4, X13
 153  	BGE	X13, X15, loop4	// X15 still holds 4: loop while at least 4 bytes remain
 154  
 155  	PCALIGN	$16	// align the loop entry to a 16 byte boundary
 156  loop1:	// remaining bytes one at a time; the zero test comes first, so n == 0 exits without touching memory
 157  	BEQZ	X13, done
 158  	MOVBU	0(X11), X16
 159  	MOVBU	0(X12), X17
 160  	XOR	X16, X17
 161  	MOVB	X17, 0(X10)
 162  	ADD	$1, X10
 163  	ADD	$1, X11
 164  	ADD	$1, X12
 165  	SUB	$1, X13
 166  	JMP	loop1
 167  
 168  done:
 169  	RET
 170