xor_arm64.s raw

   1  // Copyright 2020 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build !purego
   6  
   7  #include "textflag.h"
   8  
   9  // func xorBytes(dst, a, b *byte, n int)
  10  TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
  11  	MOVD	dst+0(FP), R0
  12  	MOVD	a+8(FP), R1
  13  	MOVD	b+16(FP), R2
  14  	MOVD	n+24(FP), R3
  15  	CMP	$64, R3
  16  	BLT	tail
  17  loop_64:
  18  	VLD1.P	64(R1), [V0.B16, V1.B16, V2.B16, V3.B16]
  19  	VLD1.P	64(R2), [V4.B16, V5.B16, V6.B16, V7.B16]
  20  	VEOR	V0.B16, V4.B16, V4.B16
  21  	VEOR	V1.B16, V5.B16, V5.B16
  22  	VEOR	V2.B16, V6.B16, V6.B16
  23  	VEOR	V3.B16, V7.B16, V7.B16
  24  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
  25  	SUBS	$64, R3
  26  	CMP	$64, R3
  27  	BGE	loop_64
  28  tail:
  29  	// quick end
  30  	CBZ	R3, end
  31  	TBZ	$5, R3, less_than32
  32  	VLD1.P	32(R1), [V0.B16, V1.B16]
  33  	VLD1.P	32(R2), [V2.B16, V3.B16]
  34  	VEOR	V0.B16, V2.B16, V2.B16
  35  	VEOR	V1.B16, V3.B16, V3.B16
  36  	VST1.P	[V2.B16, V3.B16], 32(R0)
  37  less_than32:
  38  	TBZ	$4, R3, less_than16
  39  	LDP.P	16(R1), (R11, R12)
  40  	LDP.P	16(R2), (R13, R14)
  41  	EOR	R11, R13, R13
  42  	EOR	R12, R14, R14
  43  	STP.P	(R13, R14), 16(R0)
  44  less_than16:
  45  	TBZ	$3, R3, less_than8
  46  	MOVD.P	8(R1), R11
  47  	MOVD.P	8(R2), R12
  48  	EOR	R11, R12, R12
  49  	MOVD.P	R12, 8(R0)
  50  less_than8:
  51  	TBZ	$2, R3, less_than4
  52  	MOVWU.P	4(R1), R13
  53  	MOVWU.P	4(R2), R14
  54  	EORW	R13, R14, R14
  55  	MOVWU.P	R14, 4(R0)
  56  less_than4:
  57  	TBZ	$1, R3, less_than2
  58  	MOVHU.P	2(R1), R15
  59  	MOVHU.P	2(R2), R16
  60  	EORW	R15, R16, R16
  61  	MOVHU.P	R16, 2(R0)
  62  less_than2:
  63  	TBZ	$0, R3, end
  64  	MOVBU	(R1), R17
  65  	MOVBU	(R2), R19
  66  	EORW	R17, R19, R19
  67  	MOVBU	R19, (R0)
  68  end:
  69  	RET
  70