xor_amd64.s raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build !purego
   6  
   7  #include "textflag.h"
   8  
   9  // func xorBytes(dst, a, b *byte, n int)
  10  TEXT ·xorBytes(SB), NOSPLIT, $0
  11  	MOVQ  dst+0(FP), BX
  12  	MOVQ  a+8(FP), SI
  13  	MOVQ  b+16(FP), CX
  14  	MOVQ  n+24(FP), DX
  15  	TESTQ $15, DX            // AND 15 & len, if not zero jump to not_aligned.
  16  	JNZ   not_aligned
  17  
  18  aligned:
  19  	MOVQ $0, AX // position in slices
  20  
  21  	PCALIGN $16
  22  loop16b:
  23  	MOVOU (SI)(AX*1), X0   // XOR 16byte forwards.
  24  	MOVOU (CX)(AX*1), X1
  25  	PXOR  X1, X0
  26  	MOVOU X0, (BX)(AX*1)
  27  	ADDQ  $16, AX
  28  	CMPQ  DX, AX
  29  	JNE   loop16b
  30  	RET
  31  
  32  	PCALIGN $16
  33  loop_1b:
  34  	SUBQ  $1, DX           // XOR 1byte backwards.
  35  	MOVB  (SI)(DX*1), DI
  36  	MOVB  (CX)(DX*1), AX
  37  	XORB  AX, DI
  38  	MOVB  DI, (BX)(DX*1)
  39  	TESTQ $7, DX           // AND 7 & len, if not zero jump to loop_1b.
  40  	JNZ   loop_1b
  41  	CMPQ  DX, $0           // if len is 0, ret.
  42  	JE    ret
  43  	TESTQ $15, DX          // AND 15 & len, if zero jump to aligned.
  44  	JZ    aligned
  45  
  46  not_aligned:
  47  	TESTQ $7, DX           // AND $7 & len, if not zero jump to loop_1b.
  48  	JNE   loop_1b
  49  	SUBQ  $8, DX           // XOR 8bytes backwards.
  50  	MOVQ  (SI)(DX*1), DI
  51  	MOVQ  (CX)(DX*1), AX
  52  	XORQ  AX, DI
  53  	MOVQ  DI, (BX)(DX*1)
  54  	CMPQ  DX, $16          // if len is greater or equal 16 here, it must be aligned.
  55  	JGE   aligned
  56  
  57  ret:
  58  	RET
  59