mask_arm64.s raw

   1  #include "textflag.h"
   2  
   // func maskAsm(b *byte, len int, key uint32) uint32
   //
   // maskAsm XORs, in place, the len bytes starting at b with the 4-byte key
   // repeated over the buffer: byte i is XORed with byte (i mod 4) of key,
   // counting from key's least significant byte (loads are little-endian).
   // It returns key rotated right by 8*(len mod 4) bits, so that the low byte
   // of the result is the key byte that would be applied to the next byte.
   //
   // len is assumed to be non-negative; the code does not check it.
   //
   // Frame $0-28: no locals; 20 bytes of arguments padded to 24, followed by
   // the 4-byte result at ret+24(FP).
   //
   // Register use:
   //   R0       = cursor into the buffer (advanced by every post-indexed store)
   //   R1       = bytes remaining; once below 64 only its bits 5..0 are tested
   //   R2       = uint64(key)<<32 | uint64(key)
   //   R3       = key (uint32); rotated only by the 2-byte and 1-byte steps
   //   V0       = R2 duplicated into both 64-bit lanes (16 bytes of key)
   //   R11, R12 = scalar scratch
   //   V1-V4    = vector scratch
   TEXT ·maskAsm(SB), NOSPLIT, $0-28
   	MOVD  b+0(FP), R0
   	MOVD  len+8(FP), R1
   	MOVWU key+16(FP), R3
   	MOVD  R3, R2
   	ORR   R2<<32, R2, R2 // R2 = key repeated twice
   	VDUP  R2, V0.D2      // V0 = key repeated four times
   	CMP   $64, R1
   	BLT   less_than_64

   	// Main loop: 64 bytes per iteration while at least 64 bytes remain.
   loop_64:
   	VLD1   (R0), [V1.B16, V2.B16, V3.B16, V4.B16]
   	VEOR   V1.B16, V0.B16, V1.B16
   	VEOR   V2.B16, V0.B16, V2.B16
   	VEOR   V3.B16, V0.B16, V3.B16
   	VEOR   V4.B16, V0.B16, V4.B16
   	VST1.P [V1.B16, V2.B16, V3.B16, V4.B16], 64(R0)
   	SUB    $64, R1 // plain SUB: the CMP below is what sets the flags
   	CMP    $64, R1
   	BGE    loop_64

   	// Tail: R1 < 64 here. Each set bit of R1 selects one chunk of that size,
   	// handled from largest to smallest. R1 itself is no longer updated.
   less_than_64:
   	CBZ    R1, end
   	TBZ    $5, R1, less_than_32 // bit 5 clear: no 32-byte chunk
   	VLD1   (R0), [V1.B16, V2.B16]
   	VEOR   V1.B16, V0.B16, V1.B16
   	VEOR   V2.B16, V0.B16, V2.B16
   	VST1.P [V1.B16, V2.B16], 32(R0)

   less_than_32:
   	TBZ   $4, R1, less_than_16 // bit 4 clear: no 16-byte chunk
   	LDP   (R0), (R11, R12)
   	EOR   R11, R2, R11
   	EOR   R12, R2, R12
   	STP.P (R11, R12), 16(R0)

   less_than_16:
   	TBZ    $3, R1, less_than_8 // bit 3 clear: no 8-byte chunk
   	MOVD   (R0), R11
   	EOR    R2, R11, R11
   	MOVD.P R11, 8(R0)

   less_than_8:
   	TBZ     $2, R1, less_than_4 // bit 2 clear: no 4-byte chunk
   	MOVWU   (R0), R11
   	EORW    R2, R11, R11 // low 32 bits of R2 are the key
   	MOVWU.P R11, 4(R0)

   	// Every step above consumed a multiple of 4 bytes, so R3 still holds the
   	// key in its original phase. The steps below consume part of a key word
   	// and rotate R3 to keep its low byte aligned with the next data byte.
   less_than_4:
   	TBZ     $1, R1, less_than_2 // bit 1 clear: no 2-byte chunk
   	MOVHU   (R0), R11
   	EORW    R3, R11, R11 // only the low 16 bits are stored back
   	MOVHU.P R11, 2(R0)
   	RORW    $16, R3      // two key bytes consumed

   less_than_2:
   	TBZ     $0, R1, end // bit 0 clear: no trailing byte
   	MOVBU   (R0), R11
   	EORW    R3, R11, R11 // only the low 8 bits are stored back
   	MOVBU.P R11, 1(R0)
   	RORW    $8, R3       // one key byte consumed

   end:
   	MOVWU R3, ret+24(FP)
   	RET
  73