arm64.s raw
1 //go:build arm64 && !purego
2
3 #include "go_asm.h"
4 #include "textflag.h"
5
// func polyAddARM64(p, a, b *Poly)
//
// Writes the lane-wise sum of a and b to p, i.e. p[i] = a[i] + b[i] for
// every 16-bit lane. The addition is a plain NEON VADD on .H8 lanes, so
// each lane wraps modulo 2^16; no other reduction happens in this routine.
//
// The work is done in const_N/32 chunks of 64 bytes (32 lanes) each.
// NOTE(review): this presumes Poly is laid out as const_N contiguous 16-bit
// coefficients and that const_N is a positive multiple of 32 (the loop body
// always runs at least once) - confirm against the Go declaration.
//
// Each 64-byte chunk of a and b is fully loaded before the matching chunk of
// p is stored, and all three pointers advance in lockstep, so calling with
// p == a or p == b (exact in-place use) works.
//
// Registers:
//   R0      destination cursor (p), post-incremented by 64 per chunk
//   R1, R2  source cursors (a, b), post-incremented by 64 per chunk
//   R3      chunks remaining
//   V0-V3   chunk of a, then the sums
//   V4-V7   chunk of b
// Also clobbers the condition flags.
TEXT ·polyAddARM64(SB), NOSPLIT|NOFRAME, $0-24
	MOVW	$(const_N / 32), R3	// chunk counter
	MOVD	b+16(FP), R2
	MOVD	a+8(FP), R1
	MOVD	p+0(FP), R0

addChunk:
	// Fetch 32 lanes from each operand and step both source cursors.
	VLD1.P	64(R2), [V4.H8, V5.H8, V6.H8, V7.H8]
	VLD1.P	64(R1), [V0.H8, V1.H8, V2.H8, V3.H8]

	// V0..V3 = a + b, lane by lane.
	VADD	V4.H8, V0.H8, V0.H8
	VADD	V5.H8, V1.H8, V1.H8
	VADD	V6.H8, V2.H8, V2.H8
	VADD	V7.H8, V3.H8, V3.H8

	// Write the 32 sums and step the destination cursor.
	VST1.P	[V0.H8, V1.H8, V2.H8, V3.H8], 64(R0)

	// Count the chunk off; keep going while any remain.
	SUBS	$1, R3, R3
	BGT	addChunk

	RET
29
30
// func polySubARM64(p, a, b *Poly)
//
// Writes the lane-wise difference of a and b to p, i.e. p[i] = a[i] - b[i]
// for every 16-bit lane. The subtraction is a plain NEON VSUB on .H8 lanes,
// so each lane wraps modulo 2^16; no other reduction happens in this routine.
//
// The work is done in const_N/32 chunks of 64 bytes (32 lanes) each.
// NOTE(review): this presumes Poly is laid out as const_N contiguous 16-bit
// coefficients and that const_N is a positive multiple of 32 (the loop body
// always runs at least once) - confirm against the Go declaration.
//
// Each 64-byte chunk of a and b is fully loaded before the matching chunk of
// p is stored, and all three pointers advance in lockstep, so calling with
// p == a or p == b (exact in-place use) works.
//
// Registers:
//   R0      destination cursor (p), post-incremented by 64 per chunk
//   R1, R2  source cursors (a, b), post-incremented by 64 per chunk
//   R3      chunks remaining
//   V0-V3   chunk of a (minuend), then the differences
//   V4-V7   chunk of b (subtrahend)
// Also clobbers the condition flags.
TEXT ·polySubARM64(SB), NOSPLIT|NOFRAME, $0-24
	MOVW	$(const_N / 32), R3	// chunk counter
	MOVD	b+16(FP), R2
	MOVD	a+8(FP), R1
	MOVD	p+0(FP), R0

subChunk:
	// Fetch 32 lanes from each operand and step both source cursors.
	VLD1.P	64(R2), [V4.H8, V5.H8, V6.H8, V7.H8]
	VLD1.P	64(R1), [V0.H8, V1.H8, V2.H8, V3.H8]

	// Go operand order is Vm, Vn, Vd with Vd = Vn - Vm, so V0..V3 = a - b.
	VSUB	V4.H8, V0.H8, V0.H8
	VSUB	V5.H8, V1.H8, V1.H8
	VSUB	V6.H8, V2.H8, V2.H8
	VSUB	V7.H8, V3.H8, V3.H8

	// Write the 32 differences and step the destination cursor.
	VST1.P	[V0.H8, V1.H8, V2.H8, V3.H8], 64(R0)

	// Count the chunk off; keep going while any remain.
	SUBS	$1, R3, R3
	BGT	subChunk

	RET
54