arm64.s raw

   1  //go:build arm64 && !purego
   2  
   3  #include "go_asm.h"
   4  #include "textflag.h"
   5  
   // polyAddARM64 stores the lane-wise sum a + b into p.
   //
   // Data is handled as 16-bit lanes (H8 = eight halfwords per 128-bit vector).
   // Every pass moves 64 bytes (four vectors, 32 halfwords) from each input and
   // writes 64 bytes of output; const_N/32 passes are made in total. VADD is a
   // plain wrapping add per lane; no modular reduction is applied here.
   //
   // Each 64-byte chunk of both inputs is loaded before the matching chunk of p
   // is written, so calling with p == a or p == b works in place.
   //
   // NOTE(review): const_N is presumably the coefficient count of Poly exported
   // through go_asm.h, and is assumed to be a positive multiple of 32. The loop
   // tests its counter at the bottom, so one pass always runs.
   //
   // Registers: R0 = p cursor, R1 = a cursor, R2 = b cursor, R3 = passes left,
   //            V0-V3 = chunk of a, V4-V7 = chunk of b, then the sum.
   //
   // func polyAddARM64(p, a, b *Poly)
   TEXT ·polyAddARM64(SB), NOSPLIT|NOFRAME, $0-24
       MOVW    $(const_N / 32), R3         // number of 64-byte passes
       MOVD    p+0(FP), R0
       MOVD    a+8(FP), R1
       MOVD    b+16(FP), R2

   addChunk:
       // Fetch 32 halfwords from each operand; .P post-increments the cursor.
       VLD1.P  64(R1), [V0.H8, V1.H8, V2.H8, V3.H8]
       VLD1.P  64(R2), [V4.H8, V5.H8, V6.H8, V7.H8]

       // Accumulate into the b registers: V(4+i) = V(4+i) + V(i).
       VADD    V0.H8, V4.H8, V4.H8
       VADD    V1.H8, V5.H8, V5.H8
       VADD    V2.H8, V6.H8, V6.H8
       VADD    V3.H8, V7.H8, V7.H8

       VST1.P  [V4.H8, V5.H8, V6.H8, V7.H8], 64(R0)

       SUBS    $1, R3                      // sets flags for the branch below
       BGT     addChunk                    // continue while passes remain

       RET
  29  
  30  
  31  // func polySubARM64(p, a, b *Poly)
  32  TEXT ·polySubARM64(SB), NOSPLIT|NOFRAME, $0-24
  33      MOVD    p+0(FP), R0
  34      MOVD    a+8(FP), R1
  35      MOVD    b+16(FP), R2
  36  
  37      MOVW    $(const_N / 32), R3
  38  
  39  loop:
  40      VLD1.P  (64)(R1), [V0.H8, V1.H8, V2.H8, V3.H8]
  41      VLD1.P  (64)(R2), [V4.H8, V5.H8, V6.H8, V7.H8]
  42  
  43      VSUB    V4.H8, V0.H8, V0.H8
  44      VSUB    V5.H8, V1.H8, V1.H8
  45      VSUB    V6.H8, V2.H8, V2.H8
  46      VSUB    V7.H8, V3.H8, V3.H8
  47  
  48      VST1.P  [V0.H8, V1.H8, V2.H8, V3.H8], (64)(R0)
  49  
  50      SUBS    $1, R3, R3
  51      BGT     loop
  52  
  53      RET
  54