fp_amd64.s raw

   1  //go:build amd64 && !purego
   2  // +build amd64,!purego
   3  
   4  #include "textflag.h"
   5  #include "fp_amd64.h"
   6  
   7  // func cmovAmd64(x, y *Elt, n uint)
      //
      // Thin wrapper: loads the three Go stack arguments into registers and
      // expands the cselect macro on them.
      // Frame $0-24: no local stack frame, 24 bytes of arguments (3 x 8 bytes).
      // NOTE(review): cselect is not defined in this file (presumably it comes
      // from fp_amd64.h). By name it looks like "copy y into x when n is set",
      // but its exact semantics and register clobbers must be confirmed there.
   8  TEXT ·cmovAmd64(SB),NOSPLIT,$0-24
   9      MOVQ x+0(FP), DI // DI = x
  10      MOVQ y+8(FP), SI // SI = y
  11      MOVQ n+16(FP), BX // BX = n
      // Macro operands: memory at x, memory at y, and n in a register.
  12      cselect(0(DI),0(SI),BX)
  13      RET
  14  
  15  // func cswapAmd64(x, y *Elt, n uint)
      //
      // Thin wrapper: loads the three Go stack arguments into registers and
      // expands the cswap macro on them.
      // Frame $0-24: no local stack frame, 24 bytes of arguments (3 x 8 bytes).
      // NOTE(review): cswap is not defined in this file (presumably it comes
      // from fp_amd64.h). By name it looks like "exchange x and y when n is
      // set", but its exact semantics and register clobbers must be confirmed.
  16  TEXT ·cswapAmd64(SB),NOSPLIT,$0-24
  17      MOVQ x+0(FP), DI // DI = x
  18      MOVQ y+8(FP), SI // SI = y
  19      MOVQ n+16(FP), BX // BX = n
      // Macro operands: memory at x, memory at y, and n in a register.
  20      cswap(0(DI),0(SI),BX)
  21      RET
  22  
  23  // func subAmd64(z, x, y *Elt)
      //
      // Thin wrapper: loads the three pointer arguments into registers and
      // expands the subtraction macro with the memory they point to.
      // Frame $0-24: no local stack frame, 24 bytes of arguments (3 x 8 bytes).
      // NOTE(review): subtraction is not defined in this file (presumably it
      // comes from fp_amd64.h). Presumably it stores x - y into z; confirm the
      // operand roles and register clobbers there.
  24  TEXT ·subAmd64(SB),NOSPLIT,$0-24
  25      MOVQ z+0(FP), DI // DI = z
  26      MOVQ x+8(FP), SI // SI = x
  27      MOVQ y+16(FP), BX // BX = y
  28      subtraction(0(DI),0(SI),0(BX))
  29      RET
  30  
  31  // func addsubAmd64(x, y *Elt)
      //
      // Thin wrapper: loads both pointer arguments into registers and expands
      // the addSub macro with the memory they point to.
      // Frame $0-16: no local stack frame, 16 bytes of arguments (2 x 8 bytes).
      // NOTE(review): addSub is not defined in this file (presumably it comes
      // from fp_amd64.h). By name it presumably replaces x and y with their sum
      // and difference; confirm semantics and register clobbers there.
  32  TEXT ·addsubAmd64(SB),NOSPLIT,$0-16
  33      MOVQ x+0(FP), DI // DI = x
  34      MOVQ y+8(FP), SI // SI = y
  35      addSub(0(DI),0(SI))
  36      RET
  37
      // addLegacy / addBmi2Adx: the two bodies passed to CHECK_BMI2ADX by
      // addAmd64. Both hard-code their operands as the memory at DI, SI and BX,
      // so those registers must be loaded before either macro is expanded.
      // NOTE(review): additionLeg / additionAdx are defined outside this file;
      // the operand order is presumably (result, operand, operand) - confirm.
  38  #define addLegacy \
  39      additionLeg(0(DI),0(SI),0(BX))
  40  #define addBmi2Adx \
  41      additionAdx(0(DI),0(SI),0(BX))
  42
      // mulLegacy / mulBmi2Adx: the two bodies passed to CHECK_BMI2ADX by
      // mulAmd64. Each expands to two steps: integerMul* on (0(SP), 0(SI),
      // 0(BX)), then reduceFromDouble* on (0(DI), 0(SP)). They hard-code DI, SI,
      // BX and the memory at 0(SP), so the expanding function must load those
      // registers and declare a stack frame.
      // NOTE(review): 0(SP) presumably holds the unreduced double-width product
      // between the two steps; the helper macros are defined outside this
      // file - confirm their operand roles there.
  43  #define mulLegacy \
  44      integerMulLeg(0(SP),0(SI),0(BX)) \
  45      reduceFromDoubleLeg(0(DI),0(SP))
  46  #define mulBmi2Adx \
  47      integerMulAdx(0(SP),0(SI),0(BX)) \
  48      reduceFromDoubleAdx(0(DI),0(SP))
  49
      // sqrLegacy / sqrBmi2Adx: the two bodies passed to CHECK_BMI2ADX by
      // sqrAmd64. Each expands to two steps: integerSqr* on (0(SP), 0(SI)),
      // then reduceFromDouble* on (0(DI), 0(SP)). They hard-code DI, SI and the
      // memory at 0(SP), so the expanding function must load those registers
      // and declare a stack frame.
      // NOTE(review): 0(SP) presumably holds the unreduced double-width square
      // between the two steps; the helper macros are defined outside this
      // file - confirm their operand roles there.
  50  #define sqrLegacy \
  51      integerSqrLeg(0(SP),0(SI)) \
  52      reduceFromDoubleLeg(0(DI),0(SP))
  53  #define sqrBmi2Adx \
  54      integerSqrAdx(0(SP),0(SI)) \
  55      reduceFromDoubleAdx(0(DI),0(SP))
  56  
  57  // func addAmd64(z, x, y *Elt)
      //
      // Loads z, x, y into DI, SI, BX - the registers hard-coded by the
      // addLegacy and addBmi2Adx macros - and passes both variants to
      // CHECK_BMI2ADX together with the label name LADD.
      // Frame $0-24: no local stack frame, 24 bytes of arguments (3 x 8 bytes).
      // NOTE(review): CHECK_BMI2ADX is not defined in this file (presumably
      // fp_amd64.h). No RET follows it here, so the macro presumably selects
      // one variant and returns itself - confirm.
  58  TEXT ·addAmd64(SB),NOSPLIT,$0-24
  59      MOVQ z+0(FP), DI // DI = z
  60      MOVQ x+8(FP), SI // SI = x
  61      MOVQ y+16(FP), BX // BX = y
  62      CHECK_BMI2ADX(LADD, addLegacy, addBmi2Adx)
  63  
  64  // func mulAmd64(z, x, y *Elt)
      //
      // Loads z, x, y into DI, SI, BX - the registers hard-coded by the
      // mulLegacy and mulBmi2Adx macros - and passes both variants to
      // CHECK_BMI2ADX together with the label name LMUL.
      // Frame $64-24: 64 bytes of local stack, which the mul macros address as
      // 0(SP), and 24 bytes of arguments (3 x 8 bytes).
      // NOTE(review): CHECK_BMI2ADX is not defined in this file (presumably
      // fp_amd64.h). No RET follows it here, so the macro presumably selects
      // one variant and returns itself - confirm.
  65  TEXT ·mulAmd64(SB),NOSPLIT,$64-24
  66      MOVQ z+0(FP), DI // DI = z
  67      MOVQ x+8(FP), SI // SI = x
  68      MOVQ y+16(FP), BX // BX = y
  69      CHECK_BMI2ADX(LMUL, mulLegacy, mulBmi2Adx)
  70  
  71  // func sqrAmd64(z, x *Elt)
      //
      // Loads z and x into DI and SI - the registers hard-coded by the
      // sqrLegacy and sqrBmi2Adx macros - and passes both variants to
      // CHECK_BMI2ADX together with the label name LSQR.
      // Frame $64-16: 64 bytes of local stack, which the sqr macros address as
      // 0(SP), and 16 bytes of arguments (2 x 8 bytes).
      // NOTE(review): CHECK_BMI2ADX is not defined in this file (presumably
      // fp_amd64.h). No RET follows it here, so the macro presumably selects
      // one variant and returns itself - confirm.
  72  TEXT ·sqrAmd64(SB),NOSPLIT,$64-16
  73      MOVQ z+0(FP), DI // DI = z
  74      MOVQ x+8(FP), SI // SI = x
  75      CHECK_BMI2ADX(LSQR, sqrLegacy, sqrBmi2Adx)
  76  
  77  // func modpAmd64(z *Elt)
      //
      // Reduces the 256-bit value stored at z, in place, to its representative
      // in [0, p) for p = 2^255-19. The code is straight-line: it uses
      // conditional moves and contains no branches.
      //
      // z is accessed as four 64-bit limbs, least significant at 0(DI).
      // Writing the input as b*2^255 + x' (b = bit 255), it is congruent to
      // x' + 19*b modulo p. The code forms t = x' + 19*b + 19; bit 255 of t is
      // set exactly when x' + 19*b >= p. In that case clearing bit 255 leaves
      // x' + 19*b - p; otherwise the extra 19 is subtracted again.
      //
      // Registers written: AX, CX, DI, R8-R11. Frame $0-8: no local stack
      // frame, 8 bytes of arguments.
  78  TEXT ·modpAmd64(SB),NOSPLIT,$0-8
  79      MOVQ z+0(FP), DI // DI = z
  80
  81      MOVQ   (DI),  R8 // R8 = limb 0 (least significant)
  82      MOVQ  8(DI),  R9
  83      MOVQ 16(DI), R10
  84      MOVQ 24(DI), R11 // R11 = limb 3; its bit 63 is bit 255 of the value
  85
  86      MOVL $19, AX // AX stays 19 for both conditional moves below
  87      MOVL $38, CX // CX starts as 19 (for b) + 19 (trial addend)
  88
  89      BTRQ $63, R11 // PUT BIT 255 IN CARRY FLAG AND CLEAR
  90      CMOVLCC AX, CX // C[255] ? 38 : 19
  91
  92      // ADD EITHER 19 OR 38 TO C
  93      ADDQ CX,  R8
  94      ADCQ $0,  R9
  95      ADCQ $0, R10
  96      ADCQ $0, R11 // sign flag now mirrors bit 255 of the sum
  97
  98      // TEST FOR BIT 255 AGAIN; ONLY TRIGGERED ON OVERFLOW MODULO 2^255-19
      // CMOVLPL consumes the sign flag left by the last ADCQ, so the MOVL in
      // between must be emitted as a plain move that leaves the flags alone.
  99      MOVL     $0,  CX
 100      CMOVLPL  AX,  CX // C[255] ? 0 : 19
 101      BTRQ    $63, R11 // CLEAR BIT 255
 102
 103      // SUBTRACT 19 IF NECESSARY
      // The MOVQ stores are interleaved with the SBBQ borrow chain; they do not
      // modify flags, so the borrow still propagates from limb to limb.
 104      SUBQ CX,  R8
 105      MOVQ  R8,   (DI)
 106      SBBQ $0,  R9
 107      MOVQ  R9,  8(DI)
 108      SBBQ $0, R10
 109      MOVQ R10, 16(DI)
 110      SBBQ $0, R11
 111      MOVQ R11, 24(DI)
 112      RET
 113