exp_amd64.s raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  #include "textflag.h"
   6  
   7  // The method is based on a paper by Naoki Shibata: "Efficient evaluation
   8  // methods of elementary functions suitable for SIMD computation", Proc.
   9  // of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
  10  // (May 2010). The paper is available at
  11  // https://link.springer.com/article/10.1007/s00450-010-0108-2
  12  //
  13  // The original code and the constants below are from the author's
  14  // implementation available at http://freshmeat.net/projects/sleef.
  15  // The README file says, "The software is in public domain.
  16  // You can use the software without any obligation."
  17  //
  18  // This code is a simplified version of the original.
  19  
  20  #define LN2 0.6931471805599453094172321214581766 // log_e(2)
  21  #define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
  22  #define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
  23  #define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
  24  #define PosInf 0x7FF0000000000000
  25  #define NegInf 0xFFF0000000000000
  26  #define Overflow 7.09782712893384e+02
  27  
  28  DATA exprodata<>+0(SB)/8, $0.5
  29  DATA exprodata<>+8(SB)/8, $1.0
  30  DATA exprodata<>+16(SB)/8, $2.0
  31  DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1
  32  DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2
  33  DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3
  34  DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3
  35  DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4
  36  DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5
  37  GLOBL exprodata<>+0(SB), RODATA, $72
  38  
  39  // func Exp(x float64) float64
  40  TEXT ·archExp(SB),NOSPLIT,$0
  41  	// test bits for not-finite
  42  	MOVQ    x+0(FP), BX
  43  	MOVQ    $~(1<<63), AX // sign bit mask
  44  	MOVQ    BX, DX
  45  	ANDQ    AX, DX
  46  	MOVQ    $PosInf, AX
  47  	CMPQ    AX, DX
  48  	JLE     notFinite
  49  	// check if argument will overflow
  50  	MOVQ    BX, X0
  51  	MOVSD   $Overflow, X1
  52  	COMISD  X1, X0
  53  	JA      overflow
  54  	MOVSD   $LOG2E, X1
  55  	MULSD   X0, X1
  56  	CVTSD2SL X1, BX // BX = exponent
  57  	CVTSL2SD BX, X1
  58  	CMPB ·useFMA(SB), $1
  59  	JE   avxfma
  60  	MOVSD   $LN2U, X2
  61  	MULSD   X1, X2
  62  	SUBSD   X2, X0
  63  	MOVSD   $LN2L, X2
  64  	MULSD   X1, X2
  65  	SUBSD   X2, X0
  66  	// reduce argument
  67  	MULSD   $0.0625, X0
  68  	// Taylor series evaluation
  69  	MOVSD   exprodata<>+64(SB), X1
  70  	MULSD   X0, X1
  71  	ADDSD   exprodata<>+56(SB), X1
  72  	MULSD   X0, X1
  73  	ADDSD   exprodata<>+48(SB), X1
  74  	MULSD   X0, X1
  75  	ADDSD   exprodata<>+40(SB), X1
  76  	MULSD   X0, X1
  77  	ADDSD   exprodata<>+32(SB), X1
  78  	MULSD   X0, X1
  79  	ADDSD   exprodata<>+24(SB), X1
  80  	MULSD   X0, X1
  81  	ADDSD   exprodata<>+0(SB), X1
  82  	MULSD   X0, X1
  83  	ADDSD   exprodata<>+8(SB), X1
  84  	MULSD   X1, X0
  85  	MOVSD   exprodata<>+16(SB), X1
  86  	ADDSD   X0, X1
  87  	MULSD   X1, X0
  88  	MOVSD   exprodata<>+16(SB), X1
  89  	ADDSD   X0, X1
  90  	MULSD   X1, X0
  91  	MOVSD   exprodata<>+16(SB), X1
  92  	ADDSD   X0, X1
  93  	MULSD   X1, X0
  94  	MOVSD   exprodata<>+16(SB), X1
  95  	ADDSD   X0, X1
  96  	MULSD   X1, X0
  97  	ADDSD exprodata<>+8(SB), X0
  98  	// return fr * 2**exponent
  99  ldexp:
 100  	ADDL    $0x3FF, BX // add bias
 101  	JLE     denormal
 102  	CMPL    BX, $0x7FF
 103  	JGE     overflow
 104  lastStep:
 105  	SHLQ    $52, BX
 106  	MOVQ    BX, X1
 107  	MULSD   X1, X0
 108  	MOVSD   X0, ret+8(FP)
 109  	RET
 110  notFinite:
 111  	// test bits for -Inf
 112  	MOVQ    $NegInf, AX
 113  	CMPQ    AX, BX
 114  	JNE     notNegInf
 115  	// -Inf, return 0
 116  underflow: // return 0
 117  	MOVQ    $0, ret+8(FP)
 118  	RET
 119  overflow: // return +Inf
 120  	MOVQ    $PosInf, BX
 121  notNegInf: // NaN or +Inf, return x
 122  	MOVQ    BX, ret+8(FP)
 123  	RET
 124  denormal:
 125  	CMPL    BX, $-52
 126  	JL      underflow
 127  	ADDL    $0x3FE, BX // add bias - 1
 128  	SHLQ    $52, BX
 129  	MOVQ    BX, X1
 130  	MULSD   X1, X0
 131  	MOVQ    $1, BX
 132  	JMP     lastStep
 133  
 134  avxfma:
 135  	MOVSD   $LN2U, X2
 136  	VFNMADD231SD X2, X1, X0
 137  	MOVSD   $LN2L, X2
 138  	VFNMADD231SD X2, X1, X0
 139  	// reduce argument
 140  	MULSD   $0.0625, X0
 141  	// Taylor series evaluation
 142  	MOVSD   exprodata<>+64(SB), X1
 143  	VFMADD213SD exprodata<>+56(SB), X0, X1
 144  	VFMADD213SD exprodata<>+48(SB), X0, X1
 145  	VFMADD213SD exprodata<>+40(SB), X0, X1
 146  	VFMADD213SD exprodata<>+32(SB), X0, X1
 147  	VFMADD213SD exprodata<>+24(SB), X0, X1
 148  	VFMADD213SD exprodata<>+0(SB), X0, X1
 149  	VFMADD213SD exprodata<>+8(SB), X0, X1
 150  	MULSD   X1, X0
 151  	VADDSD exprodata<>+16(SB), X0, X1
 152  	MULSD   X1, X0
 153  	VADDSD exprodata<>+16(SB), X0, X1
 154  	MULSD   X1, X0
 155  	VADDSD exprodata<>+16(SB), X0, X1
 156  	MULSD   X1, X0
 157  	VADDSD exprodata<>+16(SB), X0, X1
 158  	VFMADD213SD   exprodata<>+8(SB), X1, X0
 159  	JMP ldexp
 160