exp_loong64.s raw

   1  // Copyright 2025 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  #include "textflag.h"
   6  
   7  #define NearZero	0x3e30000000000000	// 2**-28
   8  #define PosInf		0x7ff0000000000000
   9  #define FracMask	0x000fffffffffffff
  10  #define C1		0x3cb0000000000000	// 2**-52
  11  
  12  DATA exprodata<>+0(SB)/8, $0.0
  13  DATA exprodata<>+8(SB)/8, $0.5
  14  DATA exprodata<>+16(SB)/8, $1.0
  15  DATA exprodata<>+24(SB)/8, $2.0
  16  DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01	// Ln2Hi
  17  DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10	// Ln2Lo
  18  DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00	// Log2e
  19  DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02	// Overflow
  20  DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02	// Underflow
  21  DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03	// Overflow2
  22  DATA exprodata<>+80(SB)/8, $-1.0740e+03			// Underflow2
  23  DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09	// NearZero
  24  GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96
  25  
  26  DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01	// P1
  27  DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03	// P2
  28  DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05	// P3
  29  DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06	// P4
  30  DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08	// P5
  31  GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40
  32  
  33  // Exp returns e**x, the base-e exponential of x.
  34  // This is an assembly implementation of the method used for function Exp in file exp.go.
  35  //
  36  // func Exp(x float64) float64
  37  TEXT ·archExp(SB),$0-16
  38  	MOVD	x+0(FP), F0	// F0 = x
  39  
  40  	MOVV	$exprodata<>+0(SB), R10
  41  	MOVD	56(R10), F1	// Overflow
  42  	MOVD	64(R10), F2	// Underflow
  43  	MOVD	88(R10), F3	// NearZero
  44  	MOVD	16(R10), F17	// 1.0
  45  
  46  	CMPEQD	F0, F0, FCC0
  47  	BFPF	isNaN		// x = NaN, return NaN
  48  
  49  	CMPGTD	F0, F1, FCC0
  50  	BFPT	overflow	// x > Overflow, return PosInf
  51  
  52  	CMPGTD	F2, F0, FCC0
  53  	BFPT	underflow	// x < Underflow, return 0
  54  
  55  	ABSD	F0, F5
  56  	CMPGTD	F3, F5, FCC0
  57  	BFPT	nearzero	// fabs(x) < NearZero, return 1 + x
  58  
  59  	// argument reduction, x = k*ln2 + r,  |r| <= 0.5*ln2
  60  	// computed as r = hi - lo for extra precision.
  61  	MOVD	0(R10), F5
  62  	MOVD	8(R10), F3
  63  	MOVD	48(R10), F2
  64  	CMPGTD	F0, F5, FCC0
  65  	BFPT	add		// x > 0
  66  sub:
  67  	FMSUBD	F3, F2, F0, F3	// Log2e*x - 0.5
  68  	JMP	2(PC)
  69  add:
  70  	FMADDD	F3, F2, F0, F3	// Log2e*x + 0.5
  71  
  72  	FTINTRZVD F3, F4	// float64 -> int64
  73  	MOVV	F4, R5		// R5 = int(k)
  74  	FFINTDV	F4, F3		// int64 -> float64
  75  
  76  	MOVD	32(R10), F4
  77  	MOVD	40(R10), F5
  78  	FNMSUBD	F0, F3, F4, F4
  79  	MULD	F3, F5, F5
  80  	SUBD	F5, F4, F6
  81  	MULD	F6, F6, F7
  82  
  83  	// compute c
  84  	MOVV	$expmultirodata<>+0(SB), R11
  85  	MOVD	32(R11), F8
  86  	MOVD	24(R11), F9
  87  	FMADDD	F9, F8, F7, F13
  88  	MOVD	16(R11), F10
  89  	FMADDD	F10, F13, F7, F13
  90  	MOVD	8(R11), F11
  91  	FMADDD	F11, F13, F7, F13
  92  	MOVD	0(R11), F12
  93  	FMADDD	F12, F13, F7, F13
  94  	FNMSUBD	F6, F13, F7, F13
  95  
  96  	// compute y
  97  	MOVD	24(R10), F14
  98  	SUBD	F13, F14, F14
  99  	MULD	F6, F13, F15
 100  	DIVD	F14, F15, F15
 101  	SUBD	F15, F5, F15
 102  	SUBD	F4, F15, F15
 103  	SUBD	F15, F17, F16
 104  
 105  	// inline Ldexp(y, k), benefit:
 106  	// 1, no parameter pass overhead.
 107  	// 2, skip unnecessary checks for Inf/NaN/Zero
 108  	MOVV	F16, R4
 109  	MOVV	$FracMask, R9
 110  	AND	R9, R4, R6	// fraction
 111  	SRLV	$52, R4, R7	// exponent
 112  	ADDV	R5, R7
 113  	MOVV	$1, R12
 114  	BGE	R7, R12, normal
 115  	ADDV	$52, R7		// denormal
 116  	MOVV	$C1, R8
 117  	MOVV	R8, F17
 118  normal:
 119  	SLLV	$52, R7
 120  	OR	R7, R6, R4
 121  	MOVV	R4, F0
 122  	MULD	F17, F0		// return m * x
 123  	MOVD	F0, ret+8(FP)
 124  	RET
 125  nearzero:
 126  	ADDD	F17, F0, F0
 127  isNaN:
 128  	MOVD	F0, ret+8(FP)
 129  	RET
 130  underflow:
 131  	MOVV	R0, ret+8(FP)
 132  	RET
 133  overflow:
 134  	MOVV	$PosInf, R4
 135  	MOVV	R4, ret+8(FP)
 136  	RET
 137  
 138  
 139  // Exp2 returns 2**x, the base-2 exponential of x.
 140  // This is an assembly implementation of the method used for function Exp2 in file exp.go.
 141  //
 142  // func Exp2(x float64) float64
 143  TEXT ·archExp2(SB),$0-16
 144  	MOVD	x+0(FP), F0	// F0 = x
 145  
 146  	MOVV	$exprodata<>+0(SB), R10
 147  	MOVD	72(R10), F1	// Overflow2
 148  	MOVD	80(R10), F2	// Underflow2
 149  	MOVD	88(R10), F3	// NearZero
 150  
 151  	CMPEQD	F0, F0, FCC0
 152  	BFPF	isNaN		// x = NaN, return NaN
 153  
 154  	CMPGTD	F0, F1, FCC0
 155  	BFPT	overflow	// x > Overflow, return PosInf
 156  
 157  	CMPGTD	F2, F0, FCC0
 158  	BFPT	underflow	// x < Underflow, return 0
 159  
 160  	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
 161  	// computed as r = hi - lo for extra precision.
 162  	MOVD	0(R10), F10
 163  	MOVD	8(R10), F2
 164  	CMPGTD	F0, F10, FCC0
 165  	BFPT	add
 166  sub:
 167  	SUBD	F2, F0, F3	// x - 0.5
 168  	JMP	2(PC)
 169  add:
 170  	ADDD	F2, F0, F3	// x + 0.5
 171  
 172  	FTINTRZVD F3, F4
 173  	MOVV	F4, R5
 174  	FFINTDV	F4, F3
 175  
 176  	MOVD	32(R10), F4
 177  	MOVD	40(R10), F5
 178  	SUBD	F3, F0, F3
 179  	MULD	F3, F4
 180  	FNMSUBD	F10, F3, F5, F5
 181  	SUBD	F5, F4, F6
 182  	MULD	F6, F6, F7
 183  
 184  	// compute c
 185  	MOVV	$expmultirodata<>+0(SB), R11
 186  	MOVD	32(R11), F8
 187  	MOVD	24(R11), F9
 188  	FMADDD	F9, F8, F7, F13
 189  	MOVD	16(R11), F10
 190  	FMADDD	F10, F13, F7, F13
 191  	MOVD	8(R11), F11
 192  	FMADDD	F11, F13, F7, F13
 193  	MOVD	0(R11), F12
 194  	FMADDD	F12, F13, F7, F13
 195  	FNMSUBD	F6, F13, F7, F13
 196  
 197  	// compute y
 198  	MOVD	24(R10), F14
 199  	SUBD	F13, F14, F14
 200  	MULD	F6, F13, F15
 201  	DIVD	F14, F15
 202  
 203  	MOVD	16(R10), F17
 204  	SUBD	F15, F5, F15
 205  	SUBD	F4, F15, F15
 206  	SUBD	F15, F17, F16
 207  
 208  	// inline Ldexp(y, k), benefit:
 209  	// 1, no parameter pass overhead.
 210  	// 2, skip unnecessary checks for Inf/NaN/Zero
 211  	MOVV	F16, R4
 212  	MOVV	$FracMask, R9
 213  	SRLV	$52, R4, R7	// exponent
 214  	AND	R9, R4, R6	// fraction
 215  	ADDV	R5, R7
 216  	MOVV	$1, R12
 217  	BGE	R7, R12, normal
 218  
 219  	ADDV	$52, R7		// denormal
 220  	MOVV	$C1, R8
 221  	MOVV	R8, F17
 222  normal:
 223  	SLLV	$52, R7
 224  	OR	R7, R6, R4
 225  	MOVV	R4, F0
 226  	MULD	F17, F0
 227  isNaN:
 228  	MOVD	F0, ret+8(FP)
 229  	RET
 230  underflow:
 231  	MOVV	R0, ret+8(FP)
 232  	RET
 233  overflow:
 234  	MOVV	$PosInf, R4
 235  	MOVV	R4, ret+8(FP)
 236  	RET
 237