tanh_s390x.s raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  #include "textflag.h"
   6  
   7  // Minimax polynomial approximations
   8  DATA tanhrodataL18<>+0(SB)/8, $-1.0                          // +0:  -1.0, loaded on the L16 path of tanhAsm
   9  DATA tanhrodataL18<>+8(SB)/8, $-2.0                          // +8:  -2.0, loaded on the L16 path of tanhAsm
  10  DATA tanhrodataL18<>+16(SB)/8, $1.0                          // +16: 1.0, used by the hand-encoded adb/sdb (displacement 0x10 off R5) and by L9/L17
  11  DATA tanhrodataL18<>+24(SB)/8, $2.0                          // +24: 2.0, loaded in L3
  12  DATA tanhrodataL18<>+32(SB)/8, $0.20000000000000011868E+01   // +32..+72: constants consumed by the multiply-add chain in L3
  13  DATA tanhrodataL18<>+40(SB)/8, $0.13333333333333341256E+01
  14  DATA tanhrodataL18<>+48(SB)/8, $0.26666666663549111502E+00
  15  DATA tanhrodataL18<>+56(SB)/8, $0.66666666658721844678E+00
  16  DATA tanhrodataL18<>+64(SB)/8, $0.88890217768964374821E-01
  17  DATA tanhrodataL18<>+72(SB)/8, $0.25397199429103821138E-01
  18  DATA tanhrodataL18<>+80(SB)/8, $-.346573590279972643E+00     // +80: numerically about -ln(2)/2; multiplied in at the start of L3
  19  DATA tanhrodataL18<>+88(SB)/8, $20.E0                        // +88: 20.0, the bound the hand-encoded cdb (displacement 0x58 off R5) compares F1 against
  20  GLOBL tanhrodataL18<>+0(SB), RODATA, $96                     // 12 doubles = 96 bytes, read-only
  21  
  22  // Constants
  23  DATA tanhrlog2<>+0(SB)/8, $0x4007154760000000   // raw IEEE-754 bits of a double; moved into F4 via LDGR and multiplied by x. NOTE(review): value looks like 2/ln(2) with a truncated mantissa - confirm
  24  GLOBL tanhrlog2<>+0(SB), RODATA, $8
  25  DATA tanhxadd<>+0(SB)/8, $0xc2f0000100003ff0    // raw bit pattern loaded into F2 as the third operand of WFMSDB. NOTE(review): presumably a magic rounding constant whose low bits seed the exponent built by RISBGN - confirm
  26  GLOBL tanhxadd<>+0(SB), RODATA, $8
  27  DATA tanhxmone<>+0(SB)/8, $-1.0                 // -1.0, returned when the 0 > x test succeeds on the out-of-range path
  28  GLOBL tanhxmone<>+0(SB), RODATA, $8
  29  DATA tanhxzero<>+0(SB)/8, $0                    // 0.0, comparison operand for the sign tests on the out-of-range path
  30  GLOBL tanhxzero<>+0(SB), RODATA, $8
  31  
  32  // Polynomial coefficients
  33  DATA tanhtab<>+0(SB)/8, $0.000000000000000000E+00    // entry 0; tanhAsm loads exactly one entry into F5 at byte offset R4 = (R2 & 0xF) << 3
  34  DATA tanhtab<>+8(SB)/8, $-.171540871271399150E-01
  35  DATA tanhtab<>+16(SB)/8, $-.306597931864376363E-01
  36  DATA tanhtab<>+24(SB)/8, $-.410200970469965021E-01
  37  DATA tanhtab<>+32(SB)/8, $-.486343079978231466E-01
  38  DATA tanhtab<>+40(SB)/8, $-.538226193725835820E-01
  39  DATA tanhtab<>+48(SB)/8, $-.568439602538111520E-01
  40  DATA tanhtab<>+56(SB)/8, $-.579091847395528847E-01
  41  DATA tanhtab<>+64(SB)/8, $-.571909584179366341E-01
  42  DATA tanhtab<>+72(SB)/8, $-.548312665987204407E-01
  43  DATA tanhtab<>+80(SB)/8, $-.509471843643441085E-01
  44  DATA tanhtab<>+88(SB)/8, $-.456353588448863359E-01
  45  DATA tanhtab<>+96(SB)/8, $-.389755254243262365E-01
  46  DATA tanhtab<>+104(SB)/8, $-.310332908285244231E-01
  47  DATA tanhtab<>+112(SB)/8, $-.218623539150173528E-01
  48  DATA tanhtab<>+120(SB)/8, $-.115062908917949451E-01
  49  GLOBL tanhtab<>+0(SB), RODATA, $128                   // 16 doubles = 128 bytes, read-only
  50  
  51  // Tanh returns the hyperbolic tangent of the argument.
  52  //
  53  // Special cases are:
  54  //      Tanh(±0) = ±0
  55  //      Tanh(±Inf) = ±1
  56  //      Tanh(NaN) = NaN
  57  // The algorithm used is minimax polynomial approximation using a table of
  58  // polynomial coefficients determined with a Remez exchange algorithm.
  59  
  60  TEXT ·tanhAsm(SB),NOSPLIT,$0-16    // no stack frame, 16 bytes of args: 8-byte input at x+0(FP), 8-byte result at ret+8(FP)
  61  	FMOVD   x+0(FP), F0             // F0 = x
  62  	// special case Tanh(±0) = ±0
  63  	FMOVD   $(0.0), F1
  64  	FCMPU   F0, F1
  65  	BEQ     tanhIsZero              // x compares equal to zero: hand x straight back so its sign survives
  66  	MOVD    $tanhrodataL18<>+0(SB), R5    // R5 = base of tanhrodataL18; the raw cdb/adb/sdb encodings below address it via R5
  67  	LTDBR	F0, F0                  // load-and-test: sets the condition code from x for BLTU below
  68  	MOVD    $0x4034000000000000, R1    // seed for R1; RISBGN below replaces its top 16 bits, the remaining bits are zero
  69  	BLTU    L15                     // x < 0 or unordered: L15 puts -x in F1 and rejoins at L2
  70  	FMOVD   F0, F1                  // otherwise F1 = x
  71  L2:                                 // F1 holds x with the sign flipped for negative inputs; F0 is still the original x
  72  	MOVD    $tanhxadd<>+0(SB), R2
  73  	FMOVD   0(R2), F2               // F2 = tanhxadd
  74  	MOVD    tanhrlog2<>+0(SB), R2
  75  	LDGR    R2, F4                  // F4 = tanhrlog2 (bits moved GPR -> FPR)
  76  	WFMSDB  V0, V4, V2, V4          // fused multiply-subtract of x, tanhrlog2 and tanhxadd into F4. NOTE(review): presumably F4 = x*tanhrlog2 - tanhxadd - confirm operand order
  77  	MOVD    $tanhtab<>+0(SB), R3    // R3 = base of tanhtab
  78  	LGDR    F4, R2                  // R2 = raw bits of F4
  79  	RISBGZ	$57, $60, $3, R2, R4    // R4 = (R2 & 0xF) << 3, i.e. a byte offset selecting one of the 16 tanhtab entries
  80  	WORD    $0xED105058     //cdb %f1,.L19-.L18(%r5)  (compare F1 with the double at 88(R5), which is 20.0; sets the condition code used by BLT below)
  81  	BYTE    $0x00                   // bytes 5-6 of the 6-byte cdb encoding
  82  	BYTE    $0x19
  83  	RISBGN	$0, $15, $48, R2, R1    // copy the low 16 bits of R2 into the top 16 bits of R1; the N form leaves the cdb condition code intact
  84  	WORD    $0x68543000     //ld %f5,0(%r4,%r3)  (F5 = tanhtab entry at byte offset R4)
  85  	LDGR    R1, F6                  // F6 = R1 reinterpreted as a double
  86  	BLT     L3                      // F1 < 20.0: evaluate the approximation at L3
  87  	MOVD    $tanhxzero<>+0(SB), R1  // out-of-range path: pick the result from the sign of x
  88  	FMOVD   0(R1), F2               // F2 = 0.0
  89  	WFCHDBS V0, V2, V4              // compare-high, sets the condition code: is x > 0 ?
  90  	BEQ     L9                      // yes: return 1.0
  91  	WFCHDBS V2, V0, V2              // is 0 > x ?
  92  	BNE     L1                      // neither test held (unordered input): return x itself
  93  	MOVD    $tanhxmone<>+0(SB), R1
  94  	FMOVD   0(R1), F0               // x is negative: result is -1.0
  95  	FMOVD   F0, ret+8(FP)
  96  	RET
  97  
  98  L3:                                 // in-range path, reached when F1 < 20.0
  99  	FADD    F4, F2                  // F2 += F4 (combines tanhxadd with the WFMSDB result)
 100  	FMOVD   tanhrodataL18<>+80(SB), F4
 101  	FMADD   F4, F2, F0              // fold F4*F2 into F0. NOTE(review): presumably the argument reduction of x - confirm
 102  	FMOVD   tanhrodataL18<>+72(SB), F1
 103  	WFMDB   V0, V0, V3              // F3 = F0 * F0
 104  	FMOVD   tanhrodataL18<>+64(SB), F2
 105  	WFMADB  V0, V1, V2, V1          // multiply-add chain over the +24..+72 constants, interleaved across F1, F2 and F3
 106  	FMOVD   tanhrodataL18<>+56(SB), F4
 107  	FMOVD   tanhrodataL18<>+48(SB), F2
 108  	WFMADB  V1, V3, V4, V1
 109  	FMOVD   tanhrodataL18<>+40(SB), F4
 110  	WFMADB  V3, V2, V4, V2
 111  	FMOVD   tanhrodataL18<>+32(SB), F4
 112  	WORD    $0xB9270022     //lhr %r2,%r2  (sign-extend the low halfword of R2 for the range checks below)
 113  	WFMADB  V3, V1, V4, V1
 114  	FMOVD   tanhrodataL18<>+24(SB), F4    // F4 = 2.0; still live at L16 and L17
 115  	WFMADB  V3, V2, V4, V3
 116  	WFMADB  V0, V5, V0, V2          // brings in the tanhtab entry held in F5
 117  	WFMADB  V0, V1, V3, V0
 118  	WORD    $0xA7183ECF     //lhi %r1,16079  (lower bound, 0x3ECF, for the halfword in R2)
 119  	WFMADB  V0, V2, V5, V2
 120  	FMUL    F6, F2                  // F2 *= F6, the factor built from R1 by RISBGN/LDGR
 121  	MOVW    R2, R10
 122  	MOVW    R1, R11
 123  	CMPBLE  R10, R11, L16           // R2 <= 16079: use the alternate formula at L16
 124  	FMOVD   F6, F0
 125  	WORD    $0xED005010     //adb %f0,.L28-.L18(%r5)  (F0 += 1.0, the double at 16(R5))
 126  	BYTE    $0x00                   // bytes 5-6 of the 6-byte adb encoding
 127  	BYTE    $0x1A
 128  	WORD    $0xA7184330     //lhi %r1,17200  (upper bound, 0x4330, for the halfword in R2)
 129  	FADD    F2, F0                  // F0 += F2
 130  	MOVW    R2, R10
 131  	MOVW    R1, R11
 132  	CMPBGT  R10, R11, L17           // R2 > 17200: use the alternate formula at L17
 133  	WORD    $0xED605010     //sdb %f6,.L28-.L18(%r5)  (F6 -= 1.0, the double at 16(R5))
 134  	BYTE    $0x00                   // bytes 5-6 of the 6-byte sdb encoding
 135  	BYTE    $0x1B
 136  	FADD    F6, F2                  // F2 += F6
 137  	WFDDB   V0, V2, V0              // final divide of F2 and F0. NOTE(review): presumably F0 = F2 / F0 - confirm operand order
 138  	FMOVD   F0, ret+8(FP)
 139  	RET
 140  
 141  L9:                                 // out-of-range positive input
 142  	FMOVD   tanhrodataL18<>+16(SB), F0    // F0 = 1.0
 143  L1:                                 // shared exit: store F0 as the result
 144  	FMOVD   F0, ret+8(FP)
 145  	RET
 146  
 147  L15:                                // entered from BLTU when x < 0 or unordered
 148  	FNEG    F0, F1                  // F1 = -x; F0 keeps the original x
 149  	BR      L2
 150  L16:                                // entered from L3 when the halfword in R2 <= 16079
 151  	FADD    F6, F2                  // F2 += F6
 152  	FMOVD   tanhrodataL18<>+8(SB), F0     // F0 = -2.0
 153  	FMADD   F4, F2, F0              // F0 += F4 * F2, with F4 = 2.0 from L3
 154  	FMOVD   tanhrodataL18<>+0(SB), F4     // F4 = -1.0
 155  	FNEG    F0, F0
 156  	WFMADB  V0, V2, V4, V0          // one more multiply-add with F2 and -1.0 gives the result
 157  	FMOVD   F0, ret+8(FP)
 158  	RET
 159  
 160  L17:                                // entered from L3 when the halfword in R2 > 17200
 161  	WFDDB   V0, V4, V0              // divide involving F4 = 2.0 and F0. NOTE(review): presumably F0 = 2.0 / F0 - confirm operand order
 162  	FMOVD   tanhrodataL18<>+16(SB), F2    // F2 = 1.0
 163  	WFSDB   V0, V2, V0              // subtract involving 1.0 and F0. NOTE(review): presumably F0 = 1.0 - F0 - confirm operand order
 164  	FMOVD   F0, ret+8(FP)
 165  	RET
 166  
 167  tanhIsZero:      // return the input unchanged, so ±0 yields ±0
 168  	FMOVD   F0, ret+8(FP)
 169  	RET
 170