cbrt_s390x.s raw

   1  // Copyright 2017 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  #include "textflag.h"
   6  
   7  // Minimax polynomial coefficients and other constants
      //
      // cbrtrodataL9 holds nine float64 values (72 bytes, read-only). cbrtAsm
      // keeps its address in R9: offsets 0-56 are loaded with FMOVD and offset
      // 64 is the memory operand of the hand-encoded mdb instruction.
      // Offsets 0-40 are named c0..c5 below because cbrtAsm combines them as
      // c0 + c1*e + c2*e^2 + c3*e^3 + c4*e^4 + c5*e^5.
   8  DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00	// c0
   9  DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00	// c1
  10  DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00	// c2
  11  DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00	// c3
  12  DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00	// c4
  13  DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00	// c5
  14  DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625	// 1 + 2^-12; addend used when forming e
  15  DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00	// 1/3; multiplier in the FMSUB refinement step
  16  DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.	// 2^96; scale applied to inputs whose exponent field is zero
  17  GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72
  18  
  19  // Index tables
      //
      // cbrttab32069: 80 one-byte entries (read-only). cbrtAsm loads a single
      // entry with llc at index ((s & 7) << 4) | m, where m is bits 16-19 of
      // the high word held in R2 (the top four mantissa bits) and s is the sum
      // of the two halfword lookups from cbrttab12067 and cbrttab22068.
      // Every entry of those two tables has a low nibble of 0, 1 or 2, so
      // (s & 7) is at most 4 and the index stays within the 80 bytes.
      // Each 8-byte DATA word packs eight consecutive entries; s390x is
      // big-endian, so the most significant byte is the lowest-indexed entry.
      // Literals are written without leading zeros.
  20  DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
  21  DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
  22  DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
  23  DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
  24  DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
  25  DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
  26  DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
  27  DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
  28  DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
  29  DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
  30  GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80
  31  
      // cbrttab22068: 64 halfword entries (128 bytes, read-only). cbrtAsm adds
      // one entry to R1 with ah; the byte offset is 2 * (bits 20-25 of the
      // high word held in R2), i.e. the low six bits of the exponent field.
      // Each 8-byte DATA word packs four consecutive halfwords, most
      // significant first (s390x is big-endian). Literals are written without
      // leading zeros: the first word is 0x0151, 0x0150, 0x0142, 0x0141.
  32  DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
  33  DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
  34  DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
  35  DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
  36  DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
  37  DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
  38  DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
  39  DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
  40  DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
  41  DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
  42  DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
  43  DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
  44  DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
  45  DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
  46  DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
  47  DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
  48  GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128
  49  
      // cbrttab12067: 64 halfword entries (128 bytes, read-only). cbrtAsm
      // loads one entry with lh; the byte offset is 2 * (bits 26-31 of the
      // high word held in R2), i.e. the sign bit followed by the top five bits
      // of the exponent field.
      // Entries 32-63 (offsets 64-127, selected when the sign bit is set) are
      // entries 0-31 with 0x8000 added. Bit 15 of the summed value ends up in
      // the sign position of the seed that cbrtAsm builds, so presumably this
      // is how the sign of the input is carried through.
      // Each 8-byte DATA word packs four consecutive halfwords, most
      // significant first (s390x is big-endian).
  50  DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
  51  DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
  52  DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
  53  DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
  54  DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
  55  DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
  56  DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
  57  DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
  58  DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
  59  DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
  60  DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
  61  DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
  62  DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
  63  DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
  64  DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
  65  DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
  66  GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128
  67  
  68  // Cbrt returns the cube root of the argument.
  69  //
  70  // Special cases are:
  71  //      Cbrt(±0) = ±0
  72  //      Cbrt(±Inf) = ±Inf
  73  //      Cbrt(NaN) = NaN
  74  // The algorithm used is minimax polynomial approximation
  75  // with coefficients determined with a Remez exchange algorithm.
      //
      // Frame $0-16: no locals; the float64 argument is read from x+0(FP) and
      // the float64 result is stored to ret+8(FP). NOSPLIT, no calls.
      //
      // Outline, as read from the instructions below:
      //   1. Classify x by its high 32 bits with the sign cleared:
      //        <= 0x000FFFFF  exponent field is zero (±0 or subnormal) -> L2
      //        <= 0x7FEFFFFF  finite, normal                           -> L8
      //        otherwise      ±Inf or NaN: x is returned unchanged (L1)
      //   2. L2 returns ±0 unchanged. Other inputs are multiplied by 2^96 into
      //      F2, only to obtain index bits; F0 keeps the original x. R4 is set
      //      to 0x200, which later adds 32 to the exponent field of the seed
      //      (2^32 is the cube root of 2^96). L8 sets R4 to 0.
      //   3. L4 sums three table entries selected by sign, exponent and
      //      mantissa bits, adds R4, and places the low 16 bits of the sum in
      //      the top 16 bits of F2 as a seed y.
      //   4. With F2 = (1/3)*(y^4*x - y) - y, z = x*F2^2 and
      //      e = F2*z + (1 + 2^-12), the result is z + z*p(e), where p is the
      //      degree-5 polynomial whose coefficients sit at offsets 0-40 of
      //      cbrtrodataL9.
      // NOTE(review): the seed appears to approximate x^(-1/3) and step 4 to
      // be one Newton-style refinement followed by a polynomial correction;
      // this is inferred from the arithmetic, not stated in the source.
      //
      // In this file Fn and Vn operands with the same number are used for the
      // same scalar value. Hand-encoded WORD/BYTE sequences carry their
      // mnemonic in the trailing comment.
  76  
  77  TEXT	·cbrtAsm(SB), NOSPLIT, $0-16
  78  	FMOVD	x+0(FP), F0	// F0 = x
  79  	MOVD	$·cbrtrodataL9<>+0(SB), R9	// R9 = base of the float64 constant table
  80  	LGDR	F0, R2	// R2 = raw 64-bit pattern of x
  81  	WORD	$0xC039000F	//iilf	%r3,1048575  (6-byte instruction: low word of R3 = 0x000FFFFF)
  82  	BYTE	$0xFF
  83  	BYTE	$0xFF
  84  	SRAD	$32, R2	// R2 = high word of x (sign, exponent, top mantissa bits)
  85  	WORD	$0xB9170012	//llgtr	%r1,%r2  (R1 = low 31 bits of R2: the high word with the sign cleared)
  86  	MOVW	R1, R6
  87  	MOVW	R3, R7
  88  	CMPBLE	R6, R7, L2	// exponent field is zero: ±0 or subnormal
  89  	WORD	$0xC0397FEF	//iilf	%r3,2146435071  (low word of R3 = 0x7FEFFFFF)
  90  	BYTE	$0xFF
  91  	BYTE	$0xFF
  92  	MOVW	R3, R7
  93  	CMPBLE	R6, R7, L8	// finite, normal input
      	// Fall through: exponent field is all ones (±Inf or NaN).
  94  L1:
  95  	FMOVD	F0, ret+8(FP)	// return x unchanged
  96  	RET
  97  L3:	// not branched to anywhere in this function
  98  L2:
  99  	LTDBR	F0, F0	// test x against zero
 100  	BEQ	L1	// ±0 is returned unchanged
 101  	FMOVD	F0, F2
 102  	WORD	$0xED209040	//mdb	%f2,.L10-.L9(%r9)  (F2 = x * 64(R9), i.e. x * 2^96)
 103  	BYTE	$0x00
 104  	BYTE	$0x1C
 105  	MOVH	$0x200, R4	// seed adjustment: +32 in the exponent field
 106  	LGDR	F2, R2
 107  	SRAD	$32, R2	// R2 = high word of the scaled value
 108  L4:
      	// Here R2 = high word used for indexing, R4 = seed adjustment (0 or 0x200).
 109  	RISBGZ	$57, $62, $39, R2, R3	// R3 = 2 * (bits 26-31 of R2): sign and top five exponent bits
 110  	MOVD	$·cbrttab12067<>+0(SB), R1
 111  	WORD	$0x48131000	//lh	%r1,0(%r3,%r1)  (R1 = halfword from cbrttab12067)
 112  	RISBGZ	$57, $62, $45, R2, R3	// R3 = 2 * (bits 20-25 of R2): low six exponent bits
 113  	MOVD	$·cbrttab22068<>+0(SB), R5
 114  	RISBGNZ	$60, $63, $48, R2, R2	// R2 = bits 16-19 of R2: top four mantissa bits
 115  	WORD	$0x4A135000	//ah	%r1,0(%r3,%r5)  (R1 += halfword from cbrttab22068)
 116  	BYTE	$0x18	//lr	%r3,%r1  (two-byte instruction: R3 = sum of the two lookups)
 117  	BYTE	$0x31
 118  	MOVD	$·cbrttab32069<>+0(SB), R1
 119  	FMOVD	56(R9), F1	// F1 = 1/3
 120  	FMOVD	48(R9), F5	// F5 = 1 + 2^-12
 121  	WORD	$0xEC23393B	//rosbg	%r2,%r3,57,59,4  (R2 |= (R3 & 7) << 4)
 122  	BYTE	$0x04
 123  	BYTE	$0x56
 124  	WORD	$0xE3121000	//llc	%r1,0(%r2,%r1)  (R1 = byte from cbrttab32069 at index R2)
 125  	BYTE	$0x00
 126  	BYTE	$0x94
 127  	ADDW	R3, R1	// add the sum of the two halfword lookups
 128  	ADDW	R4, R1	// add the seed adjustment (0, or 0x200 on the L2 path)
 129  	SLW	$16, R1, R1	// keep the low 16 bits, moved to the top of the 32-bit word
 130  	SLD	$32, R1, R1	// ...and then to the top 16 bits of the 64-bit register
 131  	LDGR	R1, F2	// F2 = y, the seed (only sign, exponent and four mantissa bits set)
 132  	WFMDB	V2, V2, V4	// V4 = y^2
 133  	WFMDB	V4, V0, V6	// V6 = y^2 * x
 134  	WFMSDB	V4, V6, V2, V4	// V4 = y^4 * x - y
 135  	FMOVD	40(R9), F6	// F6 = c5
 136  	FMSUB	F1, F4, F2	// F2 = (1/3)*(y^4*x - y) - y
 137  	FMOVD	32(R9), F4	// F4 = c4
 138  	WFMDB	V2, V2, V3	// V3 = F2^2
 139  	FMOVD	24(R9), F1	// F1 = c3
 140  	FMUL	F3, F0	// F0 = z = x * F2^2
 141  	FMOVD	16(R9), F3	// F3 = c2
 142  	WFMADB	V2, V0, V5, V2	// V2 = e = F2*z + (1 + 2^-12)
 143  	FMOVD	8(R9), F5	// F5 = c1
 144  	FMADD	F6, F2, F4	// F4 = c5*e + c4
 145  	WFMADB	V2, V1, V3, V1	// V1 = c3*e + c2
 146  	WFMDB	V2, V2, V6	// V6 = e^2
 147  	FMOVD	0(R9), F3	// F3 = c0
 148  	WFMADB	V4, V6, V1, V4	// V4 = (c5*e + c4)*e^2 + (c3*e + c2)
 149  	WFMADB	V2, V5, V3, V2	// V2 = c1*e + c0
 150  	FMADD	F4, F6, F2	// F2 = p(e) = V4*e^2 + (c1*e + c0)
 151  	FMADD	F2, F0, F0	// F0 = z + z*p(e)
 152  	FMOVD	F0, ret+8(FP)
 153  	RET
 154  L8:
 155  	MOVH	$0x0, R4	// normal input: no seed adjustment
 156  	BR	L4
 157