atan2_s390x.s raw

   1  // Copyright 2017 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  #include "textflag.h"
   6  
   7  #define PosInf		0x7FF0000000000000	// float64 bit pattern of +Inf
   8  #define NegInf		0xFFF0000000000000	// float64 bit pattern of -Inf
   9  #define NegZero		0x8000000000000000	// float64 bit pattern of -0 (only the sign bit set)
   10  #define Pi		0x400921FB54442D18	// float64 bit pattern of Pi
   11  #define NegPi		0xC00921FB54442D18	// float64 bit pattern of -Pi
   12  #define Pi3Div4		0x4002D97C7F3321D2	// float64 bit pattern of 3Pi/4
   13  #define NegPi3Div4	0xC002D97C7F3321D2	// float64 bit pattern of -3Pi/4
   14  #define PiDiv4		0x3FE921FB54442D18	// float64 bit pattern of Pi/4
   15  #define NegPiDiv4	0xBFE921FB54442D18	// float64 bit pattern of -Pi/4
  16  
   17  // Minimax polynomial coefficients and other constants: 20 float64 values that atan2Asm reads at fixed byte offsets from R9
   18  DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00	// close to 1/5
   19  DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00	// close to -1/3
   20  DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00	// close to 1/9
   21  DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00	// close to -1/7
   22  DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
   23  DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
   24  DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
   25  DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
   26  DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
   27  DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
   28  DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
   29  DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
   30  DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
   31  DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
   32  DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
   33  DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
   34  DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
   35  DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
   36  DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
   37  DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
   38  GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160	// read-only symbol, 160 bytes = 20 entries of 8 bytes
  39  
   40  DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b	// index 0: +h; h times the atan2xpim value is approximately Pi/2
   41  DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b	// index 1: -h (same pattern with the sign bit set)
   42  DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b	// index 2: +2h (same mantissa, exponent one higher), approximately Pi after scaling
   43  DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b	// index 3: -2h
   44  GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32	// read-only, 4 entries; atan2Asm indexes it with R2*8 at label L18
   45  DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000	// multiplier for the selected atan2xpi2h entry (low 32 bits of the pattern are zero)
   46  GLOBL ·atan2xpim<> + 0(SB), RODATA, $8	// read-only, a single 8-byte value
  47  
  48  // Atan2 returns the arc tangent of y/x, using
  49  // the signs of the two to determine the quadrant
  50  // of the return value.
  51  //
  52  // Special cases are (in order):
  53  //      Atan2(y, NaN) = NaN
  54  //      Atan2(NaN, x) = NaN
  55  //      Atan2(+0, x>=0) = +0
  56  //      Atan2(-0, x>=0) = -0
  57  //      Atan2(+0, x<=-0) = +Pi
  58  //      Atan2(-0, x<=-0) = -Pi
  59  //      Atan2(y>0, 0) = +Pi/2
  60  //      Atan2(y<0, 0) = -Pi/2
  61  //      Atan2(+Inf, +Inf) = +Pi/4
  62  //      Atan2(-Inf, +Inf) = -Pi/4
  63  //      Atan2(+Inf, -Inf) = 3Pi/4
  64  //      Atan2(-Inf, -Inf) = -3Pi/4
  65  //      Atan2(y, +Inf) = 0
  66  //      Atan2(y>0, -Inf) = +Pi
  67  //      Atan2(y<0, -Inf) = -Pi
  68  //      Atan2(+Inf, x) = +Pi/2
  69  //      Atan2(-Inf, x) = -Pi/2
  70  // The algorithm used is minimax polynomial approximation
  71  // with coefficients determined with a Remez exchange algorithm.
  72  
   73  TEXT	·atan2Asm(SB), NOSPLIT, $0-24	// frame $0-24: no locals, 24 bytes of arguments and result: x+0(FP), y+8(FP), ret+16(FP)
   74  	// special cases are filtered on the raw 64-bit patterns of the arguments; comments below call the first argument x and the second y, the reverse of the Atan2(y, x) naming in the table above
   75  	MOVD	x+0(FP), R1	// R1 = bit pattern of the first argument
   76  	MOVD	y+8(FP), R2	// R2 = bit pattern of the second argument
   77  
   78  	// special case Atan2(NaN, y) = NaN
   79  	MOVD	$~(1<<63), R5	// mask with every bit set except the sign bit
   80  	AND	R1, R5		// R5 = bits of |x| (R1 itself is left unchanged)
   81  	MOVD	$PosInf, R3
   82  	CMPUBLT	R3, R5, returnX	// unsigned +Inf < |x| bits: x is NaN, return x itself
   83  
   84  	// special case Atan2(x, NaN) = NaN
   85  	MOVD	$~(1<<63), R5
   86  	AND	R2, R5	// R5 = bits of |y|
   87  	CMPUBLT R3, R5, returnY	// R3 still holds the +Inf pattern; y is NaN, return y itself
   88  
   89  	MOVD	$NegZero, R3
   90  	CMPUBEQ	R3, R1, xIsNegZero	// x is exactly -0
   91  
   92  	MOVD	$0, R3
   93  	CMPUBEQ	R3, R1, xIsPosZero	// x is exactly +0
   94  
   95  	MOVD	$PosInf, R4
   96  	CMPUBEQ	R4, R2, yIsPosInf	// y is +Inf
   97  
   98  	MOVD	$NegInf, R4
   99  	CMPUBEQ	R4, R2, yIsNegInf	// y is -Inf
  100  	BR	Normal	// none of the bit-pattern special cases matched
  101  xIsNegZero:
  102  	// special case Atan2(-0, y>=0) = -0
  103  	MOVD	$0, R4
  104  	CMPBLE	R4, R2, returnX	// 0 <= bits(y) as a signed value, i.e. the sign bit of y is clear: return x (-0)
  105  
  106  	//special case Atan2(-0, -0) = -Pi. NOTE(review): reading CMPBGE as a signed compare, NegZero is the minimum int64, so only y == -0 matches; other negative y go to Normal
  107  	MOVD	$NegZero, R4
  108  	CMPBGE	R4, R2, returnNegPi
  109  	BR	Normal
  110  xIsPosZero:
  111  	//special case Atan2(0, 0) = 0
  112  	MOVD	$0, R4
  113  	CMPUBEQ	R4, R2, returnX	// y is exactly +0: return x (+0)
  114  
  115  	//special case Atan2(0, -0) = Pi. NOTE(review): as in xIsNegZero, the signed compare against NegZero only matches y == -0; every other y goes to Normal
  116  	MOVD	$NegZero, R4
  117  	CMPBGE	R4, R2, returnPi
  118  	BR Normal
  119  yIsNegInf:
  120  	//special case Atan2(+Inf, -Inf) = 3Pi/4
  121  	MOVD	$PosInf, R3
  122  	CMPUBEQ	R3, R1, posInfNegInf
  123  
  124  	//special case Atan2(-Inf, -Inf) = -3Pi/4
  125  	MOVD	$NegInf, R3
  126  	CMPUBEQ	R3, R1, negInfNegInf
  127  	BR Normal	// x is finite and y is -Inf: handled by the general path
  128  yIsPosInf:
  129  	//special case Atan2(+Inf, +Inf) = Pi/4
  130  	MOVD	$PosInf, R3
  131  	CMPUBEQ	R3, R1, posInfPosInf
  132  
  133  	//special case Atan2(-Inf, +Inf) = -Pi/4
  134  	MOVD	$NegInf, R3
  135  	CMPUBEQ	R3, R1, negInfPosInf
  136  
  137  	//special case Atan2(x, +Inf) = Copysign(0, x)
  138  	CMPBLT	R1, $0, returnNegZero	// bits(x) negative as a signed value, i.e. the sign bit of x is set
  139  	BR returnPosZero
  140  
  141  Normal:	// general path; F-registers and the same-numbered V-registers are used interchangeably below
  142  	FMOVD	x+0(FP), F0	// F0 = first argument
  143  	FMOVD	y+8(FP), F2	// F2 = second argument
  144  	MOVD	$·atan2rodataL25<>+0(SB), R9	// R9 = address of the coefficient table
  145  	LGDR	F0, R2	// R2 = bits of the first argument (R1/R2 roles are swapped relative to the special-case code above)
  146  	LGDR	F2, R1	// R1 = bits of the second argument
  147  	RISBGNZ	$32, $63, $32, R2, R2	// presumably leaves the high 32 bits of the first argument in the low half of R2, rest zeroed — confirm against RISBGN semantics
  148  	RISBGNZ	$32, $63, $32, R1, R1	// likewise for the second argument in R1
  149  	WORD	$0xB9170032	//llgtr	%r3,%r2 (R3 = low 31 bits of R2: high word of the first argument without its sign bit)
  150  	RISBGZ	$63, $63, $33, R2, R5	// presumably R5 = sign bit of the first argument as 0 or 1 — confirm
  151  	WORD	$0xB9170041	//llgtr	%r4,%r1 (R4 = high word of the second argument without its sign bit)
  152  	WFLCDB	V0, V20	// V20 = -F0, kept for the labels L22 and L24 which use it as |F0| when F0 is negative
  153  	MOVW	R4, R6
  154  	MOVW	R3, R7
  155  	CMPUBLT	R6, R7, L17	// magnitude high word of the second argument below that of the first: use the reciprocal form at L17
  156  	WFDDB	V2, V0, V3	// V3 = F0 / F2, first argument over second (NOTE(review): divisor-first operand order assumed — confirm)
  157  	ADDW	$2, R5, R2	// R2 = sign index + 2, selecting entry 2 or 3 of atan2xpi2h at L18
  158  	MOVW	R4, R6
  159  	MOVW	R3, R7
  160  	CMPUBLE	R6, R7, L20	// the < case already branched, so this is taken when the high words are equal: L20 compares full magnitudes
  161  L3:	// polynomial evaluation on the ratio t held in F3; coefficient loads are interleaved with the multiply-adds
  162  	WFMDB	V3, V3, V4	// V4 = t*t
  163  	VLEG	$0, 152(R9), V18
  164  	VLEG	$0, 144(R9), V16
  165  	FMOVD	136(R9), F1
  166  	FMOVD	128(R9), F5
  167  	FMOVD	120(R9), F6
  168  	WFMADB	V4, V16, V5, V16	// multiply-add, read as V16 = V4*V16 + V5 (last operand is the destination)
  169  	WFMADB	V4, V6, V1, V6
  170  	FMOVD	112(R9), F7
  171  	WFMDB	V4, V4, V1	// V1 = t^4
  172  	WFMADB	V4, V7, V18, V7
  173  	VLEG	$0, 104(R9), V18
  174  	WFMADB	V1, V6, V16, V6
  175  	CMPWU	R4, R3	// sets the condition code consumed by BLT/BGT after the polynomial; NOTE(review): relies on the instructions in between not changing it — confirm
  176  	FMOVD	96(R9), F5
  177  	VLEG	$0, 88(R9), V16
  178  	WFMADB	V4, V5, V18, V5
  179  	VLEG	$0, 80(R9), V18
  180  	VLEG	$0, 72(R9), V22
  181  	WFMADB	V4, V16, V18, V16
  182  	VLEG	$0, 64(R9), V18
  183  	WFMADB	V1, V7, V5, V7
  184  	WFMADB	V4, V18, V22, V18
  185  	WFMDB	V1, V1, V5	// V5 = t^8
  186  	WFMADB	V1, V16, V18, V16
  187  	VLEG	$0, 56(R9), V18
  188  	WFMADB	V5, V6, V7, V6
  189  	VLEG	$0, 48(R9), V22
  190  	FMOVD	40(R9), F7
  191  	WFMADB	V4, V7, V18, V7
  192  	VLEG	$0, 32(R9), V18
  193  	WFMADB	V5, V6, V16, V6
  194  	WFMADB	V4, V18, V22, V18
  195  	VLEG	$0, 24(R9), V16
  196  	WFMADB	V1, V7, V18, V7
  197  	VLEG	$0, 16(R9), V18
  198  	VLEG	$0, 8(R9), V22
  199  	WFMADB	V4, V18, V16, V18
  200  	VLEG	$0, 0(R9), V16
  201  	WFMADB	V5, V6, V7, V6
  202  	WFMADB	V4, V16, V22, V16
  203  	FMUL	F3, F4	// F4 = t*t * t = t^3
  204  	WFMADB	V1, V18, V16, V1
  205  	FMADD	F6, F5, F1	// F1 = F6*F5 + F1: folds the high-order part into the polynomial value
  206  	WFMADB	V4, V1, V3, V4	// V4 = t^3 * V1 + t: the polynomial result, before any offset is added
  207  	BLT	L18	// from CMPWU: second argument's magnitude high word is smaller, an offset must be added
  208  	BGT	L7	// second argument's magnitude high word is larger: only its sign matters
  209  	LTDBR	F2, F2	// high words equal: test the sign of F2
  210  	BLTU	L21	// negative (or unordered): L21 negates F2 and returns to L8
  211  L8:
  212  	LTDBR	F0, F0	// test the sign of F0
  213  	BLTU	L22	// negative (or unordered): L22 replaces V0 with V20 (-F0) and returns to L9
  214  L9:
  215  	WFCHDBS	V2, V0, V0	// compares the two magnitudes and sets the condition code; NOTE(review): read as |second| > |first| — confirm operand order
  216  	BNE	L18	// compare not satisfied: add the offset
  217  L7:
  218  	MOVW	R1, R6	// R6 = sign-extended high word of the second argument
  219  	CMPBGE	R6, $0, L1	// second argument has its sign bit clear: the polynomial result is the answer
  220  L18:	// add the quadrant offset selected by the index in R2
  221  	RISBGZ	$58, $60, $3, R2, R2	// presumably R2 = (R2 & 7) * 8, the byte offset into atan2xpi2h — confirm
  222  	MOVD	$·atan2xpi2h<>+0(SB), R1	// R1 = address of the offset table
  223  	MOVD	·atan2xpim<>+0(SB), R3	// R3 = the 8-byte value stored in atan2xpim
  224  	LDGR	R3, F0	// F0 = that value as a float64
  225  	WORD	$0xED021000	//madb	%f4,%f0,0(%r2,%r1) (multiply-add with the table entry at R1+R2, accumulating into F4; the encoding continues in the two BYTEs below)
  226  	BYTE	$0x40
  227  	BYTE	$0x1E
  228  L1:
  229  	FMOVD	F4, ret+16(FP)	// store the result
  230  	RET
  231  
  232  L20:	// magnitude high words are equal: build |F2| in F6 and |F0| in F4, then compare them
  233  	LTDBR	F2, F2
  234  	BLTU	L23	// negative: L23 sets F6 = -F2
  235  	FMOVD	F2, F6
  236  L4:
  237  	LTDBR	F0, F0
  238  	BLTU	L24	// negative: L24 sets V4 = V20 (-F0)
  239  	FMOVD	F0, F4
  240  L5:
  241  	WFCHDBS	V6, V4, V4	// NOTE(review): read as |second| > |first| — confirm operand order
  242  	BEQ	L3	// compare satisfied: keep the quotient already in V3 and the index already in R2
  243  L17:	// reciprocal form: the ratio becomes -(second / first) and the index drops the +2
  244  	WFDDB	V0, V2, V4	// V4 = F2 / F0 (same operand-order assumption as the divide before L3)
  245  	BYTE	$0x18	//lr	%r2,%r5 (R2 = R5, the sign index 0 or 1; the encoding continues in the BYTE below)
  246  	BYTE	$0x25
  247  	LCDBR	F4, F3	// F3 = -F4
  248  	BR	L3
  249  L23:
  250  	LCDBR   F2, F6	// F6 = -F2
  251  	BR	L4
  252  L22:
  253  	VLR	V20, V0	// V0 = V20, i.e. -F0 as computed near the top of Normal
  254  	BR	L9
  255  L21:
  256  	LCDBR   F2, F2	// F2 = -F2
  257  	BR	L8
  258  L24:
  259  	VLR	V20, V4	// V4 = V20, i.e. -F0
  260  	BR	L5
  261  returnX:	//the result is the same as the first argument (R1 still holds its bits on every path that reaches here)
  262  	MOVD	R1, ret+16(FP)
  263  	RET
  264  returnY:	//the result is the same as the second argument
  265  	MOVD	R2, ret+16(FP)
  266  	RET
  267  returnPi:
  268  	MOVD	$Pi, R1
  269  	MOVD	R1, ret+16(FP)
  270  	RET
  271  returnNegPi:
  272  	MOVD	$NegPi, R1
  273  	MOVD	R1, ret+16(FP)
  274  	RET
  275  posInfNegInf:	// first argument +Inf, second -Inf
  276  	MOVD	$Pi3Div4, R1
  277  	MOVD	R1, ret+16(FP)
  278  	RET
  279  negInfNegInf:	// first argument -Inf, second -Inf
  280  	MOVD	$NegPi3Div4, R1
  281  	MOVD	R1, ret+16(FP)
  282  	RET
  283  posInfPosInf:	// first argument +Inf, second +Inf
  284  	MOVD	$PiDiv4, R1
  285  	MOVD	R1, ret+16(FP)
  286  	RET
  287  negInfPosInf:	// first argument -Inf, second +Inf
  288  	MOVD	$NegPiDiv4, R1
  289  	MOVD	R1, ret+16(FP)
  290  	RET
  291  returnNegZero:
  292  	MOVD	$NegZero, R1
  293  	MOVD	R1, ret+16(FP)
  294  	RET
  295  returnPosZero:
  296  	MOVD	$0, ret+16(FP)	// all-zero bits are +0
  297  	RET
 298