sqrtf.c raw

   1  #include <stdint.h>
   2  #include <math.h>
   3  #include "libm.h"
   4  #include "sqrt_data.h"
   5  
   6  #define FENV_SUPPORT 1
   7  
   8  static inline uint32_t mul32(uint32_t a, uint32_t b)
   9  {
  10  	return (uint64_t)a*b >> 32;
  11  }
  12  
  13  /* see sqrt.c for more detailed comments.  */
  14  
  15  float sqrtf(float x)
  16  {
  17  	uint32_t ix, m, m1, m0, even, ey;
  18  
  19  	ix = asuint(x);
  20  	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
  21  		/* x < 0x1p-126 or inf or nan.  */
  22  		if (ix * 2 == 0)
  23  			return x;
  24  		if (ix == 0x7f800000)
  25  			return x;
  26  		if (ix > 0x7f800000)
  27  			return __math_invalidf(x);
  28  		/* x is subnormal, normalize it.  */
  29  		ix = asuint(x * 0x1p23f);
  30  		ix -= 23 << 23;
  31  	}
  32  
  33  	/* x = 4^e m; with int e and m in [1, 4).  */
  34  	even = ix & 0x00800000;
  35  	m1 = (ix << 8) | 0x80000000;
  36  	m0 = (ix << 7) & 0x7fffffff;
  37  	m = even ? m0 : m1;
  38  
  39  	/* 2^e is the exponent part of the return value.  */
  40  	ey = ix >> 1;
  41  	ey += 0x3f800000 >> 1;
  42  	ey &= 0x7f800000;
  43  
  44  	/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations.  */
  45  	static const uint32_t three = 0xc0000000;
  46  	uint32_t r, s, d, u, i;
  47  	i = (ix >> 17) % 128;
  48  	r = (uint32_t)__rsqrt_tab[i] << 16;
  49  	/* |r*sqrt(m) - 1| < 0x1p-8 */
  50  	s = mul32(m, r);
  51  	/* |s/sqrt(m) - 1| < 0x1p-8 */
  52  	d = mul32(s, r);
  53  	u = three - d;
  54  	r = mul32(r, u) << 1;
  55  	/* |r*sqrt(m) - 1| < 0x1.7bp-16 */
  56  	s = mul32(s, u) << 1;
  57  	/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
  58  	d = mul32(s, r);
  59  	u = three - d;
  60  	s = mul32(s, u);
  61  	/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
  62  	s = (s - 1)>>6;
  63  	/* s < sqrt(m) < s + 0x1.08p-23 */
  64  
  65  	/* compute nearest rounded result.  */
  66  	uint32_t d0, d1, d2;
  67  	float y, t;
  68  	d0 = (m << 16) - s*s;
  69  	d1 = s - d0;
  70  	d2 = d1 + s + 1;
  71  	s += d1 >> 31;
  72  	s &= 0x007fffff;
  73  	s |= ey;
  74  	y = asfloat(s);
  75  	if (FENV_SUPPORT) {
  76  		/* handle rounding and inexact exception. */
  77  		uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
  78  		tiny |= (d1^d2) & 0x80000000;
  79  		t = asfloat(tiny);
  80  		y = eval_as_float(y + t);
  81  	}
  82  	return y;
  83  }
  84