powl.c raw

   1  /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
   2  /*
   3   * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
   4   *
   5   * Permission to use, copy, modify, and distribute this software for any
   6   * purpose with or without fee is hereby granted, provided that the above
   7   * copyright notice and this permission notice appear in all copies.
   8   *
   9   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16   */
  17  /*                                                      powl.c
  18   *
  19   *      Power function, long double precision
  20   *
  21   *
  22   * SYNOPSIS:
  23   *
  24   * long double x, y, z, powl();
  25   *
  26   * z = powl( x, y );
  27   *
  28   *
  29   * DESCRIPTION:
  30   *
  31   * Computes x raised to the yth power.  Analytically,
  32   *
  33   *      x**y  =  exp( y log(x) ).
  34   *
  35   * Following Cody and Waite, this program uses a lookup table
  36   * of 2**-i/32 and pseudo extended precision arithmetic to
  37   * obtain several extra bits of accuracy in both the logarithm
  38   * and the exponential.
  39   *
  40   *
  41   * ACCURACY:
  42   *
  43   * The relative error of pow(x,y) can be estimated
  44   * by   y dl ln(2),   where dl is the absolute error of
  45   * the internally computed base 2 logarithm.  At the ends
  46   * of the approximation interval the logarithm equal 1/32
  47   * and its relative error is about 1 lsb = 1.1e-19.  Hence
  48   * the predicted relative error in the result is 2.3e-21 y .
  49   *
  50   *                      Relative error:
  51   * arithmetic   domain     # trials      peak         rms
  52   *
  53   *    IEEE     +-1000       40000      2.8e-18      3.7e-19
  54   * .001 < x < 1000, with log(x) uniformly distributed.
  55   * -1000 < y < 1000, y uniformly distributed.
  56   *
  57   *    IEEE     0,8700       60000      6.5e-18      1.0e-18
  58   * 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
  59   *
  60   *
  61   * ERROR MESSAGES:
  62   *
  63   *   message         condition      value returned
  64   * pow overflow     x**y > MAXNUM      INFINITY
  65   * pow underflow   x**y < 1/MAXNUM       0.0
  66   * pow domain      x<0 and y noninteger  0.0
  67   *
  68   */
  69  
  70  #include "libm.h"
  71  
  72  #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
  73  long double powl(long double x, long double y)
  74  {
  75  	return pow(x, y);
  76  }
  77  #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
  78  
  79  /* Table size */
  80  #define NXT 32
  81  
  82  /* log(1+x) =  x - .5x^2 + x^3 *  P(z)/Q(z)
  83   * on the domain  2^(-1/32) - 1  <=  x  <=  2^(1/32) - 1
  84   */
  85  static const long double P[] = {
  86   8.3319510773868690346226E-4L,
  87   4.9000050881978028599627E-1L,
  88   1.7500123722550302671919E0L,
  89   1.4000100839971580279335E0L,
  90  };
  91  static const long double Q[] = {
  92  /* 1.0000000000000000000000E0L,*/
  93   5.2500282295834889175431E0L,
  94   8.4000598057587009834666E0L,
  95   4.2000302519914740834728E0L,
  96  };
  97  /* A[i] = 2^(-i/32), rounded to IEEE long double precision.
  98   * If i is even, A[i] + B[i/2] gives additional accuracy.
  99   */
 100  static const long double A[33] = {
 101   1.0000000000000000000000E0L,
 102   9.7857206208770013448287E-1L,
 103   9.5760328069857364691013E-1L,
 104   9.3708381705514995065011E-1L,
 105   9.1700404320467123175367E-1L,
 106   8.9735453750155359320742E-1L,
 107   8.7812608018664974155474E-1L,
 108   8.5930964906123895780165E-1L,
 109   8.4089641525371454301892E-1L,
 110   8.2287773907698242225554E-1L,
 111   8.0524516597462715409607E-1L,
 112   7.8799042255394324325455E-1L,
 113   7.7110541270397041179298E-1L,
 114   7.5458221379671136985669E-1L,
 115   7.3841307296974965571198E-1L,
 116   7.2259040348852331001267E-1L,
 117   7.0710678118654752438189E-1L,
 118   6.9195494098191597746178E-1L,
 119   6.7712777346844636413344E-1L,
 120   6.6261832157987064729696E-1L,
 121   6.4841977732550483296079E-1L,
 122   6.3452547859586661129850E-1L,
 123   6.2092890603674202431705E-1L,
 124   6.0762367999023443907803E-1L,
 125   5.9460355750136053334378E-1L,
 126   5.8186242938878875689693E-1L,
 127   5.6939431737834582684856E-1L,
 128   5.5719337129794626814472E-1L,
 129   5.4525386633262882960438E-1L,
 130   5.3357020033841180906486E-1L,
 131   5.2213689121370692017331E-1L,
 132   5.1094857432705833910408E-1L,
 133   5.0000000000000000000000E-1L,
 134  };
 135  static const long double B[17] = {
 136   0.0000000000000000000000E0L,
 137   2.6176170809902549338711E-20L,
 138  -1.0126791927256478897086E-20L,
 139   1.3438228172316276937655E-21L,
 140   1.2207982955417546912101E-20L,
 141  -6.3084814358060867200133E-21L,
 142   1.3164426894366316434230E-20L,
 143  -1.8527916071632873716786E-20L,
 144   1.8950325588932570796551E-20L,
 145   1.5564775779538780478155E-20L,
 146   6.0859793637556860974380E-21L,
 147  -2.0208749253662532228949E-20L,
 148   1.4966292219224761844552E-20L,
 149   3.3540909728056476875639E-21L,
 150  -8.6987564101742849540743E-22L,
 151  -1.2327176863327626135542E-20L,
 152   0.0000000000000000000000E0L,
 153  };
 154  
 155  /* 2^x = 1 + x P(x),
 156   * on the interval -1/32 <= x <= 0
 157   */
 158  static const long double R[] = {
 159   1.5089970579127659901157E-5L,
 160   1.5402715328927013076125E-4L,
 161   1.3333556028915671091390E-3L,
 162   9.6181291046036762031786E-3L,
 163   5.5504108664798463044015E-2L,
 164   2.4022650695910062854352E-1L,
 165   6.9314718055994530931447E-1L,
 166  };
 167  
 168  #define MEXP (NXT*16384.0L)
 169  /* The following if denormal numbers are supported, else -MEXP: */
 170  #define MNEXP (-NXT*(16384.0L+64.0L))
 171  /* log2(e) - 1 */
 172  #define LOG2EA 0.44269504088896340735992L
 173  
 174  #define F W
 175  #define Fa Wa
 176  #define Fb Wb
 177  #define G W
 178  #define Ga Wa
 179  #define Gb u
 180  #define H W
 181  #define Ha Wb
 182  #define Hb Wb
 183  
 184  static const long double MAXLOGL = 1.1356523406294143949492E4L;
 185  static const long double MINLOGL = -1.13994985314888605586758E4L;
 186  static const long double LOGE2L = 6.9314718055994530941723E-1L;
 187  static const long double huge = 0x1p10000L;
 188  /* XXX Prevent gcc from erroneously constant folding this. */
 189  static const volatile long double twom10000 = 0x1p-10000L;
 190  
 191  static long double reducl(long double);
 192  static long double powil(long double, int);
 193  
 194  long double powl(long double x, long double y)
 195  {
 196  	/* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
 197  	int i, nflg, iyflg, yoddint;
 198  	long e;
 199  	volatile long double z=0;
 200  	long double w=0, W=0, Wa=0, Wb=0, ya=0, yb=0, u=0;
 201  
 202  	/* make sure no invalid exception is raised by nan comparision */
 203  	if (isnan(x)) {
 204  		if (!isnan(y) && y == 0.0)
 205  			return 1.0;
 206  		return x;
 207  	}
 208  	if (isnan(y)) {
 209  		if (x == 1.0)
 210  			return 1.0;
 211  		return y;
 212  	}
 213  	if (x == 1.0)
 214  		return 1.0; /* 1**y = 1, even if y is nan */
 215  	if (x == -1.0 && !isfinite(y))
 216  		return 1.0; /* -1**inf = 1 */
 217  	if (y == 0.0)
 218  		return 1.0; /* x**0 = 1, even if x is nan */
 219  	if (y == 1.0)
 220  		return x;
 221  	if (y >= LDBL_MAX) {
 222  		if (x > 1.0 || x < -1.0)
 223  			return INFINITY;
 224  		if (x != 0.0)
 225  			return 0.0;
 226  	}
 227  	if (y <= -LDBL_MAX) {
 228  		if (x > 1.0 || x < -1.0)
 229  			return 0.0;
 230  		if (x != 0.0 || y == -INFINITY)
 231  			return INFINITY;
 232  	}
 233  	if (x >= LDBL_MAX) {
 234  		if (y > 0.0)
 235  			return INFINITY;
 236  		return 0.0;
 237  	}
 238  
 239  	w = floorl(y);
 240  
 241  	/* Set iyflg to 1 if y is an integer. */
 242  	iyflg = 0;
 243  	if (w == y)
 244  		iyflg = 1;
 245  
 246  	/* Test for odd integer y. */
 247  	yoddint = 0;
 248  	if (iyflg) {
 249  		ya = fabsl(y);
 250  		ya = floorl(0.5 * ya);
 251  		yb = 0.5 * fabsl(w);
 252  		if( ya != yb )
 253  			yoddint = 1;
 254  	}
 255  
 256  	if (x <= -LDBL_MAX) {
 257  		if (y > 0.0) {
 258  			if (yoddint)
 259  				return -INFINITY;
 260  			return INFINITY;
 261  		}
 262  		if (y < 0.0) {
 263  			if (yoddint)
 264  				return -0.0;
 265  			return 0.0;
 266  		}
 267  	}
 268  	nflg = 0; /* (x<0)**(odd int) */
 269  	if (x <= 0.0) {
 270  		if (x == 0.0) {
 271  			if (y < 0.0) {
 272  				if (signbit(x) && yoddint)
 273  					/* (-0.0)**(-odd int) = -inf, divbyzero */
 274  					return -1.0/0.0;
 275  				/* (+-0.0)**(negative) = inf, divbyzero */
 276  				return 1.0/0.0;
 277  			}
 278  			if (signbit(x) && yoddint)
 279  				return -0.0;
 280  			return 0.0;
 281  		}
 282  		if (iyflg == 0)
 283  			return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */
 284  		/* (x<0)**(integer) */
 285  		if (yoddint)
 286  			nflg = 1; /* negate result */
 287  		x = -x;
 288  	}
 289  	/* (+integer)**(integer)  */
 290  	if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) {
 291  		w = powil(x, (int)y);
 292  		return nflg ? -w : w;
 293  	}
 294  
 295  	/* separate significand from exponent */
 296  	x = frexpl(x, &i);
 297  	e = i;
 298  
 299  	/* find significand in antilog table A[] */
 300  	i = 1;
 301  	if (x <= A[17])
 302  		i = 17;
 303  	if (x <= A[i+8])
 304  		i += 8;
 305  	if (x <= A[i+4])
 306  		i += 4;
 307  	if (x <= A[i+2])
 308  		i += 2;
 309  	if (x >= A[1])
 310  		i = -1;
 311  	i += 1;
 312  
 313  	/* Find (x - A[i])/A[i]
 314  	 * in order to compute log(x/A[i]):
 315  	 *
 316  	 * log(x) = log( a x/a ) = log(a) + log(x/a)
 317  	 *
 318  	 * log(x/a) = log(1+v),  v = x/a - 1 = (x-a)/a
 319  	 */
 320  	x -= A[i];
 321  	x -= B[i/2];
 322  	x /= A[i];
 323  
 324  	/* rational approximation for log(1+v):
 325  	 *
 326  	 * log(1+v)  =  v  -  v**2/2  +  v**3 P(v) / Q(v)
 327  	 */
 328  	z = x*x;
 329  	w = x * (z * __polevll(x, P, 3) / __p1evll(x, Q, 3));
 330  	w = w - 0.5*z;
 331  
 332  	/* Convert to base 2 logarithm:
 333  	 * multiply by log2(e) = 1 + LOG2EA
 334  	 */
 335  	z = LOG2EA * w;
 336  	z += w;
 337  	z += LOG2EA * x;
 338  	z += x;
 339  
 340  	/* Compute exponent term of the base 2 logarithm. */
 341  	w = -i;
 342  	w /= NXT;
 343  	w += e;
 344  	/* Now base 2 log of x is w + z. */
 345  
 346  	/* Multiply base 2 log by y, in extended precision. */
 347  
 348  	/* separate y into large part ya
 349  	 * and small part yb less than 1/NXT
 350  	 */
 351  	ya = reducl(y);
 352  	yb = y - ya;
 353  
 354  	/* (w+z)(ya+yb)
 355  	 * = w*ya + w*yb + z*y
 356  	 */
 357  	F = z * y  +  w * yb;
 358  	Fa = reducl(F);
 359  	Fb = F - Fa;
 360  
 361  	G = Fa + w * ya;
 362  	Ga = reducl(G);
 363  	Gb = G - Ga;
 364  
 365  	H = Fb + Gb;
 366  	Ha = reducl(H);
 367  	w = (Ga + Ha) * NXT;
 368  
 369  	/* Test the power of 2 for overflow */
 370  	if (w > MEXP)
 371  		return huge * huge;  /* overflow */
 372  	if (w < MNEXP)
 373  		return twom10000 * twom10000;  /* underflow */
 374  
 375  	e = w;
 376  	Hb = H - Ha;
 377  
 378  	if (Hb > 0.0) {
 379  		e += 1;
 380  		Hb -= 1.0/NXT;  /*0.0625L;*/
 381  	}
 382  
 383  	/* Now the product y * log2(x)  =  Hb + e/NXT.
 384  	 *
 385  	 * Compute base 2 exponential of Hb,
 386  	 * where -0.0625 <= Hb <= 0.
 387  	 */
 388  	z = Hb * __polevll(Hb, R, 6);  /*  z = 2**Hb - 1  */
 389  
 390  	/* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
 391  	 * Find lookup table entry for the fractional power of 2.
 392  	 */
 393  	if (e < 0)
 394  		i = 0;
 395  	else
 396  		i = 1;
 397  	i = e/NXT + i;
 398  	e = NXT*i - e;
 399  	w = A[e];
 400  	z = w * z;  /*  2**-e * ( 1 + (2**Hb-1) )  */
 401  	z = z + w;
 402  	z = scalbnl(z, i);  /* multiply by integer power of 2 */
 403  
 404  	if (nflg)
 405  		z = -z;
 406  	return z;
 407  }
 408  
 409  
 410  /* Find a multiple of 1/NXT that is within 1/NXT of x. */
 411  static long double reducl(long double x)
 412  {
 413  	long double t;
 414  
 415  	t = x * NXT;
 416  	t = floorl(t);
 417  	t = t / NXT;
 418  	return t;
 419  }
 420  
 421  /*
 422   *      Positive real raised to integer power, long double precision
 423   *
 424   *
 425   * SYNOPSIS:
 426   *
 427   * long double x, y, powil();
 428   * int n;
 429   *
 430   * y = powil( x, n );
 431   *
 432   *
 433   * DESCRIPTION:
 434   *
 435   * Returns argument x>0 raised to the nth power.
 436   * The routine efficiently decomposes n as a sum of powers of
 437   * two. The desired power is a product of two-to-the-kth
 438   * powers of x.  Thus to compute the 32767 power of x requires
 439   * 28 multiplications instead of 32767 multiplications.
 440   *
 441   *
 442   * ACCURACY:
 443   *
 444   *                      Relative error:
 445   * arithmetic   x domain   n domain  # trials      peak         rms
 446   *    IEEE     .001,1000  -1022,1023  50000       4.3e-17     7.8e-18
 447   *    IEEE        1,2     -1022,1023  20000       3.9e-17     7.6e-18
 448   *    IEEE     .99,1.01     0,8700    10000       3.6e-16     7.2e-17
 449   *
 450   * Returns MAXNUM on overflow, zero on underflow.
 451   */
 452  
 453  static long double powil(long double x, int nn)
 454  {
 455  	long double ww, y;
 456  	long double s;
 457  	int n, e, sign, lx;
 458  
 459  	if (nn == 0)
 460  		return 1.0;
 461  
 462  	if (nn < 0) {
 463  		sign = -1;
 464  		n = -nn;
 465  	} else {
 466  		sign = 1;
 467  		n = nn;
 468  	}
 469  
 470  	/* Overflow detection */
 471  
 472  	/* Calculate approximate logarithm of answer */
 473  	s = x;
 474  	s = frexpl( s, &lx);
 475  	e = (lx - 1)*n;
 476  	if ((e == 0) || (e > 64) || (e < -64)) {
 477  		s = (s - 7.0710678118654752e-1L) / (s +  7.0710678118654752e-1L);
 478  		s = (2.9142135623730950L * s - 0.5 + lx) * nn * LOGE2L;
 479  	} else {
 480  		s = LOGE2L * e;
 481  	}
 482  
 483  	if (s > MAXLOGL)
 484  		return huge * huge;  /* overflow */
 485  
 486  	if (s < MINLOGL)
 487  		return twom10000 * twom10000;  /* underflow */
 488  	/* Handle tiny denormal answer, but with less accuracy
 489  	 * since roundoff error in 1.0/x will be amplified.
 490  	 * The precise demarcation should be the gradual underflow threshold.
 491  	 */
 492  	if (s < -MAXLOGL+2.0) {
 493  		x = 1.0/x;
 494  		sign = -sign;
 495  	}
 496  
 497  	/* First bit of the power */
 498  	if (n & 1)
 499  		y = x;
 500  	else
 501  		y = 1.0;
 502  
 503  	ww = x;
 504  	n >>= 1;
 505  	while (n) {
 506  		ww = ww * ww;   /* arg to the 2-to-the-kth power */
 507  		if (n & 1)     /* if that bit is set, then include in product */
 508  			y *= ww;
 509  		n >>= 1;
 510  	}
 511  
 512  	if (sign < 0)
 513  		y = 1.0/y;
 514  	return y;
 515  }
 516  #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
 517  // TODO: broken implementation to make things compile
 518  long double powl(long double x, long double y)
 519  {
 520  	return pow(x, y);
 521  }
 522  #endif
 523