remquol.c raw

   1  #include <math.h>
   2  
   3  long double remquol(long double x, long double y, int *quo)
   4  {
   5  	signed char *cx = (void *)&x, *cy = (void *)&y;
   6  	/* By ensuring that addresses of x and y cannot be discarded,
   7  	 * this empty asm guides GCC into representing extraction of
   8  	 * their sign bits as memory loads rather than making x and y
   9  	 * not-address-taken internally and using bitfield operations,
  10  	 * which in the end wouldn't work out, as extraction from FPU
  11  	 * registers needs to go through memory anyway. This way GCC
  12  	 * should manage to use incoming stack slots without spills. */
  13  	__asm__ ("" :: "X"(cx), "X"(cy));
  14  
  15  	long double t = x;
  16  	unsigned fpsr;
  17  	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y));
  18  	while (fpsr & 0x400);
  19  	/* C0, C1, C3 flags in x87 status word carry low bits of quotient:
  20  	 * 15 14 13 12 11 10  9  8
  21  	 *  . C3  .  .  . C2 C1 C0
  22  	 *  . b1  .  .  .  0 b0 b2 */
  23  	unsigned char i = fpsr >> 8;
  24  	i = i>>4 | i<<4;
  25  	/* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via
  26  	 * in-register table lookup. */
  27  	unsigned qbits = 0x7575313164642020 >> (i & 60);
  28  	qbits &= 7;
  29  
  30  	*quo = (cx[9]^cy[9]) < 0 ? -qbits : qbits;
  31  	return t;
  32  }
  33