float.mx raw

   1  // Copyright 2014 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // This file implements multi-precision floating-point numbers.
   6  // Like in the GNU MPFR library (https://www.mpfr.org/), operands
   7  // can be of mixed precision. Unlike MPFR, the rounding mode is
   8  // not specified with each operation, but with each operand. The
   9  // rounding mode of the result operand determines the rounding
  10  // mode of an operation. This is a from-scratch implementation.
  11  
  12  package big
  13  
  14  import (
  15  	"fmt"
  16  	"math"
  17  	"math/bits"
  18  )
  19  
  20  const debugFloat = false // enable for debugging
  21  
  22  // A nonzero finite Float represents a multi-precision floating point number
  23  //
  24  //	sign × mantissa × 2**exponent
  25  //
  26  // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
  27  // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
  28  // All Floats are ordered, and the ordering of two Floats x and y
  29  // is defined by x.Cmp(y).
  30  //
  31  // Each Float value also has a precision, rounding mode, and accuracy.
  32  // The precision is the maximum number of mantissa bits available to
  33  // represent the value. The rounding mode specifies how a result should
  34  // be rounded to fit into the mantissa bits, and accuracy describes the
  35  // rounding error with respect to the exact result.
  36  //
  37  // Unless specified otherwise, all operations (including setters) that
  38  // specify a *Float variable for the result (usually via the receiver
  39  // with the exception of [Float.MantExp]), round the numeric result according
  40  // to the precision and rounding mode of the result variable.
  41  //
  42  // If the provided result precision is 0 (see below), it is set to the
  43  // precision of the argument with the largest precision value before any
  44  // rounding takes place, and the rounding mode remains unchanged. Thus,
  45  // uninitialized Floats provided as result arguments will have their
  46  // precision set to a reasonable value determined by the operands, and
  47  // their mode is the zero value for RoundingMode (ToNearestEven).
  48  //
  49  // By setting the desired precision to 24 or 53 and using matching rounding
  50  // mode (typically [ToNearestEven]), Float operations produce the same results
  51  // as the corresponding float32 or float64 IEEE 754 arithmetic for operands
  52  // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
  53  // Exponent underflow and overflow lead to a 0 or an Infinity for different
  54  // values than IEEE 754 because Float exponents have a much larger range.
  55  //
  56  // The zero (uninitialized) value for a Float is ready to use and represents
  57  // the number +0.0 exactly, with precision 0 and rounding mode [ToNearestEven].
  58  //
  59  // Operations always take pointer arguments (*Float) rather
  60  // than Float values, and each unique Float value requires
  61  // its own unique *Float pointer. To "copy" a Float value,
  62  // an existing (or newly allocated) Float must be set to
  63  // a new value using the [Float.Set] method; shallow copies
  64  // of Floats are not supported and may lead to errors.
  65  type Float struct {
  66  	prec uint32
  67  	mode RoundingMode
  68  	acc  Accuracy
  69  	form form
  70  	neg  bool
  71  	mant nat
  72  	exp  int32
  73  }
  74  
  75  // An ErrNaN panic is raised by a [Float] operation that would lead to
  76  // a NaN under IEEE 754 rules. An ErrNaN implements the error interface.
  77  type ErrNaN struct {
  78  	msg []byte
  79  }
  80  
  81  func (err ErrNaN) Error() string {
  82  	return err.msg
  83  }
  84  
  85  // NewFloat allocates and returns a new [Float] set to x,
  86  // with precision 53 and rounding mode [ToNearestEven].
  87  // NewFloat panics with [ErrNaN] if x is a NaN.
  88  func NewFloat(x float64) *Float {
  89  	if math.IsNaN(x) {
  90  		panic(ErrNaN{"NewFloat(NaN)"})
  91  	}
  92  	return (&Float{}).SetFloat64(x)
  93  }
  94  
  95  // Exponent and precision limits.
  96  const (
  97  	MaxExp  = math.MaxInt32  // largest supported exponent
  98  	MinExp  = math.MinInt32  // smallest supported exponent
  99  	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
 100  )
 101  
 102  // Internal representation: The mantissa bits x.mant of a nonzero finite
 103  // Float x are stored in a nat slice long enough to hold up to x.prec bits;
 104  // the slice may (but doesn't have to) be shorter if the mantissa contains
 105  // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
 106  // the msb is shifted all the way "to the left"). Thus, if the mantissa has
 107  // trailing 0 bits or x.prec is not a multiple of the Word size _W,
 108  // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
 109  // to the value 0.5; the exponent x.exp shifts the binary point as needed.
 110  //
 111  // A zero or non-finite Float x ignores x.mant and x.exp.
 112  //
 113  // x                 form      neg      mant         exp
 114  // ----------------------------------------------------------
 115  // ±0                zero      sign     -            -
 116  // 0 < |x| < +Inf    finite    sign     mantissa     exponent
 117  // ±Inf              inf       sign     -            -
 118  
 119  // A form value describes the internal representation.
 120  type form byte
 121  
 122  // The form value order is relevant - do not change!
 123  const (
 124  	zero form = iota
 125  	finite
 126  	inf
 127  )
 128  
 129  // RoundingMode determines how a [Float] value is rounded to the
 130  // desired precision. Rounding may change the [Float] value; the
 131  // rounding error is described by the [Float]'s [Accuracy].
 132  type RoundingMode byte
 133  
 134  // These constants define supported rounding modes.
 135  const (
 136  	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
 137  	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
 138  	ToZero                            // == IEEE 754-2008 roundTowardZero
 139  	AwayFromZero                      // no IEEE 754-2008 equivalent
 140  	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
 141  	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
 142  )
 143  
 144  //go:generate stringer -type=RoundingMode
 145  
 146  // Accuracy describes the rounding error produced by the most recent
 147  // operation that generated a [Float] value, relative to the exact value.
 148  type Accuracy int8
 149  
 150  // Constants describing the [Accuracy] of a [Float].
 151  const (
 152  	Below Accuracy = -1
 153  	Exact Accuracy = 0
 154  	Above Accuracy = +1
 155  )
 156  
 157  //go:generate stringer -type=Accuracy
 158  
 159  // SetPrec sets z's precision to prec and returns the (possibly) rounded
 160  // value of z. Rounding occurs according to z's rounding mode if the mantissa
 161  // cannot be represented in prec bits without loss of precision.
 162  // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
 163  // If prec > [MaxPrec], it is set to [MaxPrec].
 164  func (z *Float) SetPrec(prec uint) *Float {
 165  	z.acc = Exact // optimistically assume no rounding is needed
 166  
 167  	// special case
 168  	if prec == 0 {
 169  		z.prec = 0
 170  		if z.form == finite {
 171  			// truncate z to 0
 172  			z.acc = makeAcc(z.neg)
 173  			z.form = zero
 174  		}
 175  		return z
 176  	}
 177  
 178  	// general case
 179  	if prec > MaxPrec {
 180  		prec = MaxPrec
 181  	}
 182  	old := z.prec
 183  	z.prec = uint32(prec)
 184  	if z.prec < old {
 185  		z.round(0)
 186  	}
 187  	return z
 188  }
 189  
 190  func makeAcc(above bool) Accuracy {
 191  	if above {
 192  		return Above
 193  	}
 194  	return Below
 195  }
 196  
 197  // SetMode sets z's rounding mode to mode and returns an exact z.
 198  // z remains unchanged otherwise.
 199  // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to [Exact].
 200  func (z *Float) SetMode(mode RoundingMode) *Float {
 201  	z.mode = mode
 202  	z.acc = Exact
 203  	return z
 204  }
 205  
 206  // Prec returns the mantissa precision of x in bits.
 207  // The result may be 0 for |x| == 0 and |x| == Inf.
 208  func (x *Float) Prec() uint {
 209  	return uint(x.prec)
 210  }
 211  
 212  // MinPrec returns the minimum precision required to represent x exactly
 213  // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
 214  // The result is 0 for |x| == 0 and |x| == Inf.
 215  func (x *Float) MinPrec() uint {
 216  	if x.form != finite {
 217  		return 0
 218  	}
 219  	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
 220  }
 221  
 222  // Mode returns the rounding mode of x.
 223  func (x *Float) Mode() RoundingMode {
 224  	return x.mode
 225  }
 226  
 227  // Acc returns the accuracy of x produced by the most recent
 228  // operation, unless explicitly documented otherwise by that
 229  // operation.
 230  func (x *Float) Acc() Accuracy {
 231  	return x.acc
 232  }
 233  
 234  // Sign returns:
 235  //   - -1 if x < 0;
 236  //   - 0 if x is ±0;
 237  //   - +1 if x > 0.
 238  func (x *Float) Sign() int {
 239  	if debugFloat {
 240  		x.validate()
 241  	}
 242  	if x.form == zero {
 243  		return 0
 244  	}
 245  	if x.neg {
 246  		return -1
 247  	}
 248  	return 1
 249  }
 250  
 251  // MantExp breaks x into its mantissa and exponent components
 252  // and returns the exponent. If a non-nil mant argument is
 253  // provided its value is set to the mantissa of x, with the
 254  // same precision and rounding mode as x. The components
 255  // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
 256  // Calling MantExp with a nil argument is an efficient way to
 257  // get the exponent of the receiver.
 258  //
 259  // Special cases are:
 260  //
 261  //	(  ±0).MantExp(mant) = 0, with mant set to   ±0
 262  //	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
 263  //
 264  // x and mant may be the same in which case x is set to its
 265  // mantissa value.
 266  func (x *Float) MantExp(mant *Float) (exp int) {
 267  	if debugFloat {
 268  		x.validate()
 269  	}
 270  	if x.form == finite {
 271  		exp = int(x.exp)
 272  	}
 273  	if mant != nil {
 274  		mant.Copy(x)
 275  		if mant.form == finite {
 276  			mant.exp = 0
 277  		}
 278  	}
 279  	return
 280  }
 281  
 282  func (z *Float) setExpAndRound(exp int64, sbit uint) {
 283  	if exp < MinExp {
 284  		// underflow
 285  		z.acc = makeAcc(z.neg)
 286  		z.form = zero
 287  		return
 288  	}
 289  
 290  	if exp > MaxExp {
 291  		// overflow
 292  		z.acc = makeAcc(!z.neg)
 293  		z.form = inf
 294  		return
 295  	}
 296  
 297  	z.form = finite
 298  	z.exp = int32(exp)
 299  	z.round(sbit)
 300  }
 301  
 302  // SetMantExp sets z to mant × 2**exp and returns z.
 303  // The result z has the same precision and rounding mode
 304  // as mant. SetMantExp is an inverse of [Float.MantExp] but does
 305  // not require 0.5 <= |mant| < 1.0. Specifically, for a
 306  // given x of type *[Float], SetMantExp relates to [Float.MantExp]
 307  // as follows:
 308  //
 309  //	mant := new(Float)
 310  //	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
 311  //
 312  // Special cases are:
 313  //
 314  //	z.SetMantExp(  ±0, exp) =   ±0
 315  //	z.SetMantExp(±Inf, exp) = ±Inf
 316  //
 317  // z and mant may be the same in which case z's exponent
 318  // is set to exp.
 319  func (z *Float) SetMantExp(mant *Float, exp int) *Float {
 320  	if debugFloat {
 321  		z.validate()
 322  		mant.validate()
 323  	}
 324  	z.Copy(mant)
 325  
 326  	if z.form == finite {
 327  		// 0 < |mant| < +Inf
 328  		z.setExpAndRound(int64(z.exp)+int64(exp), 0)
 329  	}
 330  	return z
 331  }
 332  
 333  // Signbit reports whether x is negative or negative zero.
 334  func (x *Float) Signbit() bool {
 335  	return x.neg
 336  }
 337  
 338  // IsInf reports whether x is +Inf or -Inf.
 339  func (x *Float) IsInf() bool {
 340  	return x.form == inf
 341  }
 342  
 343  // IsInt reports whether x is an integer.
 344  // ±Inf values are not integers.
 345  func (x *Float) IsInt() bool {
 346  	if debugFloat {
 347  		x.validate()
 348  	}
 349  	// special cases
 350  	if x.form != finite {
 351  		return x.form == zero
 352  	}
 353  	// x.form == finite
 354  	if x.exp <= 0 {
 355  		return false
 356  	}
 357  	// x.exp > 0
 358  	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
 359  }
 360  
 361  // debugging support
 362  func (x *Float) validate() {
 363  	if !debugFloat {
 364  		// avoid performance bugs
 365  		panic("validate called but debugFloat is not set")
 366  	}
 367  	if msg := x.validate0(); msg != "" {
 368  		panic(msg)
 369  	}
 370  }
 371  
 372  func (x *Float) validate0() []byte {
 373  	if x.form != finite {
 374  		return ""
 375  	}
 376  	m := len(x.mant)
 377  	if m == 0 {
 378  		return "nonzero finite number with empty mantissa"
 379  	}
 380  	const msb = 1 << (_W - 1)
 381  	if x.mant[m-1]&msb == 0 {
 382  		return fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))
 383  	}
 384  	if x.prec == 0 {
 385  		return "zero precision finite number"
 386  	}
 387  	return ""
 388  }
 389  
 390  // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
 391  // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
 392  // have before calling round. z's mantissa must be normalized (with the msb set)
 393  // or empty.
 394  //
 395  // CAUTION: The rounding modes [ToNegativeInf], [ToPositiveInf] are affected by the
 396  // sign of z. For correct rounding, the sign of z must be set correctly before
 397  // calling round.
 398  func (z *Float) round(sbit uint) {
 399  	if debugFloat {
 400  		z.validate()
 401  	}
 402  
 403  	z.acc = Exact
 404  	if z.form != finite {
 405  		// ±0 or ±Inf => nothing left to do
 406  		return
 407  	}
 408  	// z.form == finite && len(z.mant) > 0
 409  	// m > 0 implies z.prec > 0 (checked by validate)
 410  
 411  	m := uint32(len(z.mant)) // present mantissa length in words
 412  	bits := m * _W           // present mantissa bits; bits > 0
 413  	if bits <= z.prec {
 414  		// mantissa fits => nothing to do
 415  		return
 416  	}
 417  	// bits > z.prec
 418  
 419  	// Rounding is based on two bits: the rounding bit (rbit) and the
 420  	// sticky bit (sbit). The rbit is the bit immediately before the
 421  	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
 422  	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
 423  	//
 424  	//   rbit  sbit  => "fractional part"
 425  	//
 426  	//   0     0        == 0
 427  	//   0     1        >  0  , < 0.5
 428  	//   1     0        == 0.5
 429  	//   1     1        >  0.5, < 1.0
 430  
 431  	// bits > z.prec: mantissa too large => round
 432  	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
 433  	rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
 434  	// The sticky bit is only needed for rounding ToNearestEven
 435  	// or when the rounding bit is zero. Avoid computation otherwise.
 436  	if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) {
 437  		sbit = z.mant.sticky(r)
 438  	}
 439  	sbit &= 1 // be safe and ensure it's a single bit
 440  
 441  	// cut off extra words
 442  	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
 443  	if m > n {
 444  		copy(z.mant, z.mant[m-n:]) // move n last words to front
 445  		z.mant = z.mant[:n]
 446  	}
 447  
 448  	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
 449  	ntz := n*_W - z.prec // 0 <= ntz < _W
 450  	lsb := Word(1) << ntz
 451  
 452  	// round if result is inexact
 453  	if rbit|sbit != 0 {
 454  		// Make rounding decision: The result mantissa is truncated ("rounded down")
 455  		// by default. Decide if we need to increment, or "round up", the (unsigned)
 456  		// mantissa.
 457  		inc := false
 458  		switch z.mode {
 459  		case ToNegativeInf:
 460  			inc = z.neg
 461  		case ToZero:
 462  			// nothing to do
 463  		case ToNearestEven:
 464  			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
 465  		case ToNearestAway:
 466  			inc = rbit != 0
 467  		case AwayFromZero:
 468  			inc = true
 469  		case ToPositiveInf:
 470  			inc = !z.neg
 471  		default:
 472  			panic("unreachable")
 473  		}
 474  
 475  		// A positive result (!z.neg) is Above the exact result if we increment,
 476  		// and it's Below if we truncate (Exact results require no rounding).
 477  		// For a negative result (z.neg) it is exactly the opposite.
 478  		z.acc = makeAcc(inc != z.neg)
 479  
 480  		if inc {
 481  			// add 1 to mantissa
 482  			if addVW(z.mant, z.mant, lsb) != 0 {
 483  				// mantissa overflow => adjust exponent
 484  				if z.exp >= MaxExp {
 485  					// exponent overflow
 486  					z.form = inf
 487  					return
 488  				}
 489  				z.exp++
 490  				// adjust mantissa: divide by 2 to compensate for exponent adjustment
 491  				rshVU(z.mant, z.mant, 1)
 492  				// set msb == carry == 1 from the mantissa overflow above
 493  				const msb = 1 << (_W - 1)
 494  				z.mant[n-1] |= msb
 495  			}
 496  		}
 497  	}
 498  
 499  	// zero out trailing bits in least-significant word
 500  	z.mant[0] &^= lsb - 1
 501  
 502  	if debugFloat {
 503  		z.validate()
 504  	}
 505  }
 506  
 507  func (z *Float) setBits64(neg bool, x uint64) *Float {
 508  	if z.prec == 0 {
 509  		z.prec = 64
 510  	}
 511  	z.acc = Exact
 512  	z.neg = neg
 513  	if x == 0 {
 514  		z.form = zero
 515  		return z
 516  	}
 517  	// x != 0
 518  	z.form = finite
 519  	s := bits.LeadingZeros64(x)
 520  	z.mant = z.mant.setUint64(x << uint(s))
 521  	z.exp = int32(64 - s) // always fits
 522  	if z.prec < 64 {
 523  		z.round(0)
 524  	}
 525  	return z
 526  }
 527  
 528  // SetUint64 sets z to the (possibly rounded) value of x and returns z.
 529  // If z's precision is 0, it is changed to 64 (and rounding will have
 530  // no effect).
 531  func (z *Float) SetUint64(x uint64) *Float {
 532  	return z.setBits64(false, x)
 533  }
 534  
 535  // SetInt64 sets z to the (possibly rounded) value of x and returns z.
 536  // If z's precision is 0, it is changed to 64 (and rounding will have
 537  // no effect).
 538  func (z *Float) SetInt64(x int64) *Float {
 539  	u := x
 540  	if u < 0 {
 541  		u = -u
 542  	}
 543  	// We cannot simply call z.SetUint64(uint64(u)) and change
 544  	// the sign afterwards because the sign affects rounding.
 545  	return z.setBits64(x < 0, uint64(u))
 546  }
 547  
 548  // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
 549  // If z's precision is 0, it is changed to 53 (and rounding will have
 550  // no effect). SetFloat64 panics with [ErrNaN] if x is a NaN.
 551  func (z *Float) SetFloat64(x float64) *Float {
 552  	if z.prec == 0 {
 553  		z.prec = 53
 554  	}
 555  	if math.IsNaN(x) {
 556  		panic(ErrNaN{"Float.SetFloat64(NaN)"})
 557  	}
 558  	z.acc = Exact
 559  	z.neg = math.Signbit(x) // handle -0, -Inf correctly
 560  	if x == 0 {
 561  		z.form = zero
 562  		return z
 563  	}
 564  	if math.IsInf(x, 0) {
 565  		z.form = inf
 566  		return z
 567  	}
 568  	// normalized x != 0
 569  	z.form = finite
 570  	fmant, exp := math.Frexp(x) // get normalized mantissa
 571  	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
 572  	z.exp = int32(exp) // always fits
 573  	if z.prec < 53 {
 574  		z.round(0)
 575  	}
 576  	return z
 577  }
 578  
 579  // fnorm normalizes mantissa m by shifting it to the left
 580  // such that the msb of the most-significant word (msw) is 1.
 581  // It returns the shift amount. It assumes that len(m) != 0.
 582  func fnorm(m nat) int64 {
 583  	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
 584  		panic("msw of mantissa is 0")
 585  	}
 586  	s := nlz(m[len(m)-1])
 587  	if s > 0 {
 588  		c := lshVU(m, m, s)
 589  		if debugFloat && c != 0 {
 590  			panic("nlz or lshVU incorrect")
 591  		}
 592  	}
 593  	return int64(s)
 594  }
 595  
 596  // SetInt sets z to the (possibly rounded) value of x and returns z.
 597  // If z's precision is 0, it is changed to the larger of x.BitLen()
 598  // or 64 (and rounding will have no effect).
 599  func (z *Float) SetInt(x *Int) *Float {
 600  	// TODO(gri) can be more efficient if z.prec > 0
 601  	// but small compared to the size of x, or if there
 602  	// are many trailing 0's.
 603  	bits := uint32(x.BitLen())
 604  	if z.prec == 0 {
 605  		z.prec = max(bits, 64)
 606  	}
 607  	z.acc = Exact
 608  	z.neg = x.neg
 609  	if len(x.abs) == 0 {
 610  		z.form = zero
 611  		return z
 612  	}
 613  	// x != 0
 614  	z.mant = z.mant.set(x.abs)
 615  	fnorm(z.mant)
 616  	z.setExpAndRound(int64(bits), 0)
 617  	return z
 618  }
 619  
 620  // SetRat sets z to the (possibly rounded) value of x and returns z.
 621  // If z's precision is 0, it is changed to the largest of a.BitLen(),
 622  // b.BitLen(), or 64; with x = a/b.
 623  func (z *Float) SetRat(x *Rat) *Float {
 624  	if x.IsInt() {
 625  		return z.SetInt(x.Num())
 626  	}
 627  	var a, b Float
 628  	a.SetInt(x.Num())
 629  	b.SetInt(x.Denom())
 630  	if z.prec == 0 {
 631  		z.prec = max(a.prec, b.prec)
 632  	}
 633  	return z.Quo(&a, &b)
 634  }
 635  
 636  // SetInf sets z to the infinite Float -Inf if signbit is
 637  // set, or +Inf if signbit is not set, and returns z. The
 638  // precision of z is unchanged and the result is always
 639  // [Exact].
 640  func (z *Float) SetInf(signbit bool) *Float {
 641  	z.acc = Exact
 642  	z.form = inf
 643  	z.neg = signbit
 644  	return z
 645  }
 646  
 647  // Set sets z to the (possibly rounded) value of x and returns z.
 648  // If z's precision is 0, it is changed to the precision of x
 649  // before setting z (and rounding will have no effect).
 650  // Rounding is performed according to z's precision and rounding
 651  // mode; and z's accuracy reports the result error relative to the
 652  // exact (not rounded) result.
 653  func (z *Float) Set(x *Float) *Float {
 654  	if debugFloat {
 655  		x.validate()
 656  	}
 657  	z.acc = Exact
 658  	if z != x {
 659  		z.form = x.form
 660  		z.neg = x.neg
 661  		if x.form == finite {
 662  			z.exp = x.exp
 663  			z.mant = z.mant.set(x.mant)
 664  		}
 665  		if z.prec == 0 {
 666  			z.prec = x.prec
 667  		} else if z.prec < x.prec {
 668  			z.round(0)
 669  		}
 670  	}
 671  	return z
 672  }
 673  
 674  // Copy sets z to x, with the same precision, rounding mode, and accuracy as x.
 675  // Copy returns z. If x and z are identical, Copy is a no-op.
 676  func (z *Float) Copy(x *Float) *Float {
 677  	if debugFloat {
 678  		x.validate()
 679  	}
 680  	if z != x {
 681  		z.prec = x.prec
 682  		z.mode = x.mode
 683  		z.acc = x.acc
 684  		z.form = x.form
 685  		z.neg = x.neg
 686  		if z.form == finite {
 687  			z.mant = z.mant.set(x.mant)
 688  			z.exp = x.exp
 689  		}
 690  	}
 691  	return z
 692  }
 693  
 694  // msb32 returns the 32 most significant bits of x.
 695  func msb32(x nat) uint32 {
 696  	i := len(x) - 1
 697  	if i < 0 {
 698  		return 0
 699  	}
 700  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
 701  		panic("x not normalized")
 702  	}
 703  	switch _W {
 704  	case 32:
 705  		return uint32(x[i])
 706  	case 64:
 707  		return uint32(x[i] >> 32)
 708  	}
 709  	panic("unreachable")
 710  }
 711  
 712  // msb64 returns the 64 most significant bits of x.
 713  func msb64(x nat) uint64 {
 714  	i := len(x) - 1
 715  	if i < 0 {
 716  		return 0
 717  	}
 718  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
 719  		panic("x not normalized")
 720  	}
 721  	switch _W {
 722  	case 32:
 723  		v := uint64(x[i]) << 32
 724  		if i > 0 {
 725  			v |= uint64(x[i-1])
 726  		}
 727  		return v
 728  	case 64:
 729  		return uint64(x[i])
 730  	}
 731  	panic("unreachable")
 732  }
 733  
 734  // Uint64 returns the unsigned integer resulting from truncating x
 735  // towards zero. If 0 <= x <= [math.MaxUint64], the result is [Exact]
 736  // if x is an integer and [Below] otherwise.
 737  // The result is (0, [Above]) for x < 0, and ([math.MaxUint64], [Below])
 738  // for x > [math.MaxUint64].
 739  func (x *Float) Uint64() (uint64, Accuracy) {
 740  	if debugFloat {
 741  		x.validate()
 742  	}
 743  
 744  	switch x.form {
 745  	case finite:
 746  		if x.neg {
 747  			return 0, Above
 748  		}
 749  		// 0 < x < +Inf
 750  		if x.exp <= 0 {
 751  			// 0 < x < 1
 752  			return 0, Below
 753  		}
 754  		// 1 <= x < Inf
 755  		if x.exp <= 64 {
 756  			// u = trunc(x) fits into a uint64
 757  			u := msb64(x.mant) >> (64 - uint32(x.exp))
 758  			if x.MinPrec() <= 64 {
 759  				return u, Exact
 760  			}
 761  			return u, Below // x truncated
 762  		}
 763  		// x too large
 764  		return math.MaxUint64, Below
 765  
 766  	case zero:
 767  		return 0, Exact
 768  
 769  	case inf:
 770  		if x.neg {
 771  			return 0, Above
 772  		}
 773  		return math.MaxUint64, Below
 774  	}
 775  
 776  	panic("unreachable")
 777  }
 778  
 779  // Int64 returns the integer resulting from truncating x towards zero.
 780  // If [math.MinInt64] <= x <= [math.MaxInt64], the result is [Exact] if x is
 781  // an integer, and [Above] (x < 0) or [Below] (x > 0) otherwise.
 782  // The result is ([math.MinInt64], [Above]) for x < [math.MinInt64],
 783  // and ([math.MaxInt64], [Below]) for x > [math.MaxInt64].
 784  func (x *Float) Int64() (int64, Accuracy) {
 785  	if debugFloat {
 786  		x.validate()
 787  	}
 788  
 789  	switch x.form {
 790  	case finite:
 791  		// 0 < |x| < +Inf
 792  		acc := makeAcc(x.neg)
 793  		if x.exp <= 0 {
 794  			// 0 < |x| < 1
 795  			return 0, acc
 796  		}
 797  		// x.exp > 0
 798  
 799  		// 1 <= |x| < +Inf
 800  		if x.exp <= 63 {
 801  			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
 802  			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
 803  			if x.neg {
 804  				i = -i
 805  			}
 806  			if x.MinPrec() <= uint(x.exp) {
 807  				return i, Exact
 808  			}
 809  			return i, acc // x truncated
 810  		}
 811  		if x.neg {
 812  			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
 813  			if x.exp == 64 && x.MinPrec() == 1 {
 814  				acc = Exact
 815  			}
 816  			return math.MinInt64, acc
 817  		}
 818  		// x too large
 819  		return math.MaxInt64, Below
 820  
 821  	case zero:
 822  		return 0, Exact
 823  
 824  	case inf:
 825  		if x.neg {
 826  			return math.MinInt64, Above
 827  		}
 828  		return math.MaxInt64, Below
 829  	}
 830  
 831  	panic("unreachable")
 832  }
 833  
 834  // Float32 returns the float32 value nearest to x. If x is too small to be
 835  // represented by a float32 (|x| < [math.SmallestNonzeroFloat32]), the result
 836  // is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x.
 837  // If x is too large to be represented by a float32 (|x| > [math.MaxFloat32]),
 838  // the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x.
 839  func (x *Float) Float32() (float32, Accuracy) {
 840  	if debugFloat {
 841  		x.validate()
 842  	}
 843  
 844  	switch x.form {
 845  	case finite:
 846  		// 0 < |x| < +Inf
 847  
 848  		const (
 849  			fbits = 32                //        float size
 850  			mbits = 23                //        mantissa size (excluding implicit msb)
 851  			ebits = fbits - mbits - 1 //     8  exponent size
 852  			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
 853  			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
 854  			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
 855  			emax  = bias              //   127  largest unbiased exponent (normal)
 856  		)
 857  
 858  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
 859  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
 860  
 861  		// Compute precision p for float32 mantissa.
 862  		// If the exponent is too small, we have a denormal number before
 863  		// rounding and fewer than p mantissa bits of precision available
 864  		// (the exponent remains fixed but the mantissa gets shifted right).
 865  		p := mbits + 1 // precision of normal float
 866  		if e < emin {
 867  			// recompute precision
 868  			p = mbits + 1 - emin + int(e)
 869  			// If p == 0, the mantissa of x is shifted so much to the right
 870  			// that its msb falls immediately to the right of the float32
 871  			// mantissa space. In other words, if the smallest denormal is
 872  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
 873  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
 874  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
 875  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
 876  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
 877  				// underflow to ±0
 878  				if x.neg {
 879  					var z float32
 880  					return -z, Above
 881  				}
 882  				return 0.0, Below
 883  			}
 884  			// otherwise, round up
 885  			// We handle p == 0 explicitly because it's easy and because
 886  			// Float.round doesn't support rounding to 0 bits of precision.
 887  			if p == 0 {
 888  				if x.neg {
 889  					return -math.SmallestNonzeroFloat32, Below
 890  				}
 891  				return math.SmallestNonzeroFloat32, Above
 892  			}
 893  		}
 894  		// p > 0
 895  
 896  		// round
 897  		var r Float
 898  		r.prec = uint32(p)
 899  		r.Set(x)
 900  		e = r.exp - 1
 901  
 902  		// Rounding may have caused r to overflow to ±Inf
 903  		// (rounding never causes underflows to 0).
 904  		// If the exponent is too large, also overflow to ±Inf.
 905  		if r.form == inf || e > emax {
 906  			// overflow
 907  			if x.neg {
 908  				return float32(math.Inf(-1)), Below
 909  			}
 910  			return float32(math.Inf(+1)), Above
 911  		}
 912  		// e <= emax
 913  
 914  		// Determine sign, biased exponent, and mantissa.
 915  		var sign, bexp, mant uint32
 916  		if x.neg {
 917  			sign = 1 << (fbits - 1)
 918  		}
 919  
 920  		// Rounding may have caused a denormal number to
 921  		// become normal. Check again.
 922  		if e < emin {
 923  			// denormal number: recompute precision
 924  			// Since rounding may have at best increased precision
 925  			// and we have eliminated p <= 0 early, we know p > 0.
 926  			// bexp == 0 for denormals
 927  			p = mbits + 1 - emin + int(e)
 928  			mant = msb32(r.mant) >> uint(fbits-p)
 929  		} else {
 930  			// normal number: emin <= e <= emax
 931  			bexp = uint32(e+bias) << mbits
 932  			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
 933  		}
 934  
 935  		return math.Float32frombits(sign | bexp | mant), r.acc
 936  
 937  	case zero:
 938  		if x.neg {
 939  			var z float32
 940  			return -z, Exact
 941  		}
 942  		return 0.0, Exact
 943  
 944  	case inf:
 945  		if x.neg {
 946  			return float32(math.Inf(-1)), Exact
 947  		}
 948  		return float32(math.Inf(+1)), Exact
 949  	}
 950  
 951  	panic("unreachable")
 952  }
 953  
 954  // Float64 returns the float64 value nearest to x. If x is too small to be
 955  // represented by a float64 (|x| < [math.SmallestNonzeroFloat64]), the result
 956  // is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x.
 957  // If x is too large to be represented by a float64 (|x| > [math.MaxFloat64]),
 958  // the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x.
 959  func (x *Float) Float64() (float64, Accuracy) {
 960  	if debugFloat {
 961  		x.validate()
 962  	}
 963  
 964  	switch x.form {
 965  	case finite:
 966  		// 0 < |x| < +Inf
 967  
 968  		const (
 969  			fbits = 64                //        float size
 970  			mbits = 52                //        mantissa size (excluding implicit msb)
 971  			ebits = fbits - mbits - 1 //    11  exponent size
 972  			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
 973  			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
 974  			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
 975  			emax  = bias              //  1023  largest unbiased exponent (normal)
 976  		)
 977  
 978  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
 979  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
 980  
 981  		// Compute precision p for float64 mantissa.
 982  		// If the exponent is too small, we have a denormal number before
 983  		// rounding and fewer than p mantissa bits of precision available
 984  		// (the exponent remains fixed but the mantissa gets shifted right).
 985  		p := mbits + 1 // precision of normal float
 986  		if e < emin {
 987  			// recompute precision
 988  			p = mbits + 1 - emin + int(e)
 989  			// If p == 0, the mantissa of x is shifted so much to the right
 990  			// that its msb falls immediately to the right of the float64
 991  			// mantissa space. In other words, if the smallest denormal is
 992  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
 993  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
 994  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
 995  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
 996  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
 997  				// underflow to ±0
 998  				if x.neg {
 999  					var z float64
1000  					return -z, Above
1001  				}
1002  				return 0.0, Below
1003  			}
1004  			// otherwise, round up
1005  			// We handle p == 0 explicitly because it's easy and because
1006  			// Float.round doesn't support rounding to 0 bits of precision.
1007  			if p == 0 {
1008  				if x.neg {
1009  					return -math.SmallestNonzeroFloat64, Below
1010  				}
1011  				return math.SmallestNonzeroFloat64, Above
1012  			}
1013  		}
1014  		// p > 0
1015  
1016  		// round
1017  		var r Float
1018  		r.prec = uint32(p)
1019  		r.Set(x)
1020  		e = r.exp - 1
1021  
1022  		// Rounding may have caused r to overflow to ±Inf
1023  		// (rounding never causes underflows to 0).
1024  		// If the exponent is too large, also overflow to ±Inf.
1025  		if r.form == inf || e > emax {
1026  			// overflow
1027  			if x.neg {
1028  				return math.Inf(-1), Below
1029  			}
1030  			return math.Inf(+1), Above
1031  		}
1032  		// e <= emax
1033  
1034  		// Determine sign, biased exponent, and mantissa.
1035  		var sign, bexp, mant uint64
1036  		if x.neg {
1037  			sign = 1 << (fbits - 1)
1038  		}
1039  
1040  		// Rounding may have caused a denormal number to
1041  		// become normal. Check again.
1042  		if e < emin {
1043  			// denormal number: recompute precision
1044  			// Since rounding may have at best increased precision
1045  			// and we have eliminated p <= 0 early, we know p > 0.
1046  			// bexp == 0 for denormals
1047  			p = mbits + 1 - emin + int(e)
1048  			mant = msb64(r.mant) >> uint(fbits-p)
1049  		} else {
1050  			// normal number: emin <= e <= emax
1051  			bexp = uint64(e+bias) << mbits
1052  			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
1053  		}
1054  
1055  		return math.Float64frombits(sign | bexp | mant), r.acc
1056  
1057  	case zero:
1058  		if x.neg {
1059  			var z float64
1060  			return -z, Exact
1061  		}
1062  		return 0.0, Exact
1063  
1064  	case inf:
1065  		if x.neg {
1066  			return math.Inf(-1), Exact
1067  		}
1068  		return math.Inf(+1), Exact
1069  	}
1070  
1071  	panic("unreachable")
1072  }
1073  
1074  // Int returns the result of truncating x towards zero;
1075  // or nil if x is an infinity.
1076  // The result is [Exact] if x.IsInt(); otherwise it is [Below]
1077  // for x > 0, and [Above] for x < 0.
1078  // If a non-nil *[Int] argument z is provided, [Int] stores
1079  // the result in z instead of allocating a new [Int].
1080  func (x *Float) Int(z *Int) (*Int, Accuracy) {
1081  	if debugFloat {
1082  		x.validate()
1083  	}
1084  
1085  	if z == nil && x.form <= finite {
1086  		z = &Int{}
1087  	}
1088  
1089  	switch x.form {
1090  	case finite:
1091  		// 0 < |x| < +Inf
1092  		acc := makeAcc(x.neg)
1093  		if x.exp <= 0 {
1094  			// 0 < |x| < 1
1095  			return z.SetInt64(0), acc
1096  		}
1097  		// x.exp > 0
1098  
1099  		// 1 <= |x| < +Inf
1100  		// determine minimum required precision for x
1101  		allBits := uint(len(x.mant)) * _W
1102  		exp := uint(x.exp)
1103  		if x.MinPrec() <= exp {
1104  			acc = Exact
1105  		}
1106  		// shift mantissa as needed
1107  		if z == nil {
1108  			z = &Int{}
1109  		}
1110  		z.neg = x.neg
1111  		switch {
1112  		case exp > allBits:
1113  			z.abs = z.abs.lsh(x.mant, exp-allBits)
1114  		default:
1115  			z.abs = z.abs.set(x.mant)
1116  		case exp < allBits:
1117  			z.abs = z.abs.rsh(x.mant, allBits-exp)
1118  		}
1119  		return z, acc
1120  
1121  	case zero:
1122  		return z.SetInt64(0), Exact
1123  
1124  	case inf:
1125  		return nil, makeAcc(x.neg)
1126  	}
1127  
1128  	panic("unreachable")
1129  }
1130  
1131  // Rat returns the rational number corresponding to x;
1132  // or nil if x is an infinity.
1133  // The result is [Exact] if x is not an Inf.
1134  // If a non-nil *[Rat] argument z is provided, [Rat] stores
1135  // the result in z instead of allocating a new [Rat].
1136  func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
1137  	if debugFloat {
1138  		x.validate()
1139  	}
1140  
1141  	if z == nil && x.form <= finite {
1142  		z = &Rat{}
1143  	}
1144  
1145  	switch x.form {
1146  	case finite:
1147  		// 0 < |x| < +Inf
1148  		allBits := int32(len(x.mant)) * _W
1149  		// build up numerator and denominator
1150  		z.a.neg = x.neg
1151  		switch {
1152  		case x.exp > allBits:
1153  			z.a.abs = z.a.abs.lsh(x.mant, uint(x.exp-allBits))
1154  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
1155  			// z already in normal form
1156  		default:
1157  			z.a.abs = z.a.abs.set(x.mant)
1158  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
1159  			// z already in normal form
1160  		case x.exp < allBits:
1161  			z.a.abs = z.a.abs.set(x.mant)
1162  			t := z.b.abs.setUint64(1)
1163  			z.b.abs = t.lsh(t, uint(allBits-x.exp))
1164  			z.norm()
1165  		}
1166  		return z, Exact
1167  
1168  	case zero:
1169  		return z.SetInt64(0), Exact
1170  
1171  	case inf:
1172  		return nil, makeAcc(x.neg)
1173  	}
1174  
1175  	panic("unreachable")
1176  }
1177  
1178  // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
1179  // and returns z.
1180  func (z *Float) Abs(x *Float) *Float {
1181  	z.Set(x)
1182  	z.neg = false
1183  	return z
1184  }
1185  
1186  // Neg sets z to the (possibly rounded) value of x with its sign negated,
1187  // and returns z.
1188  func (z *Float) Neg(x *Float) *Float {
1189  	z.Set(x)
1190  	z.neg = !z.neg
1191  	return z
1192  }
1193  
1194  func validateBinaryOperands(x, y *Float) {
1195  	if !debugFloat {
1196  		// avoid performance bugs
1197  		panic("validateBinaryOperands called but debugFloat is not set")
1198  	}
1199  	if len(x.mant) == 0 {
1200  		panic("empty mantissa for x")
1201  	}
1202  	if len(y.mant) == 0 {
1203  		panic("empty mantissa for y")
1204  	}
1205  }
1206  
1207  // z = x + y, ignoring signs of x and y for the addition
1208  // but using the sign of z for rounding the result.
1209  // x and y must have a non-empty mantissa and valid exponent.
1210  func (z *Float) uadd(x, y *Float) {
1211  	// Note: This implementation requires 2 shifts most of the
1212  	// time. It is also inefficient if exponents or precisions
1213  	// differ by wide margins. The following article describes
1214  	// an efficient (but much more complicated) implementation
1215  	// compatible with the internal representation used here:
1216  	//
1217  	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
1218  	// Point Addition With Exact Rounding (as in the MPFR Library)"
1219  	// http://www.vinc17.net/research/papers/rnc6.pdf
1220  
1221  	if debugFloat {
1222  		validateBinaryOperands(x, y)
1223  	}
1224  
1225  	// compute exponents ex, ey for mantissa with "binary point"
1226  	// on the right (mantissa.0) - use int64 to avoid overflow
1227  	ex := int64(x.exp) - int64(len(x.mant))*_W
1228  	ey := int64(y.exp) - int64(len(y.mant))*_W
1229  
1230  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
1231  
1232  	// TODO(gri) having a combined add-and-shift primitive
1233  	//           could make this code significantly faster
1234  	switch {
1235  	case ex < ey:
1236  		if al {
1237  			t := nat(nil).lsh(y.mant, uint(ey-ex))
1238  			z.mant = z.mant.add(x.mant, t)
1239  		} else {
1240  			z.mant = z.mant.lsh(y.mant, uint(ey-ex))
1241  			z.mant = z.mant.add(x.mant, z.mant)
1242  		}
1243  	default:
1244  		// ex == ey, no shift needed
1245  		z.mant = z.mant.add(x.mant, y.mant)
1246  	case ex > ey:
1247  		if al {
1248  			t := nat(nil).lsh(x.mant, uint(ex-ey))
1249  			z.mant = z.mant.add(t, y.mant)
1250  		} else {
1251  			z.mant = z.mant.lsh(x.mant, uint(ex-ey))
1252  			z.mant = z.mant.add(z.mant, y.mant)
1253  		}
1254  		ex = ey
1255  	}
1256  	// len(z.mant) > 0
1257  
1258  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
1259  }
1260  
1261  // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
1262  // but using the sign of z for rounding the result.
1263  // x and y must have a non-empty mantissa and valid exponent.
1264  func (z *Float) usub(x, y *Float) {
1265  	// This code is symmetric to uadd.
1266  	// We have not factored the common code out because
1267  	// eventually uadd (and usub) should be optimized
1268  	// by special-casing, and the code will diverge.
1269  
1270  	if debugFloat {
1271  		validateBinaryOperands(x, y)
1272  	}
1273  
1274  	ex := int64(x.exp) - int64(len(x.mant))*_W
1275  	ey := int64(y.exp) - int64(len(y.mant))*_W
1276  
1277  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
1278  
1279  	switch {
1280  	case ex < ey:
1281  		if al {
1282  			t := nat(nil).lsh(y.mant, uint(ey-ex))
1283  			z.mant = t.sub(x.mant, t)
1284  		} else {
1285  			z.mant = z.mant.lsh(y.mant, uint(ey-ex))
1286  			z.mant = z.mant.sub(x.mant, z.mant)
1287  		}
1288  	default:
1289  		// ex == ey, no shift needed
1290  		z.mant = z.mant.sub(x.mant, y.mant)
1291  	case ex > ey:
1292  		if al {
1293  			t := nat(nil).lsh(x.mant, uint(ex-ey))
1294  			z.mant = t.sub(t, y.mant)
1295  		} else {
1296  			z.mant = z.mant.lsh(x.mant, uint(ex-ey))
1297  			z.mant = z.mant.sub(z.mant, y.mant)
1298  		}
1299  		ex = ey
1300  	}
1301  
1302  	// operands may have canceled each other out
1303  	if len(z.mant) == 0 {
1304  		z.acc = Exact
1305  		z.form = zero
1306  		z.neg = false
1307  		return
1308  	}
1309  	// len(z.mant) > 0
1310  
1311  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
1312  }
1313  
1314  // z = x * y, ignoring signs of x and y for the multiplication
1315  // but using the sign of z for rounding the result.
1316  // x and y must have a non-empty mantissa and valid exponent.
1317  func (z *Float) umul(x, y *Float) {
1318  	if debugFloat {
1319  		validateBinaryOperands(x, y)
1320  	}
1321  
1322  	// Note: This is doing too much work if the precision
1323  	// of z is less than the sum of the precisions of x
1324  	// and y which is often the case (e.g., if all floats
1325  	// have the same precision).
1326  	// TODO(gri) Optimize this for the common case.
1327  
1328  	e := int64(x.exp) + int64(y.exp)
1329  	if x == y {
1330  		z.mant = z.mant.sqr(nil, x.mant)
1331  	} else {
1332  		z.mant = z.mant.mul(nil, x.mant, y.mant)
1333  	}
1334  	z.setExpAndRound(e-fnorm(z.mant), 0)
1335  }
1336  
1337  // z = x / y, ignoring signs of x and y for the division
1338  // but using the sign of z for rounding the result.
1339  // x and y must have a non-empty mantissa and valid exponent.
1340  func (z *Float) uquo(x, y *Float) {
1341  	if debugFloat {
1342  		validateBinaryOperands(x, y)
1343  	}
1344  
1345  	// mantissa length in words for desired result precision + 1
1346  	// (at least one extra bit so we get the rounding bit after
1347  	// the division)
1348  	n := int(z.prec/_W) + 1
1349  
1350  	// compute adjusted x.mant such that we get enough result precision
1351  	xadj := x.mant
1352  	if d := n - len(x.mant) + len(y.mant); d > 0 {
1353  		// d extra words needed => add d "0 digits" to x
1354  		xadj = make(nat, len(x.mant)+d)
1355  		copy(xadj[d:], x.mant)
1356  	}
1357  	// TODO(gri): If we have too many digits (d < 0), we should be able
1358  	// to shorten x for faster division. But we must be extra careful
1359  	// with rounding in that case.
1360  
1361  	// Compute d before division since there may be aliasing of x.mant
1362  	// (via xadj) or y.mant with z.mant.
1363  	d := len(xadj) - len(y.mant)
1364  
1365  	// divide
1366  	stk := getStack()
1367  	defer stk.free()
1368  	var r nat
1369  	z.mant, r = z.mant.div(stk, nil, xadj, y.mant)
1370  	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
1371  
1372  	// The result is long enough to include (at least) the rounding bit.
1373  	// If there's a non-zero remainder, the corresponding fractional part
1374  	// (if it were computed), would have a non-zero sticky bit (if it were
1375  	// zero, it couldn't have a non-zero remainder).
1376  	var sbit uint
1377  	if len(r) > 0 {
1378  		sbit = 1
1379  	}
1380  
1381  	z.setExpAndRound(e-fnorm(z.mant), sbit)
1382  }
1383  
1384  // ucmp returns -1, 0, or +1, depending on whether
1385  // |x| < |y|, |x| == |y|, or |x| > |y|.
1386  // x and y must have a non-empty mantissa and valid exponent.
1387  func (x *Float) ucmp(y *Float) int {
1388  	if debugFloat {
1389  		validateBinaryOperands(x, y)
1390  	}
1391  
1392  	switch {
1393  	case x.exp < y.exp:
1394  		return -1
1395  	case x.exp > y.exp:
1396  		return +1
1397  	}
1398  	// x.exp == y.exp
1399  
1400  	// compare mantissas
1401  	i := len(x.mant)
1402  	j := len(y.mant)
1403  	for i > 0 || j > 0 {
1404  		var xm, ym Word
1405  		if i > 0 {
1406  			i--
1407  			xm = x.mant[i]
1408  		}
1409  		if j > 0 {
1410  			j--
1411  			ym = y.mant[j]
1412  		}
1413  		switch {
1414  		case xm < ym:
1415  			return -1
1416  		case xm > ym:
1417  			return +1
1418  		}
1419  	}
1420  
1421  	return 0
1422  }
1423  
1424  // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
1425  //
1426  // When neither the inputs nor result are NaN, the sign of a product or
1427  // quotient is the exclusive OR of the operands’ signs; the sign of a sum,
1428  // or of a difference x−y regarded as a sum x+(−y), differs from at most
1429  // one of the addends’ signs; and the sign of the result of conversions,
1430  // the quantize operation, the roundToIntegral operations, and the
1431  // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
1432  // These rules shall apply even when operands or results are zero or infinite.
1433  //
1434  // When the sum of two operands with opposite signs (or the difference of
1435  // two operands with like signs) is exactly zero, the sign of that sum (or
1436  // difference) shall be +0 in all rounding-direction attributes except
1437  // roundTowardNegative; under that attribute, the sign of an exact zero
1438  // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
1439  // sign as x even when x is zero.
1440  //
1441  // See also: https://play.golang.org/p/RtH3UCt5IH
1442  
1443  // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
1444  // it is changed to the larger of x's or y's precision before the operation.
1445  // Rounding is performed according to z's precision and rounding mode; and
1446  // z's accuracy reports the result error relative to the exact (not rounded)
1447  // result. Add panics with [ErrNaN] if x and y are infinities with opposite
1448  // signs. The value of z is undefined in that case.
1449  func (z *Float) Add(x, y *Float) *Float {
1450  	if debugFloat {
1451  		x.validate()
1452  		y.validate()
1453  	}
1454  
1455  	if z.prec == 0 {
1456  		z.prec = max(x.prec, y.prec)
1457  	}
1458  
1459  	if x.form == finite && y.form == finite {
1460  		// x + y (common case)
1461  
1462  		// Below we set z.neg = x.neg, and when z aliases y this will
1463  		// change the y operand's sign. This is fine, because if an
1464  		// operand aliases the receiver it'll be overwritten, but we still
1465  		// want the original x.neg and y.neg values when we evaluate
1466  		// x.neg != y.neg, so we need to save y.neg before setting z.neg.
1467  		yneg := y.neg
1468  
1469  		z.neg = x.neg
1470  		if x.neg == yneg {
1471  			// x + y == x + y
1472  			// (-x) + (-y) == -(x + y)
1473  			z.uadd(x, y)
1474  		} else {
1475  			// x + (-y) == x - y == -(y - x)
1476  			// (-x) + y == y - x == -(x - y)
1477  			if x.ucmp(y) > 0 {
1478  				z.usub(x, y)
1479  			} else {
1480  				z.neg = !z.neg
1481  				z.usub(y, x)
1482  			}
1483  		}
1484  		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
1485  			z.neg = true
1486  		}
1487  		return z
1488  	}
1489  
1490  	if x.form == inf && y.form == inf && x.neg != y.neg {
1491  		// +Inf + -Inf
1492  		// -Inf + +Inf
1493  		// value of z is undefined but make sure it's valid
1494  		z.acc = Exact
1495  		z.form = zero
1496  		z.neg = false
1497  		panic(ErrNaN{"addition of infinities with opposite signs"})
1498  	}
1499  
1500  	if x.form == zero && y.form == zero {
1501  		// ±0 + ±0
1502  		z.acc = Exact
1503  		z.form = zero
1504  		z.neg = x.neg && y.neg // -0 + -0 == -0
1505  		return z
1506  	}
1507  
1508  	if x.form == inf || y.form == zero {
1509  		// ±Inf + y
1510  		// x + ±0
1511  		return z.Set(x)
1512  	}
1513  
1514  	// ±0 + y
1515  	// x + ±Inf
1516  	return z.Set(y)
1517  }
1518  
1519  // Sub sets z to the rounded difference x-y and returns z.
1520  // Precision, rounding, and accuracy reporting are as for [Float.Add].
1521  // Sub panics with [ErrNaN] if x and y are infinities with equal
1522  // signs. The value of z is undefined in that case.
1523  func (z *Float) Sub(x, y *Float) *Float {
1524  	if debugFloat {
1525  		x.validate()
1526  		y.validate()
1527  	}
1528  
1529  	if z.prec == 0 {
1530  		z.prec = max(x.prec, y.prec)
1531  	}
1532  
1533  	if x.form == finite && y.form == finite {
1534  		// x - y (common case)
1535  		yneg := y.neg
1536  		z.neg = x.neg
1537  		if x.neg != yneg {
1538  			// x - (-y) == x + y
1539  			// (-x) - y == -(x + y)
1540  			z.uadd(x, y)
1541  		} else {
1542  			// x - y == x - y == -(y - x)
1543  			// (-x) - (-y) == y - x == -(x - y)
1544  			if x.ucmp(y) > 0 {
1545  				z.usub(x, y)
1546  			} else {
1547  				z.neg = !z.neg
1548  				z.usub(y, x)
1549  			}
1550  		}
1551  		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
1552  			z.neg = true
1553  		}
1554  		return z
1555  	}
1556  
1557  	if x.form == inf && y.form == inf && x.neg == y.neg {
1558  		// +Inf - +Inf
1559  		// -Inf - -Inf
1560  		// value of z is undefined but make sure it's valid
1561  		z.acc = Exact
1562  		z.form = zero
1563  		z.neg = false
1564  		panic(ErrNaN{"subtraction of infinities with equal signs"})
1565  	}
1566  
1567  	if x.form == zero && y.form == zero {
1568  		// ±0 - ±0
1569  		z.acc = Exact
1570  		z.form = zero
1571  		z.neg = x.neg && !y.neg // -0 - +0 == -0
1572  		return z
1573  	}
1574  
1575  	if x.form == inf || y.form == zero {
1576  		// ±Inf - y
1577  		// x - ±0
1578  		return z.Set(x)
1579  	}
1580  
1581  	// ±0 - y
1582  	// x - ±Inf
1583  	return z.Neg(y)
1584  }
1585  
1586  // Mul sets z to the rounded product x*y and returns z.
1587  // Precision, rounding, and accuracy reporting are as for [Float.Add].
1588  // Mul panics with [ErrNaN] if one operand is zero and the other
1589  // operand an infinity. The value of z is undefined in that case.
1590  func (z *Float) Mul(x, y *Float) *Float {
1591  	if debugFloat {
1592  		x.validate()
1593  		y.validate()
1594  	}
1595  
1596  	if z.prec == 0 {
1597  		z.prec = max(x.prec, y.prec)
1598  	}
1599  
1600  	z.neg = x.neg != y.neg
1601  
1602  	if x.form == finite && y.form == finite {
1603  		// x * y (common case)
1604  		z.umul(x, y)
1605  		return z
1606  	}
1607  
1608  	z.acc = Exact
1609  	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
1610  		// ±0 * ±Inf
1611  		// ±Inf * ±0
1612  		// value of z is undefined but make sure it's valid
1613  		z.form = zero
1614  		z.neg = false
1615  		panic(ErrNaN{"multiplication of zero with infinity"})
1616  	}
1617  
1618  	if x.form == inf || y.form == inf {
1619  		// ±Inf * y
1620  		// x * ±Inf
1621  		z.form = inf
1622  		return z
1623  	}
1624  
1625  	// ±0 * y
1626  	// x * ±0
1627  	z.form = zero
1628  	return z
1629  }
1630  
1631  // Quo sets z to the rounded quotient x/y and returns z.
1632  // Precision, rounding, and accuracy reporting are as for [Float.Add].
1633  // Quo panics with [ErrNaN] if both operands are zero or infinities.
1634  // The value of z is undefined in that case.
1635  func (z *Float) Quo(x, y *Float) *Float {
1636  	if debugFloat {
1637  		x.validate()
1638  		y.validate()
1639  	}
1640  
1641  	if z.prec == 0 {
1642  		z.prec = max(x.prec, y.prec)
1643  	}
1644  
1645  	z.neg = x.neg != y.neg
1646  
1647  	if x.form == finite && y.form == finite {
1648  		// x / y (common case)
1649  		z.uquo(x, y)
1650  		return z
1651  	}
1652  
1653  	z.acc = Exact
1654  	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
1655  		// ±0 / ±0
1656  		// ±Inf / ±Inf
1657  		// value of z is undefined but make sure it's valid
1658  		z.form = zero
1659  		z.neg = false
1660  		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
1661  	}
1662  
1663  	if x.form == zero || y.form == inf {
1664  		// ±0 / y
1665  		// x / ±Inf
1666  		z.form = zero
1667  		return z
1668  	}
1669  
1670  	// x / ±0
1671  	// ±Inf / y
1672  	z.form = inf
1673  	return z
1674  }
1675  
1676  // Cmp compares x and y and returns:
1677  //   - -1 if x < y;
1678  //   - 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf);
1679  //   - +1 if x > y.
1680  func (x *Float) Cmp(y *Float) int {
1681  	if debugFloat {
1682  		x.validate()
1683  		y.validate()
1684  	}
1685  
1686  	mx := x.ord()
1687  	my := y.ord()
1688  	switch {
1689  	case mx < my:
1690  		return -1
1691  	case mx > my:
1692  		return +1
1693  	}
1694  	// mx == my
1695  
1696  	// only if |mx| == 1 we have to compare the mantissae
1697  	switch mx {
1698  	case -1:
1699  		return y.ucmp(x)
1700  	case +1:
1701  		return x.ucmp(y)
1702  	}
1703  
1704  	return 0
1705  }
1706  
1707  // ord classifies x and returns:
1708  //
1709  //	-2 if -Inf == x
1710  //	-1 if -Inf < x < 0
1711  //	 0 if x == 0 (signed or unsigned)
1712  //	+1 if 0 < x < +Inf
1713  //	+2 if x == +Inf
1714  func (x *Float) ord() int {
1715  	var m int
1716  	switch x.form {
1717  	case finite:
1718  		m = 1
1719  	case zero:
1720  		return 0
1721  	case inf:
1722  		m = 2
1723  	}
1724  	if x.neg {
1725  		m = -m
1726  	}
1727  	return m
1728  }
1729