atof.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package strconv
   6  
   7  // decimal to binary floating point conversion.
   8  // Algorithm:
   9  //   1) Store input in multiprecision decimal.
  10  //   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
  11  //   3) Multiply by 2^precision and round to get mantissa.
  12  
  13  import "math"
  14  
  15  var optimize = true // set to false to force slow-path conversions for testing
  16  
  17  // commonPrefixLenIgnoreCase returns the length of the common
  18  // prefix of s and prefix, with the character case of s ignored.
  19  // The prefix argument must be all lower-case.
  20  func commonPrefixLenIgnoreCase(s, prefix []byte) int {
  21  	n := min(len(prefix), len(s))
  22  	for i := 0; i < n; i++ {
  23  		c := s[i]
  24  		if 'A' <= c && c <= 'Z' {
  25  			c += 'a' - 'A'
  26  		}
  27  		if c != prefix[i] {
  28  			return i
  29  		}
  30  	}
  31  	return n
  32  }
  33  
  34  // special returns the floating-point value for the special,
  35  // possibly signed floating-point representations inf, infinity,
  36  // and NaN. The result is ok if a prefix of s contains one
  37  // of these representations and n is the length of that prefix.
  38  // The character case is ignored.
  39  func special(s []byte) (f float64, n int, ok bool) {
  40  	if len(s) == 0 {
  41  		return 0, 0, false
  42  	}
  43  	sign := 1
  44  	nsign := 0
  45  	switch s[0] {
  46  	case '+', '-':
  47  		if s[0] == '-' {
  48  			sign = -1
  49  		}
  50  		nsign = 1
  51  		s = s[1:]
  52  		n := commonPrefixLenIgnoreCase(s, "infinity")
  53  		// Anything longer than "inf" is ok, but if we
  54  		// don't have "infinity", only consume "inf".
  55  		if 3 < n && n < 8 {
  56  			n = 3
  57  		}
  58  		if n == 3 || n == 8 {
  59  			return math.Inf(sign), nsign + n, true
  60  		}
  61  	case 'i', 'I':
  62  		n := commonPrefixLenIgnoreCase(s, "infinity")
  63  		// Anything longer than "inf" is ok, but if we
  64  		// don't have "infinity", only consume "inf".
  65  		if 3 < n && n < 8 {
  66  			n = 3
  67  		}
  68  		if n == 3 || n == 8 {
  69  			return math.Inf(sign), nsign + n, true
  70  		}
  71  	case 'n', 'N':
  72  		if commonPrefixLenIgnoreCase(s, "nan") == 3 {
  73  			return math.NaN(), 3, true
  74  		}
  75  	}
  76  	return 0, 0, false
  77  }
  78  
  79  func (b *decimal) set(s []byte) (ok bool) {
  80  	i := 0
  81  	b.neg = false
  82  	b.trunc = false
  83  
  84  	// optional sign
  85  	if i >= len(s) {
  86  		return
  87  	}
  88  	switch s[i] {
  89  	case '+':
  90  		i++
  91  	case '-':
  92  		i++
  93  		b.neg = true
  94  	}
  95  
  96  	// digits
  97  	sawdot := false
  98  	sawdigits := false
  99  	for ; i < len(s); i++ {
 100  		switch {
 101  		case s[i] == '_':
 102  			// readFloat already checked underscores
 103  			continue
 104  		case s[i] == '.':
 105  			if sawdot {
 106  				return
 107  			}
 108  			sawdot = true
 109  			b.dp = b.nd
 110  			continue
 111  
 112  		case '0' <= s[i] && s[i] <= '9':
 113  			sawdigits = true
 114  			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
 115  				b.dp--
 116  				continue
 117  			}
 118  			if b.nd < len(b.d) {
 119  				b.d[b.nd] = s[i]
 120  				b.nd++
 121  			} else if s[i] != '0' {
 122  				b.trunc = true
 123  			}
 124  			continue
 125  		}
 126  		break
 127  	}
 128  	if !sawdigits {
 129  		return
 130  	}
 131  	if !sawdot {
 132  		b.dp = b.nd
 133  	}
 134  
 135  	// optional exponent moves decimal point.
 136  	// if we read a very large, very long number,
 137  	// just be sure to move the decimal point by
 138  	// a lot (say, 100000).  it doesn't matter if it's
 139  	// not the exact number.
 140  	if i < len(s) && lower(s[i]) == 'e' {
 141  		i++
 142  		if i >= len(s) {
 143  			return
 144  		}
 145  		esign := 1
 146  		switch s[i] {
 147  		case '+':
 148  			i++
 149  		case '-':
 150  			i++
 151  			esign = -1
 152  		}
 153  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
 154  			return
 155  		}
 156  		e := 0
 157  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
 158  			if s[i] == '_' {
 159  				// readFloat already checked underscores
 160  				continue
 161  			}
 162  			if e < 10000 {
 163  				e = e*10 + int(s[i]) - '0'
 164  			}
 165  		}
 166  		b.dp += e * esign
 167  	}
 168  
 169  	if i != len(s) {
 170  		return
 171  	}
 172  
 173  	ok = true
 174  	return
 175  }
 176  
 177  // readFloat reads a decimal or hexadecimal mantissa and exponent from a float
 178  // string representation in s; the number may be followed by other characters.
 179  // readFloat reports the number of bytes consumed (i), and whether the number
 180  // is valid (ok).
 181  func readFloat(s []byte) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) {
 182  	underscores := false
 183  
 184  	// optional sign
 185  	if i >= len(s) {
 186  		return
 187  	}
 188  	switch s[i] {
 189  	case '+':
 190  		i++
 191  	case '-':
 192  		i++
 193  		neg = true
 194  	}
 195  
 196  	// digits
 197  	base := uint64(10)
 198  	maxMantDigits := 19 // 10^19 fits in uint64
 199  	expChar := byte('e')
 200  	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
 201  		base = 16
 202  		maxMantDigits = 16 // 16^16 fits in uint64
 203  		i += 2
 204  		expChar = 'p'
 205  		hex = true
 206  	}
 207  	sawdot := false
 208  	sawdigits := false
 209  	nd := 0
 210  	ndMant := 0
 211  	dp := 0
 212  loop:
 213  	for ; i < len(s); i++ {
 214  		switch c := s[i]; true {
 215  		case c == '_':
 216  			underscores = true
 217  			continue
 218  
 219  		case c == '.':
 220  			if sawdot {
 221  				break loop
 222  			}
 223  			sawdot = true
 224  			dp = nd
 225  			continue
 226  
 227  		case '0' <= c && c <= '9':
 228  			sawdigits = true
 229  			if c == '0' && nd == 0 { // ignore leading zeros
 230  				dp--
 231  				continue
 232  			}
 233  			nd++
 234  			if ndMant < maxMantDigits {
 235  				mantissa *= base
 236  				mantissa += uint64(c - '0')
 237  				ndMant++
 238  			} else if c != '0' {
 239  				trunc = true
 240  			}
 241  			continue
 242  
 243  		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
 244  			sawdigits = true
 245  			nd++
 246  			if ndMant < maxMantDigits {
 247  				mantissa *= 16
 248  				mantissa += uint64(lower(c) - 'a' + 10)
 249  				ndMant++
 250  			} else {
 251  				trunc = true
 252  			}
 253  			continue
 254  		}
 255  		break
 256  	}
 257  	if !sawdigits {
 258  		return
 259  	}
 260  	if !sawdot {
 261  		dp = nd
 262  	}
 263  
 264  	if base == 16 {
 265  		dp *= 4
 266  		ndMant *= 4
 267  	}
 268  
 269  	// optional exponent moves decimal point.
 270  	// if we read a very large, very long number,
 271  	// just be sure to move the decimal point by
 272  	// a lot (say, 100000).  it doesn't matter if it's
 273  	// not the exact number.
 274  	if i < len(s) && lower(s[i]) == expChar {
 275  		i++
 276  		if i >= len(s) {
 277  			return
 278  		}
 279  		esign := 1
 280  		switch s[i] {
 281  		case '+':
 282  			i++
 283  		case '-':
 284  			i++
 285  			esign = -1
 286  		}
 287  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
 288  			return
 289  		}
 290  		e := 0
 291  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
 292  			if s[i] == '_' {
 293  				underscores = true
 294  				continue
 295  			}
 296  			if e < 10000 {
 297  				e = e*10 + int(s[i]) - '0'
 298  			}
 299  		}
 300  		dp += e * esign
 301  	} else if base == 16 {
 302  		// Must have exponent.
 303  		return
 304  	}
 305  
 306  	if mantissa != 0 {
 307  		exp = dp - ndMant
 308  	}
 309  
 310  	if underscores && !underscoreOK(s[:i]) {
 311  		return
 312  	}
 313  
 314  	ok = true
 315  	return
 316  }
 317  
 318  // decimal power of ten to binary power of two.
 319  var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
 320  
 321  func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
 322  	var exp int
 323  	var mant uint64
 324  
 325  	// Zero is always a special case.
 326  	if d.nd == 0 {
 327  		mant = 0
 328  		exp = flt.bias
 329  		goto out
 330  	}
 331  
 332  	// Obvious overflow/underflow.
 333  	// These bounds are for 64-bit floats.
 334  	// Will have to change if we want to support 80-bit floats in the future.
 335  	if d.dp > 310 {
 336  		goto overflow
 337  	}
 338  	if d.dp < -330 {
 339  		// zero
 340  		mant = 0
 341  		exp = flt.bias
 342  		goto out
 343  	}
 344  
 345  	// Scale by powers of two until in range [0.5, 1.0)
 346  	exp = 0
 347  	for d.dp > 0 {
 348  		var n int
 349  		if d.dp >= len(powtab) {
 350  			n = 27
 351  		} else {
 352  			n = powtab[d.dp]
 353  		}
 354  		d.Shift(-n)
 355  		exp += n
 356  	}
 357  	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
 358  		var n int
 359  		if -d.dp >= len(powtab) {
 360  			n = 27
 361  		} else {
 362  			n = powtab[-d.dp]
 363  		}
 364  		d.Shift(n)
 365  		exp -= n
 366  	}
 367  
 368  	// Our range is [0.5,1) but floating point range is [1,2).
 369  	exp--
 370  
 371  	// Minimum representable exponent is flt.bias+1.
 372  	// If the exponent is smaller, move it up and
 373  	// adjust d accordingly.
 374  	if exp < flt.bias+1 {
 375  		n := flt.bias + 1 - exp
 376  		d.Shift(-n)
 377  		exp += n
 378  	}
 379  
 380  	if exp-flt.bias >= 1<<flt.expbits-1 {
 381  		goto overflow
 382  	}
 383  
 384  	// Extract 1+flt.mantbits bits.
 385  	d.Shift(int(1 + flt.mantbits))
 386  	mant = d.RoundedInteger()
 387  
 388  	// Rounding might have added a bit; shift down.
 389  	if mant == 2<<flt.mantbits {
 390  		mant >>= 1
 391  		exp++
 392  		if exp-flt.bias >= 1<<flt.expbits-1 {
 393  			goto overflow
 394  		}
 395  	}
 396  
 397  	// Denormalized?
 398  	if mant&(1<<flt.mantbits) == 0 {
 399  		exp = flt.bias
 400  	}
 401  	goto out
 402  
 403  overflow:
 404  	// ±Inf
 405  	mant = 0
 406  	exp = 1<<flt.expbits - 1 + flt.bias
 407  	overflow = true
 408  
 409  out:
 410  	// Assemble bits.
 411  	bits := mant & (uint64(1)<<flt.mantbits - 1)
 412  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
 413  	if d.neg {
 414  		bits |= 1 << flt.mantbits << flt.expbits
 415  	}
 416  	return bits, overflow
 417  }
 418  
 419  // Exact powers of 10.
 420  var float64pow10 = []float64{
 421  	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
 422  	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
 423  	1e20, 1e21, 1e22,
 424  }
 425  var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
 426  
 427  // If possible to convert decimal representation to 64-bit float f exactly,
 428  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
 429  // Three common cases:
 430  //
 431  //	value is exact integer
 432  //	value is exact integer * exact power of ten
 433  //	value is exact integer / exact power of ten
 434  //
 435  // These all produce potentially inexact but correctly rounded answers.
 436  func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
 437  	if mantissa>>float64info.mantbits != 0 {
 438  		return
 439  	}
 440  	f = float64(mantissa)
 441  	if neg {
 442  		f = -f
 443  	}
 444  	switch {
 445  	case exp == 0:
 446  		// an integer.
 447  		return f, true
 448  	// Exact integers are <= 10^15.
 449  	// Exact powers of ten are <= 10^22.
 450  	case exp > 0 && exp <= 15+22: // int * 10^k
 451  		// If exponent is big but number of digits is not,
 452  		// can move a few zeros into the integer part.
 453  		if exp > 22 {
 454  			f *= float64pow10[exp-22]
 455  			exp = 22
 456  		}
 457  		if f > 1e15 || f < -1e15 {
 458  			// the exponent was really too large.
 459  			return
 460  		}
 461  		return f * float64pow10[exp], true
 462  	case exp < 0 && exp >= -22: // int / 10^k
 463  		return f / float64pow10[-exp], true
 464  	}
 465  	return
 466  }
 467  
 468  // If possible to compute mantissa*10^exp to 32-bit float f exactly,
 469  // entirely in floating-point math, do so, avoiding the machinery above.
 470  func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
 471  	if mantissa>>float32info.mantbits != 0 {
 472  		return
 473  	}
 474  	f = float32(mantissa)
 475  	if neg {
 476  		f = -f
 477  	}
 478  	switch {
 479  	case exp == 0:
 480  		return f, true
 481  	// Exact integers are <= 10^7.
 482  	// Exact powers of ten are <= 10^10.
 483  	case exp > 0 && exp <= 7+10: // int * 10^k
 484  		// If exponent is big but number of digits is not,
 485  		// can move a few zeros into the integer part.
 486  		if exp > 10 {
 487  			f *= float32pow10[exp-10]
 488  			exp = 10
 489  		}
 490  		if f > 1e7 || f < -1e7 {
 491  			// the exponent was really too large.
 492  			return
 493  		}
 494  		return f * float32pow10[exp], true
 495  	case exp < 0 && exp >= -10: // int / 10^k
 496  		return f / float32pow10[-exp], true
 497  	}
 498  	return
 499  }
 500  
 501  // atofHex converts the hex floating-point string s
 502  // to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
 503  // and returns it as a float64.
 504  // The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
 505  // If trunc is true, trailing non-zero bits have been omitted from the mantissa.
 506  func atofHex(s []byte, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
 507  	maxExp := 1<<flt.expbits + flt.bias - 2
 508  	minExp := flt.bias + 1
 509  	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
 510  
 511  	// Shift mantissa and exponent to bring representation into float range.
 512  	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
 513  	// For rounding, we need two more, where the bottom bit represents
 514  	// whether that bit or any later bit was non-zero.
 515  	// (If the mantissa has already lost non-zero bits, trunc is true,
 516  	// and we OR in a 1 below after shifting left appropriately.)
 517  	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
 518  		mantissa <<= 1
 519  		exp--
 520  	}
 521  	if trunc {
 522  		mantissa |= 1
 523  	}
 524  	for mantissa>>(1+flt.mantbits+2) != 0 {
 525  		mantissa = mantissa>>1 | mantissa&1
 526  		exp++
 527  	}
 528  
 529  	// If exponent is too negative,
 530  	// denormalize in hopes of making it representable.
 531  	// (The -2 is for the rounding bits.)
 532  	for mantissa > 1 && exp < minExp-2 {
 533  		mantissa = mantissa>>1 | mantissa&1
 534  		exp++
 535  	}
 536  
 537  	// Round using two bottom bits.
 538  	round := mantissa & 3
 539  	mantissa >>= 2
 540  	round |= mantissa & 1 // round to even (round up if mantissa is odd)
 541  	exp += 2
 542  	if round == 3 {
 543  		mantissa++
 544  		if mantissa == 1<<(1+flt.mantbits) {
 545  			mantissa >>= 1
 546  			exp++
 547  		}
 548  	}
 549  
 550  	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
 551  		exp = flt.bias
 552  	}
 553  	var err error
 554  	if exp > maxExp { // infinity and range error
 555  		mantissa = 1 << flt.mantbits
 556  		exp = maxExp + 1
 557  		err = rangeError(fnParseFloat, s)
 558  	}
 559  
 560  	bits := mantissa & (1<<flt.mantbits - 1)
 561  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
 562  	if neg {
 563  		bits |= 1 << flt.mantbits << flt.expbits
 564  	}
 565  	if flt == &float32info {
 566  		return float64(math.Float32frombits(uint32(bits))), err
 567  	}
 568  	return math.Float64frombits(bits), err
 569  }
 570  
 571  const fnParseFloat = "ParseFloat"
 572  
 573  func atof32(s []byte) (f float32, n int, err error) {
 574  	if val, n, ok := special(s); ok {
 575  		return float32(val), n, nil
 576  	}
 577  
 578  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
 579  	if !ok {
 580  		return 0, n, syntaxError(fnParseFloat, s)
 581  	}
 582  
 583  	if hex {
 584  		f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
 585  		return float32(f), n, err
 586  	}
 587  
 588  	if optimize {
 589  		// Try pure floating-point arithmetic conversion, and if that fails,
 590  		// the Eisel-Lemire algorithm.
 591  		if !trunc {
 592  			if f, ok := atof32exact(mantissa, exp, neg); ok {
 593  				return f, n, nil
 594  			}
 595  		}
 596  		f, ok := eiselLemire32(mantissa, exp, neg)
 597  		if ok {
 598  			if !trunc {
 599  				return f, n, nil
 600  			}
 601  			// Even if the mantissa was truncated, we may
 602  			// have found the correct result. Confirm by
 603  			// converting the upper mantissa bound.
 604  			fUp, ok := eiselLemire32(mantissa+1, exp, neg)
 605  			if ok && f == fUp {
 606  				return f, n, nil
 607  			}
 608  		}
 609  	}
 610  
 611  	// Slow fallback.
 612  	var d decimal
 613  	if !d.set(s[:n]) {
 614  		return 0, n, syntaxError(fnParseFloat, s)
 615  	}
 616  	b, ovf := d.floatBits(&float32info)
 617  	f = math.Float32frombits(uint32(b))
 618  	if ovf {
 619  		err = rangeError(fnParseFloat, s)
 620  	}
 621  	return f, n, err
 622  }
 623  
 624  func atof64(s []byte) (f float64, n int, err error) {
 625  	if val, n, ok := special(s); ok {
 626  		return val, n, nil
 627  	}
 628  
 629  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
 630  	if !ok {
 631  		return 0, n, syntaxError(fnParseFloat, s)
 632  	}
 633  
 634  	if hex {
 635  		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
 636  		return f, n, err
 637  	}
 638  
 639  	if optimize {
 640  		// Try pure floating-point arithmetic conversion, and if that fails,
 641  		// the Eisel-Lemire algorithm.
 642  		if !trunc {
 643  			if f, ok := atof64exact(mantissa, exp, neg); ok {
 644  				return f, n, nil
 645  			}
 646  		}
 647  		f, ok := eiselLemire64(mantissa, exp, neg)
 648  		if ok {
 649  			if !trunc {
 650  				return f, n, nil
 651  			}
 652  			// Even if the mantissa was truncated, we may
 653  			// have found the correct result. Confirm by
 654  			// converting the upper mantissa bound.
 655  			fUp, ok := eiselLemire64(mantissa+1, exp, neg)
 656  			if ok && f == fUp {
 657  				return f, n, nil
 658  			}
 659  		}
 660  	}
 661  
 662  	// Slow fallback.
 663  	var d decimal
 664  	if !d.set(s[:n]) {
 665  		return 0, n, syntaxError(fnParseFloat, s)
 666  	}
 667  	b, ovf := d.floatBits(&float64info)
 668  	f = math.Float64frombits(b)
 669  	if ovf {
 670  		err = rangeError(fnParseFloat, s)
 671  	}
 672  	return f, n, err
 673  }
 674  
 675  // ParseFloat converts the string s to a floating-point number
 676  // with the precision specified by bitSize: 32 for float32, or 64 for float64.
 677  // When bitSize=32, the result still has type float64, but it will be
 678  // convertible to float32 without changing its value.
 679  //
 680  // ParseFloat accepts decimal and hexadecimal floating-point numbers
 681  // as defined by the Go syntax for [floating-point literals].
 682  // If s is well-formed and near a valid floating-point number,
 683  // ParseFloat returns the nearest floating-point number rounded
 684  // using IEEE754 unbiased rounding.
 685  // (Parsing a hexadecimal floating-point value only rounds when
 686  // there are more bits in the hexadecimal representation than
 687  // will fit in the mantissa.)
 688  //
 689  // The errors that ParseFloat returns have concrete type *NumError
 690  // and include err.Num = s.
 691  //
 692  // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
 693  //
 694  // If s is syntactically well-formed but is more than 1/2 ULP
 695  // away from the largest floating point number of the given size,
 696  // ParseFloat returns f = ±Inf, err.Err = ErrRange.
 697  //
 698  // ParseFloat recognizes the string "NaN", and the (possibly signed) strings "Inf" and "Infinity"
 699  // as their respective special floating point values. It ignores case when matching.
 700  //
 701  // [floating-point literals]: https://go.dev/ref/spec#Floating-point_literals
 702  func ParseFloat(s []byte, bitSize int) (float64, error) {
 703  	f, n, err := parseFloatPrefix(s, bitSize)
 704  	if n != len(s) && (err == nil || err.(*NumError).Err != ErrSyntax) {
 705  		return 0, syntaxError(fnParseFloat, s)
 706  	}
 707  	return f, err
 708  }
 709  
 710  func parseFloatPrefix(s []byte, bitSize int) (float64, int, error) {
 711  	if bitSize == 32 {
 712  		f, n, err := atof32(s)
 713  		return float64(f), n, err
 714  	}
 715  	return atof64(s)
 716  }
 717