decode_number.go raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package text
   6  
   7  // parseNumberValue parses a number from the input and returns a Token object.
   8  func (d *Decoder) parseNumberValue() (Token, bool) {
   9  	in := d.in
  10  	num := parseNumber(in)
  11  	if num.size == 0 {
  12  		return Token{}, false
  13  	}
  14  	numAttrs := num.kind
  15  	if num.neg {
  16  		numAttrs |= isNegative
  17  	}
  18  	tok := Token{
  19  		kind:     Scalar,
  20  		attrs:    numberValue,
  21  		pos:      len(d.orig) - len(d.in),
  22  		raw:      d.in[:num.size],
  23  		str:      num.string(d.in),
  24  		numAttrs: numAttrs,
  25  	}
  26  	d.consume(num.size)
  27  	return tok, true
  28  }
  29  
  30  const (
  31  	numDec uint8 = (1 << iota) / 2
  32  	numHex
  33  	numOct
  34  	numFloat
  35  )
  36  
  37  // number is the result of parsing out a valid number from parseNumber. It
  38  // contains data for doing float or integer conversion via the strconv package
  39  // in conjunction with the input bytes.
  40  type number struct {
  41  	kind uint8
  42  	neg  bool
  43  	size int
  44  	// if neg, this is the length of whitespace and comments between
  45  	// the minus sign and the rest fo the number literal
  46  	sep int
  47  }
  48  
  49  func (num number) string(data []byte) string {
  50  	strSize := num.size
  51  	last := num.size - 1
  52  	if num.kind == numFloat && (data[last] == 'f' || data[last] == 'F') {
  53  		strSize = last
  54  	}
  55  	if num.neg && num.sep > 0 {
  56  		// strip whitespace/comments between negative sign and the rest
  57  		strLen := strSize - num.sep
  58  		str := make([]byte, strLen)
  59  		str[0] = data[0]
  60  		copy(str[1:], data[num.sep+1:strSize])
  61  		return string(str)
  62  	}
  63  	return string(data[:strSize])
  64  
  65  }
  66  
  67  // parseNumber constructs a number object from given input. It allows for the
  68  // following patterns:
  69  //
  70  //	integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
  71  //	float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
  72  //
  73  // It also returns the number of parsed bytes for the given number, 0 if it is
  74  // not a number.
  75  func parseNumber(input []byte) number {
  76  	kind := numDec
  77  	var size int
  78  	var neg bool
  79  
  80  	s := input
  81  	if len(s) == 0 {
  82  		return number{}
  83  	}
  84  
  85  	// Optional -
  86  	var sep int
  87  	if s[0] == '-' {
  88  		neg = true
  89  		s = s[1:]
  90  		size++
  91  		// Consume any whitespace or comments between the
  92  		// negative sign and the rest of the number
  93  		lenBefore := len(s)
  94  		s = consume(s, 0)
  95  		sep = lenBefore - len(s)
  96  		size += sep
  97  		if len(s) == 0 {
  98  			return number{}
  99  		}
 100  	}
 101  
 102  	switch {
 103  	case s[0] == '0':
 104  		if len(s) > 1 {
 105  			switch {
 106  			case s[1] == 'x' || s[1] == 'X':
 107  				// Parse as hex number.
 108  				kind = numHex
 109  				n := 2
 110  				s = s[2:]
 111  				for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
 112  					('a' <= s[0] && s[0] <= 'f') ||
 113  					('A' <= s[0] && s[0] <= 'F')) {
 114  					s = s[1:]
 115  					n++
 116  				}
 117  				if n == 2 {
 118  					return number{}
 119  				}
 120  				size += n
 121  
 122  			case '0' <= s[1] && s[1] <= '7':
 123  				// Parse as octal number.
 124  				kind = numOct
 125  				n := 2
 126  				s = s[2:]
 127  				for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
 128  					s = s[1:]
 129  					n++
 130  				}
 131  				size += n
 132  			}
 133  
 134  			if kind&(numHex|numOct) > 0 {
 135  				if len(s) > 0 && !isDelim(s[0]) {
 136  					return number{}
 137  				}
 138  				return number{kind: kind, neg: neg, size: size, sep: sep}
 139  			}
 140  		}
 141  		s = s[1:]
 142  		size++
 143  
 144  	case '1' <= s[0] && s[0] <= '9':
 145  		n := 1
 146  		s = s[1:]
 147  		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
 148  			s = s[1:]
 149  			n++
 150  		}
 151  		size += n
 152  
 153  	case s[0] == '.':
 154  		// Set kind to numFloat to signify the intent to parse as float. And
 155  		// that it needs to have other digits after '.'.
 156  		kind = numFloat
 157  
 158  	default:
 159  		return number{}
 160  	}
 161  
 162  	// . followed by 0 or more digits.
 163  	if len(s) > 0 && s[0] == '.' {
 164  		n := 1
 165  		s = s[1:]
 166  		// If decimal point was before any digits, it should be followed by
 167  		// other digits.
 168  		if len(s) == 0 && kind == numFloat {
 169  			return number{}
 170  		}
 171  		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
 172  			s = s[1:]
 173  			n++
 174  		}
 175  		size += n
 176  		kind = numFloat
 177  	}
 178  
 179  	// e or E followed by an optional - or + and 1 or more digits.
 180  	if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
 181  		kind = numFloat
 182  		s = s[1:]
 183  		n := 1
 184  		if s[0] == '+' || s[0] == '-' {
 185  			s = s[1:]
 186  			n++
 187  			if len(s) == 0 {
 188  				return number{}
 189  			}
 190  		}
 191  		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
 192  			s = s[1:]
 193  			n++
 194  		}
 195  		size += n
 196  	}
 197  
 198  	// Optional suffix f or F for floats.
 199  	if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
 200  		kind = numFloat
 201  		s = s[1:]
 202  		size++
 203  	}
 204  
 205  	// Check that next byte is a delimiter or it is at the end.
 206  	if len(s) > 0 && !isDelim(s[0]) {
 207  		return number{}
 208  	}
 209  
 210  	return number{kind: kind, neg: neg, size: size, sep: sep}
 211  }
 212