decode_token.go raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package text
   6  
   7  import (
   8  	"bytes"
   9  	"fmt"
  10  	"math"
  11  	"strconv"
  12  	"strings"
  13  
  14  	"google.golang.org/protobuf/internal/flags"
  15  )
  16  
  17  // Kind represents a token kind expressible in the textproto format.
  18  type Kind uint8
  19  
  20  // Kind values.
  21  const (
  22  	Invalid Kind = iota
  23  	EOF
  24  	Name   // Name indicates the field name.
  25  	Scalar // Scalar are scalar values, e.g. "string", 47, ENUM_LITERAL, true.
  26  	MessageOpen
  27  	MessageClose
  28  	ListOpen
  29  	ListClose
  30  
  31  	// comma and semi-colon are only for parsing in between values and should not be exposed.
  32  	comma
  33  	semicolon
  34  
  35  	// bof indicates beginning of file, which is the default token
  36  	// kind at the beginning of parsing.
  37  	bof = Invalid
  38  )
  39  
  40  func (t Kind) String() string {
  41  	switch t {
  42  	case Invalid:
  43  		return "<invalid>"
  44  	case EOF:
  45  		return "eof"
  46  	case Scalar:
  47  		return "scalar"
  48  	case Name:
  49  		return "name"
  50  	case MessageOpen:
  51  		return "{"
  52  	case MessageClose:
  53  		return "}"
  54  	case ListOpen:
  55  		return "["
  56  	case ListClose:
  57  		return "]"
  58  	case comma:
  59  		return ","
  60  	case semicolon:
  61  		return ";"
  62  	default:
  63  		return fmt.Sprintf("<invalid:%v>", uint8(t))
  64  	}
  65  }
  66  
  67  // NameKind represents different types of field names.
  68  type NameKind uint8
  69  
  70  // NameKind values.
  71  const (
  72  	IdentName NameKind = iota + 1
  73  	TypeName
  74  	FieldNumber
  75  )
  76  
  77  func (t NameKind) String() string {
  78  	switch t {
  79  	case IdentName:
  80  		return "IdentName"
  81  	case TypeName:
  82  		return "TypeName"
  83  	case FieldNumber:
  84  		return "FieldNumber"
  85  	default:
  86  		return fmt.Sprintf("<invalid:%v>", uint8(t))
  87  	}
  88  }
  89  
  90  // Bit mask in Token.attrs to indicate if a Name token is followed by the
  91  // separator char ':'. The field name separator char is optional for message
  92  // field or repeated message field, but required for all other types. Decoder
  93  // simply indicates whether a Name token is followed by separator or not.  It is
  94  // up to the prototext package to validate.
  95  const hasSeparator = 1 << 7
  96  
  97  // Scalar value types.
  98  const (
  99  	numberValue = iota + 1
 100  	stringValue
 101  	literalValue
 102  )
 103  
 104  // Bit mask in Token.numAttrs to indicate that the number is a negative.
 105  const isNegative = 1 << 7
 106  
 107  // Token provides a parsed token kind and value. Values are provided by the
 108  // different accessor methods.
 109  type Token struct {
 110  	// Kind of the Token object.
 111  	kind Kind
 112  	// attrs contains metadata for the following Kinds:
 113  	// Name: hasSeparator bit and one of NameKind.
 114  	// Scalar: one of numberValue, stringValue, literalValue.
 115  	attrs uint8
 116  	// numAttrs contains metadata for numberValue:
 117  	// - highest bit is whether negative or positive.
 118  	// - lower bits indicate one of numDec, numHex, numOct, numFloat.
 119  	numAttrs uint8
 120  	// pos provides the position of the token in the original input.
 121  	pos int
 122  	// raw bytes of the serialized token.
 123  	// This is a subslice into the original input.
 124  	raw []byte
 125  	// str contains parsed string for the following:
 126  	// - stringValue of Scalar kind
 127  	// - numberValue of Scalar kind
 128  	// - TypeName of Name kind
 129  	str string
 130  }
 131  
 132  // Kind returns the token kind.
 133  func (t Token) Kind() Kind {
 134  	return t.kind
 135  }
 136  
 137  // RawString returns the read value in string.
 138  func (t Token) RawString() string {
 139  	return string(t.raw)
 140  }
 141  
 142  // Pos returns the token position from the input.
 143  func (t Token) Pos() int {
 144  	return t.pos
 145  }
 146  
 147  // NameKind returns IdentName, TypeName or FieldNumber.
 148  // It panics if type is not Name.
 149  func (t Token) NameKind() NameKind {
 150  	if t.kind == Name {
 151  		return NameKind(t.attrs &^ hasSeparator)
 152  	}
 153  	panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
 154  }
 155  
 156  // HasSeparator returns true if the field name is followed by the separator char
 157  // ':', else false. It panics if type is not Name.
 158  func (t Token) HasSeparator() bool {
 159  	if t.kind == Name {
 160  		return t.attrs&hasSeparator != 0
 161  	}
 162  	panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
 163  }
 164  
 165  // IdentName returns the value for IdentName type.
 166  func (t Token) IdentName() string {
 167  	if t.kind == Name && t.attrs&uint8(IdentName) != 0 {
 168  		return string(t.raw)
 169  	}
 170  	panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
 171  }
 172  
 173  // TypeName returns the value for TypeName type.
 174  func (t Token) TypeName() string {
 175  	if t.kind == Name && t.attrs&uint8(TypeName) != 0 {
 176  		return t.str
 177  	}
 178  	panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
 179  }
 180  
 181  // FieldNumber returns the value for FieldNumber type. It returns a
 182  // non-negative int32 value. Caller will still need to validate for the correct
 183  // field number range.
 184  func (t Token) FieldNumber() int32 {
 185  	if t.kind != Name || t.attrs&uint8(FieldNumber) == 0 {
 186  		panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
 187  	}
 188  	// Following should not return an error as it had already been called right
 189  	// before this Token was constructed.
 190  	num, _ := strconv.ParseInt(string(t.raw), 10, 32)
 191  	return int32(num)
 192  }
 193  
 194  // String returns the string value for a Scalar type.
 195  func (t Token) String() (string, bool) {
 196  	if t.kind != Scalar || t.attrs != stringValue {
 197  		return "", false
 198  	}
 199  	return t.str, true
 200  }
 201  
 202  // Enum returns the literal value for a Scalar type for use as enum literals.
 203  func (t Token) Enum() (string, bool) {
 204  	if t.kind != Scalar || t.attrs != literalValue || (len(t.raw) > 0 && t.raw[0] == '-') {
 205  		return "", false
 206  	}
 207  	return string(t.raw), true
 208  }
 209  
 210  // Bool returns the bool value for a Scalar type.
 211  func (t Token) Bool() (bool, bool) {
 212  	if t.kind != Scalar {
 213  		return false, false
 214  	}
 215  	switch t.attrs {
 216  	case literalValue:
 217  		if b, ok := boolLits[string(t.raw)]; ok {
 218  			return b, true
 219  		}
 220  	case numberValue:
 221  		// Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
 222  		// 0x1, etc.
 223  		n, err := strconv.ParseUint(t.str, 0, 64)
 224  		if err == nil {
 225  			switch n {
 226  			case 0:
 227  				return false, true
 228  			case 1:
 229  				return true, true
 230  			}
 231  		}
 232  	}
 233  	return false, false
 234  }
 235  
 236  // These exact boolean literals are the ones supported in C++.
 237  var boolLits = map[string]bool{
 238  	"t":     true,
 239  	"true":  true,
 240  	"True":  true,
 241  	"f":     false,
 242  	"false": false,
 243  	"False": false,
 244  }
 245  
 246  // Uint64 returns the uint64 value for a Scalar type.
 247  func (t Token) Uint64() (uint64, bool) {
 248  	if t.kind != Scalar || t.attrs != numberValue ||
 249  		t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
 250  		return 0, false
 251  	}
 252  	n, err := strconv.ParseUint(t.str, 0, 64)
 253  	if err != nil {
 254  		return 0, false
 255  	}
 256  	return n, true
 257  }
 258  
 259  // Uint32 returns the uint32 value for a Scalar type.
 260  func (t Token) Uint32() (uint32, bool) {
 261  	if t.kind != Scalar || t.attrs != numberValue ||
 262  		t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
 263  		return 0, false
 264  	}
 265  	n, err := strconv.ParseUint(t.str, 0, 32)
 266  	if err != nil {
 267  		return 0, false
 268  	}
 269  	return uint32(n), true
 270  }
 271  
 272  // Int64 returns the int64 value for a Scalar type.
 273  func (t Token) Int64() (int64, bool) {
 274  	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
 275  		return 0, false
 276  	}
 277  	if n, err := strconv.ParseInt(t.str, 0, 64); err == nil {
 278  		return n, true
 279  	}
 280  	// C++ accepts large positive hex numbers as negative values.
 281  	// This feature is here for proto1 backwards compatibility purposes.
 282  	if flags.ProtoLegacy && (t.numAttrs == numHex) {
 283  		if n, err := strconv.ParseUint(t.str, 0, 64); err == nil {
 284  			return int64(n), true
 285  		}
 286  	}
 287  	return 0, false
 288  }
 289  
 290  // Int32 returns the int32 value for a Scalar type.
 291  func (t Token) Int32() (int32, bool) {
 292  	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
 293  		return 0, false
 294  	}
 295  	if n, err := strconv.ParseInt(t.str, 0, 32); err == nil {
 296  		return int32(n), true
 297  	}
 298  	// C++ accepts large positive hex numbers as negative values.
 299  	// This feature is here for proto1 backwards compatibility purposes.
 300  	if flags.ProtoLegacy && (t.numAttrs == numHex) {
 301  		if n, err := strconv.ParseUint(t.str, 0, 32); err == nil {
 302  			return int32(n), true
 303  		}
 304  	}
 305  	return 0, false
 306  }
 307  
 308  // Float64 returns the float64 value for a Scalar type.
 309  func (t Token) Float64() (float64, bool) {
 310  	if t.kind != Scalar {
 311  		return 0, false
 312  	}
 313  	switch t.attrs {
 314  	case literalValue:
 315  		if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
 316  			return f, true
 317  		}
 318  	case numberValue:
 319  		n, err := strconv.ParseFloat(t.str, 64)
 320  		if err == nil {
 321  			return n, true
 322  		}
 323  		nerr := err.(*strconv.NumError)
 324  		if nerr.Err == strconv.ErrRange {
 325  			return n, true
 326  		}
 327  	}
 328  	return 0, false
 329  }
 330  
 331  // Float32 returns the float32 value for a Scalar type.
 332  func (t Token) Float32() (float32, bool) {
 333  	if t.kind != Scalar {
 334  		return 0, false
 335  	}
 336  	switch t.attrs {
 337  	case literalValue:
 338  		if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
 339  			return float32(f), true
 340  		}
 341  	case numberValue:
 342  		n, err := strconv.ParseFloat(t.str, 64)
 343  		if err == nil {
 344  			// Overflows are treated as (-)infinity.
 345  			return float32(n), true
 346  		}
 347  		nerr := err.(*strconv.NumError)
 348  		if nerr.Err == strconv.ErrRange {
 349  			return float32(n), true
 350  		}
 351  	}
 352  	return 0, false
 353  }
 354  
 355  // These are the supported float literals which C++ permits case-insensitive
 356  // variants of these.
 357  var floatLits = map[string]float64{
 358  	"nan":       math.NaN(),
 359  	"inf":       math.Inf(1),
 360  	"infinity":  math.Inf(1),
 361  	"-inf":      math.Inf(-1),
 362  	"-infinity": math.Inf(-1),
 363  }
 364  
 365  // TokenEquals returns true if given Tokens are equal, else false.
 366  func TokenEquals(x, y Token) bool {
 367  	return x.kind == y.kind &&
 368  		x.attrs == y.attrs &&
 369  		x.numAttrs == y.numAttrs &&
 370  		x.pos == y.pos &&
 371  		bytes.Equal(x.raw, y.raw) &&
 372  		x.str == y.str
 373  }
 374