decode_token.go raw
1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package text
6
7 import (
8 "bytes"
9 "fmt"
10 "math"
11 "strconv"
12 "strings"
13
14 "google.golang.org/protobuf/internal/flags"
15 )
16
// Kind represents a token kind expressible in the textproto format.
type Kind uint8

// Kind values.
const (
	// Invalid is the zero value of Kind.
	Invalid Kind = iota
	EOF
	// Name indicates a field name.
	Name
	// Scalar indicates a scalar value, e.g. "string", 47, ENUM_LITERAL, true.
	Scalar
	// MessageOpen and MessageClose are the "{" and "}" tokens.
	MessageOpen
	MessageClose
	// ListOpen and ListClose are the "[" and "]" tokens.
	ListOpen
	ListClose

	// comma and semicolon are only for parsing in between values and should
	// not be exposed.
	comma
	semicolon

	// bof indicates beginning of file, which is the default token
	// kind at the beginning of parsing. It shares its value with Invalid.
	bof = Invalid
)
39
40 func (t Kind) String() string {
41 switch t {
42 case Invalid:
43 return "<invalid>"
44 case EOF:
45 return "eof"
46 case Scalar:
47 return "scalar"
48 case Name:
49 return "name"
50 case MessageOpen:
51 return "{"
52 case MessageClose:
53 return "}"
54 case ListOpen:
55 return "["
56 case ListClose:
57 return "]"
58 case comma:
59 return ","
60 case semicolon:
61 return ";"
62 default:
63 return fmt.Sprintf("<invalid:%v>", uint8(t))
64 }
65 }
66
// NameKind represents different types of field names.
type NameKind uint8

// NameKind values. They start at 1, so the zero value is not a valid
// NameKind. The values are consecutive integers, not independent bit flags.
const (
	IdentName NameKind = iota + 1
	TypeName
	FieldNumber
)
76
77 func (t NameKind) String() string {
78 switch t {
79 case IdentName:
80 return "IdentName"
81 case TypeName:
82 return "TypeName"
83 case FieldNumber:
84 return "FieldNumber"
85 default:
86 return fmt.Sprintf("<invalid:%v>", uint8(t))
87 }
88 }
89
// hasSeparator is the bit mask in Token.attrs that indicates whether a Name
// token is followed by the separator char ':'. The field name separator char
// is optional for message field or repeated message field, but required for
// all other types. Decoder simply indicates whether a Name token is followed
// by separator or not. It is up to the prototext package to validate.
const hasSeparator = 1 << 7
96
// Scalar value types. One of these is stored in Token.attrs when the token
// kind is Scalar. They start at 1, so a zero attrs is none of them.
const (
	numberValue = iota + 1
	stringValue
	literalValue
)
103
// isNegative is the bit mask in Token.numAttrs that indicates the number is
// negative.
const isNegative = 1 << 7
106
// Token provides a parsed token kind and value. Values are provided by the
// different accessor methods.
type Token struct {
	// Kind of the Token object.
	kind Kind
	// attrs contains metadata for the following Kinds:
	//   Name: hasSeparator bit and one of NameKind.
	//   Scalar: one of numberValue, stringValue, literalValue.
	attrs uint8
	// numAttrs contains metadata for numberValue:
	//   - highest bit is whether negative or positive.
	//   - lower bits indicate one of numDec, numHex, numOct, numFloat.
	numAttrs uint8
	// pos provides the position of the token in the original input.
	pos int
	// raw bytes of the serialized token.
	// This is a subslice into the original input.
	raw []byte
	// str contains the parsed string for the following:
	//   - stringValue of Scalar kind
	//   - numberValue of Scalar kind
	//   - TypeName of Name kind
	str string
}
131
// Kind returns the token kind. It is valid to call on any Token.
func (t Token) Kind() Kind {
	return t.kind
}
136
// RawString returns the raw bytes of the token as a string. The bytes are
// copied by the conversion, so the result does not alias the input.
func (t Token) RawString() string {
	return string(t.raw)
}
141
// Pos returns the position of the token in the original input.
func (t Token) Pos() int {
	return t.pos
}
146
147 // NameKind returns IdentName, TypeName or FieldNumber.
148 // It panics if type is not Name.
149 func (t Token) NameKind() NameKind {
150 if t.kind == Name {
151 return NameKind(t.attrs &^ hasSeparator)
152 }
153 panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
154 }
155
156 // HasSeparator returns true if the field name is followed by the separator char
157 // ':', else false. It panics if type is not Name.
158 func (t Token) HasSeparator() bool {
159 if t.kind == Name {
160 return t.attrs&hasSeparator != 0
161 }
162 panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
163 }
164
165 // IdentName returns the value for IdentName type.
166 func (t Token) IdentName() string {
167 if t.kind == Name && t.attrs&uint8(IdentName) != 0 {
168 return string(t.raw)
169 }
170 panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
171 }
172
173 // TypeName returns the value for TypeName type.
174 func (t Token) TypeName() string {
175 if t.kind == Name && t.attrs&uint8(TypeName) != 0 {
176 return t.str
177 }
178 panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
179 }
180
181 // FieldNumber returns the value for FieldNumber type. It returns a
182 // non-negative int32 value. Caller will still need to validate for the correct
183 // field number range.
184 func (t Token) FieldNumber() int32 {
185 if t.kind != Name || t.attrs&uint8(FieldNumber) == 0 {
186 panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
187 }
188 // Following should not return an error as it had already been called right
189 // before this Token was constructed.
190 num, _ := strconv.ParseInt(string(t.raw), 10, 32)
191 return int32(num)
192 }
193
194 // String returns the string value for a Scalar type.
195 func (t Token) String() (string, bool) {
196 if t.kind != Scalar || t.attrs != stringValue {
197 return "", false
198 }
199 return t.str, true
200 }
201
202 // Enum returns the literal value for a Scalar type for use as enum literals.
203 func (t Token) Enum() (string, bool) {
204 if t.kind != Scalar || t.attrs != literalValue || (len(t.raw) > 0 && t.raw[0] == '-') {
205 return "", false
206 }
207 return string(t.raw), true
208 }
209
210 // Bool returns the bool value for a Scalar type.
211 func (t Token) Bool() (bool, bool) {
212 if t.kind != Scalar {
213 return false, false
214 }
215 switch t.attrs {
216 case literalValue:
217 if b, ok := boolLits[string(t.raw)]; ok {
218 return b, true
219 }
220 case numberValue:
221 // Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
222 // 0x1, etc.
223 n, err := strconv.ParseUint(t.str, 0, 64)
224 if err == nil {
225 switch n {
226 case 0:
227 return false, true
228 case 1:
229 return true, true
230 }
231 }
232 }
233 return false, false
234 }
235
// boolLits maps the accepted boolean literals to their values. These exact
// spellings are the ones supported in C++; the lookup is case-sensitive.
var boolLits = map[string]bool{
	"t":     true,
	"true":  true,
	"True":  true,
	"f":     false,
	"false": false,
	"False": false,
}
245
246 // Uint64 returns the uint64 value for a Scalar type.
247 func (t Token) Uint64() (uint64, bool) {
248 if t.kind != Scalar || t.attrs != numberValue ||
249 t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
250 return 0, false
251 }
252 n, err := strconv.ParseUint(t.str, 0, 64)
253 if err != nil {
254 return 0, false
255 }
256 return n, true
257 }
258
259 // Uint32 returns the uint32 value for a Scalar type.
260 func (t Token) Uint32() (uint32, bool) {
261 if t.kind != Scalar || t.attrs != numberValue ||
262 t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
263 return 0, false
264 }
265 n, err := strconv.ParseUint(t.str, 0, 32)
266 if err != nil {
267 return 0, false
268 }
269 return uint32(n), true
270 }
271
272 // Int64 returns the int64 value for a Scalar type.
273 func (t Token) Int64() (int64, bool) {
274 if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
275 return 0, false
276 }
277 if n, err := strconv.ParseInt(t.str, 0, 64); err == nil {
278 return n, true
279 }
280 // C++ accepts large positive hex numbers as negative values.
281 // This feature is here for proto1 backwards compatibility purposes.
282 if flags.ProtoLegacy && (t.numAttrs == numHex) {
283 if n, err := strconv.ParseUint(t.str, 0, 64); err == nil {
284 return int64(n), true
285 }
286 }
287 return 0, false
288 }
289
290 // Int32 returns the int32 value for a Scalar type.
291 func (t Token) Int32() (int32, bool) {
292 if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
293 return 0, false
294 }
295 if n, err := strconv.ParseInt(t.str, 0, 32); err == nil {
296 return int32(n), true
297 }
298 // C++ accepts large positive hex numbers as negative values.
299 // This feature is here for proto1 backwards compatibility purposes.
300 if flags.ProtoLegacy && (t.numAttrs == numHex) {
301 if n, err := strconv.ParseUint(t.str, 0, 32); err == nil {
302 return int32(n), true
303 }
304 }
305 return 0, false
306 }
307
308 // Float64 returns the float64 value for a Scalar type.
309 func (t Token) Float64() (float64, bool) {
310 if t.kind != Scalar {
311 return 0, false
312 }
313 switch t.attrs {
314 case literalValue:
315 if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
316 return f, true
317 }
318 case numberValue:
319 n, err := strconv.ParseFloat(t.str, 64)
320 if err == nil {
321 return n, true
322 }
323 nerr := err.(*strconv.NumError)
324 if nerr.Err == strconv.ErrRange {
325 return n, true
326 }
327 }
328 return 0, false
329 }
330
331 // Float32 returns the float32 value for a Scalar type.
332 func (t Token) Float32() (float32, bool) {
333 if t.kind != Scalar {
334 return 0, false
335 }
336 switch t.attrs {
337 case literalValue:
338 if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
339 return float32(f), true
340 }
341 case numberValue:
342 n, err := strconv.ParseFloat(t.str, 64)
343 if err == nil {
344 // Overflows are treated as (-)infinity.
345 return float32(n), true
346 }
347 nerr := err.(*strconv.NumError)
348 if nerr.Err == strconv.ErrRange {
349 return float32(n), true
350 }
351 }
352 return 0, false
353 }
354
// floatLits maps the supported float literals to their values. C++ permits
// case-insensitive variants of these, so the keys are lowercase and lookups
// are expected to lowercase the literal first.
var floatLits = map[string]float64{
	"nan":       math.NaN(),
	"inf":       math.Inf(1),
	"infinity":  math.Inf(1),
	"-inf":      math.Inf(-1),
	"-infinity": math.Inf(-1),
}
364
365 // TokenEquals returns true if given Tokens are equal, else false.
366 func TokenEquals(x, y Token) bool {
367 return x.kind == y.kind &&
368 x.attrs == y.attrs &&
369 x.numAttrs == y.numAttrs &&
370 x.pos == y.pos &&
371 bytes.Equal(x.raw, y.raw) &&
372 x.str == y.str
373 }
374