1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package json
6 7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "regexp"
12 "unicode/utf8"
13 14 "google.golang.org/protobuf/internal/errors"
15 )
// call records which Decoder method (Read or Peek) was invoked.
type call uint8

// Possible values of call. readCall is the zero value, so a freshly created
// Decoder behaves as if Read had been the last method called.
const (
	readCall = call(iota)
	peekCall
)
// unexpectedFmt is the message format used for syntax errors that report a
// token appearing where it is not allowed; %s receives the token's raw text.
const unexpectedFmt = "unexpected token %s"
// ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
// Its message is formatted from io.ErrUnexpectedEOF.
var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
// Decoder is a token-based JSON decoder. It works on an in-memory byte slice
// and hands out one token at a time through Read and Peek.
type Decoder struct {
	// lastCall is the last method called, either readCall or peekCall.
	// Initial value is readCall.
	lastCall call

	// lastToken contains the last read token. Peek also stores its
	// look-ahead result here so that the following Read can return it.
	lastToken Token

	// lastErr contains the error from the read that Peek performed; the
	// following Read call returns it alongside lastToken.
	lastErr error

	// openStack is a stack containing ObjectOpen and ArrayOpen values. The
	// top of stack represents the object or the array the current value is
	// directly located in.
	openStack []Kind

	// orig is the complete original input. It is used in reporting line and
	// column.
	orig []byte
	// in contains the unconsumed input, i.e. the remaining tail of orig.
	in []byte
}
52 53 // NewDecoder returns a Decoder to read the given []byte.
54 func NewDecoder(b []byte) *Decoder {
55 return &Decoder{orig: b, in: b}
56 }
57 58 // Peek looks ahead and returns the next token kind without advancing a read.
59 func (d *Decoder) Peek() (Token, error) {
60 defer func() { d.lastCall = peekCall }()
61 if d.lastCall == readCall {
62 d.lastToken, d.lastErr = d.Read()
63 }
64 return d.lastToken, d.lastErr
65 }
66 67 // Read returns the next JSON token.
68 // It will return an error if there is no valid token.
69 func (d *Decoder) Read() (Token, error) {
70 const scalar = Null | Bool | Number | String
71 72 defer func() { d.lastCall = readCall }()
73 if d.lastCall == peekCall {
74 return d.lastToken, d.lastErr
75 }
76 77 tok, err := d.parseNext()
78 if err != nil {
79 return Token{}, err
80 }
81 82 switch tok.kind {
83 case EOF:
84 if len(d.openStack) != 0 ||
85 d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 {
86 return Token{}, ErrUnexpectedEOF
87 }
88 89 case Null:
90 if !d.isValueNext() {
91 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
92 }
93 94 case Bool, Number:
95 if !d.isValueNext() {
96 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
97 }
98 99 case String:
100 if d.isValueNext() {
101 break
102 }
103 // This string token should only be for a field name.
104 if d.lastToken.kind&(ObjectOpen|comma) == 0 {
105 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
106 }
107 if len(d.in) == 0 {
108 return Token{}, ErrUnexpectedEOF
109 }
110 if c := d.in[0]; c != ':' {
111 return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c))
112 }
113 tok.kind = Name
114 d.consume(1)
115 116 case ObjectOpen, ArrayOpen:
117 if !d.isValueNext() {
118 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
119 }
120 d.openStack = append(d.openStack, tok.kind)
121 122 case ObjectClose:
123 if len(d.openStack) == 0 ||
124 d.lastToken.kind&(Name|comma) != 0 ||
125 d.openStack[len(d.openStack)-1] != ObjectOpen {
126 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
127 }
128 d.openStack = d.openStack[:len(d.openStack)-1]
129 130 case ArrayClose:
131 if len(d.openStack) == 0 ||
132 d.lastToken.kind == comma ||
133 d.openStack[len(d.openStack)-1] != ArrayOpen {
134 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
135 }
136 d.openStack = d.openStack[:len(d.openStack)-1]
137 138 case comma:
139 if len(d.openStack) == 0 ||
140 d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 {
141 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString())
142 }
143 }
144 145 // Update d.lastToken only after validating token to be in the right sequence.
146 d.lastToken = tok
147 148 if d.lastToken.kind == comma {
149 return d.Read()
150 }
151 return tok, nil
152 }
// errRegexp extracts the offending text shown in "invalid value" errors.
// Anchored at the start of the input, it matches either a run of 1 to 32
// non-delimiter characters or, failing that, a single character (other than
// newline).
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
156 157 // parseNext parses for the next JSON token. It returns a Token object for
158 // different types, except for Name. It does not handle whether the next token
159 // is in a valid sequence or not.
160 func (d *Decoder) parseNext() (Token, error) {
161 // Trim leading spaces.
162 d.consume(0)
163 164 in := d.in
165 if len(in) == 0 {
166 return d.consumeToken(EOF, 0), nil
167 }
168 169 switch in[0] {
170 case 'n':
171 if n := matchWithDelim("null", in); n != 0 {
172 return d.consumeToken(Null, n), nil
173 }
174 175 case 't':
176 if n := matchWithDelim("true", in); n != 0 {
177 return d.consumeBoolToken(true, n), nil
178 }
179 180 case 'f':
181 if n := matchWithDelim("false", in); n != 0 {
182 return d.consumeBoolToken(false, n), nil
183 }
184 185 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
186 if n, ok := parseNumber(in); ok {
187 return d.consumeToken(Number, n), nil
188 }
189 190 case '"':
191 s, n, err := d.parseString(in)
192 if err != nil {
193 return Token{}, err
194 }
195 return d.consumeStringToken(s, n), nil
196 197 case '{':
198 return d.consumeToken(ObjectOpen, 1), nil
199 200 case '}':
201 return d.consumeToken(ObjectClose, 1), nil
202 203 case '[':
204 return d.consumeToken(ArrayOpen, 1), nil
205 206 case ']':
207 return d.consumeToken(ArrayClose, 1), nil
208 209 case ',':
210 return d.consumeToken(comma, 1), nil
211 }
212 return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in))
213 }
214 215 // newSyntaxError returns an error with line and column information useful for
216 // syntax errors.
217 func (d *Decoder) newSyntaxError(pos int, f string, x ...any) error {
218 e := errors.New(f, x...)
219 line, column := d.Position(pos)
220 return errors.New("syntax error (line %d:%d): %v", line, column, e)
221 }
222 223 // Position returns line and column number of given index of the original input.
224 // It will panic if index is out of range.
225 func (d *Decoder) Position(idx int) (line int, column int) {
226 b := d.orig[:idx]
227 line = bytes.Count(b, []byte("\n")) + 1
228 if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
229 b = b[i+1:]
230 }
231 column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
232 return line, column
233 }
234 235 // currPos returns the current index position of d.in from d.orig.
236 func (d *Decoder) currPos() int {
237 return len(d.orig) - len(d.in)
238 }
239 240 // matchWithDelim matches s with the input b and verifies that the match
241 // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
242 // As a special case, EOF is considered a delimiter. It returns the length of s
243 // if there is a match, else 0.
244 func matchWithDelim(s string, b []byte) int {
245 if !bytes.HasPrefix(b, []byte(s)) {
246 return 0
247 }
248 249 n := len(s)
250 if n < len(b) && isNotDelim(b[n]) {
251 return 0
252 }
253 return n
254 }
// isNotDelim returns true if the given byte is not a delimiter character,
// that is, if it is an ASCII letter, an ASCII digit, or one of '-', '+', '.'
// and '_'.
func isNotDelim(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9':
		return true
	}
	switch c {
	case '-', '+', '.', '_':
		return true
	}
	return false
}
263 264 // consume consumes n bytes of input and any subsequent whitespace.
265 func (d *Decoder) consume(n int) {
266 d.in = d.in[n:]
267 for len(d.in) > 0 {
268 switch d.in[0] {
269 case ' ', '\n', '\r', '\t':
270 d.in = d.in[1:]
271 default:
272 return
273 }
274 }
275 }
276 277 // isValueNext returns true if next type should be a JSON value: Null,
278 // Number, String or Bool.
279 func (d *Decoder) isValueNext() bool {
280 if len(d.openStack) == 0 {
281 return d.lastToken.kind == 0
282 }
283 284 start := d.openStack[len(d.openStack)-1]
285 switch start {
286 case ObjectOpen:
287 return d.lastToken.kind&Name != 0
288 case ArrayOpen:
289 return d.lastToken.kind&(ArrayOpen|comma) != 0
290 }
291 panic(fmt.Sprintf(
292 "unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v",
293 d.lastToken.kind, start))
294 }
295 296 // consumeToken constructs a Token for given Kind with raw value derived from
297 // current d.in and given size, and consumes the given size-length of it.
298 func (d *Decoder) consumeToken(kind Kind, size int) Token {
299 tok := Token{
300 kind: kind,
301 raw: d.in[:size],
302 pos: len(d.orig) - len(d.in),
303 }
304 d.consume(size)
305 return tok
306 }
307 308 // consumeBoolToken constructs a Token for a Bool kind with raw value derived from
309 // current d.in and given size.
310 func (d *Decoder) consumeBoolToken(b bool, size int) Token {
311 tok := Token{
312 kind: Bool,
313 raw: d.in[:size],
314 pos: len(d.orig) - len(d.in),
315 boo: b,
316 }
317 d.consume(size)
318 return tok
319 }
320 321 // consumeStringToken constructs a Token for a String kind with raw value derived
322 // from current d.in and given size.
323 func (d *Decoder) consumeStringToken(s string, size int) Token {
324 tok := Token{
325 kind: String,
326 raw: d.in[:size],
327 pos: len(d.orig) - len(d.in),
328 str: s,
329 }
330 d.consume(size)
331 return tok
332 }
333 334 // Clone returns a copy of the Decoder for use in reading ahead the next JSON
335 // object, array or other values without affecting current Decoder.
336 func (d *Decoder) Clone() *Decoder {
337 ret := *d
338 ret.openStack = append([]Kind(nil), ret.openStack...)
339 return &ret
340 }
341