parser.go raw
1 /*
2 * Copyright 2021 ByteDance Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package ast
18
19 import (
20 "fmt"
21 "sync"
22 "sync/atomic"
23
24 "github.com/bytedance/sonic/internal/native/types"
25 "github.com/bytedance/sonic/internal/rt"
26 )
27
// Sizing constants. Neither is referenced in the parser code visible here.
// NOTE(review): the names suggest an initial node capacity and a growth shift
// for append-style containers -- confirm against their users.
const (
	_DEFAULT_NODE_CAP int = 16
	_APPEND_GROW_SHIFT = 1
)
32
// Package-private parsing error codes declared on top of types.ParsingError.
const (
	// _ERR_NOT_FOUND is returned by searchKey/searchIndex when the requested
	// key or index is absent; ExportError maps it to ErrNotExist.
	_ERR_NOT_FOUND types.ParsingError = 33
	// _ERR_UNSUPPORT_TYPE is the code carried by ErrUnsupportType.
	_ERR_UNSUPPORT_TYPE types.ParsingError = 34
)
37
var (
	// ErrNotExist means that neither the key nor its value exists.
	ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")

	// ErrUnsupportType means the called API is not supported on the node.
	ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
)
45
// Parser holds the state for parsing one JSON source string.
type Parser struct {
	// p is the current read offset into s.
	p int
	// s is the JSON source text being parsed.
	s string
	// noLazy makes Parse decode arrays/objects eagerly instead of returning
	// lazy or raw nodes.
	noLazy bool
	// loadOnce makes Parse keep non-eagerly-decoded arrays/objects as raw
	// JSON nodes; when set together with noLazy, Parse clears noLazy after
	// the first container it decodes.
	loadOnce bool
	// skipValue makes decodeArray/decodeObject skip over each child value
	// and store it as a raw node rather than parsing it.
	skipValue bool
	// dbuf is allocated by decodeNumber(true) and released by
	// decodeNumber(false).
	dbuf *byte
}
54
55 /** Parser Private Methods **/
56
57 func (self *Parser) delim() types.ParsingError {
58 n := len(self.s)
59 p := self.lspace(self.p)
60
61 /* check for EOF */
62 if p >= n {
63 return types.ERR_EOF
64 }
65
66 /* check for the delimtier */
67 if self.s[p] != ':' {
68 return types.ERR_INVALID_CHAR
69 }
70
71 /* update the read pointer */
72 self.p = p + 1
73 return 0
74 }
75
76 func (self *Parser) object() types.ParsingError {
77 n := len(self.s)
78 p := self.lspace(self.p)
79
80 /* check for EOF */
81 if p >= n {
82 return types.ERR_EOF
83 }
84
85 /* check for the delimtier */
86 if self.s[p] != '{' {
87 return types.ERR_INVALID_CHAR
88 }
89
90 /* update the read pointer */
91 self.p = p + 1
92 return 0
93 }
94
95 func (self *Parser) array() types.ParsingError {
96 n := len(self.s)
97 p := self.lspace(self.p)
98
99 /* check for EOF */
100 if p >= n {
101 return types.ERR_EOF
102 }
103
104 /* check for the delimtier */
105 if self.s[p] != '[' {
106 return types.ERR_INVALID_CHAR
107 }
108
109 /* update the read pointer */
110 self.p = p + 1
111 return 0
112 }
113
114 func (self *Parser) lspace(sp int) int {
115 ns := len(self.s)
116 for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
117
118 return sp
119 }
120
121 func (self *Parser) backward() {
122 for ; self.p >= 0 && isSpace(self.s[self.p]); self.p-=1 {}
123 }
124
125 func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
126 sp := self.p
127 ns := len(self.s)
128
129 /* check for EOF */
130 if self.p = self.lspace(sp); self.p >= ns {
131 return Node{}, types.ERR_EOF
132 }
133
134 /* check for empty array */
135 if self.s[self.p] == ']' {
136 self.p++
137 return Node{t: types.V_ARRAY}, 0
138 }
139
140 /* allocate array space and parse every element */
141 for {
142 var val Node
143 var err types.ParsingError
144
145 if self.skipValue {
146 /* skip the value */
147 var start int
148 if start, err = self.skipFast(); err != 0 {
149 return Node{}, err
150 }
151 if self.p > ns {
152 return Node{}, types.ERR_EOF
153 }
154 t := switchRawType(self.s[start])
155 if t == _V_NONE {
156 return Node{}, types.ERR_INVALID_CHAR
157 }
158 val = newRawNode(self.s[start:self.p], t, false)
159 }else{
160 /* decode the value */
161 if val, err = self.Parse(); err != 0 {
162 return Node{}, err
163 }
164 }
165
166 /* add the value to result */
167 ret.Push(val)
168 self.p = self.lspace(self.p)
169
170 /* check for EOF */
171 if self.p >= ns {
172 return Node{}, types.ERR_EOF
173 }
174
175 /* check for the next character */
176 switch self.s[self.p] {
177 case ',' : self.p++
178 case ']' : self.p++; return newArray(ret), 0
179 default:
180 // if val.isLazy() {
181 // return newLazyArray(self, ret), 0
182 // }
183 return Node{}, types.ERR_INVALID_CHAR
184 }
185 }
186 }
187
188 func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
189 sp := self.p
190 ns := len(self.s)
191
192 /* check for EOF */
193 if self.p = self.lspace(sp); self.p >= ns {
194 return Node{}, types.ERR_EOF
195 }
196
197 /* check for empty object */
198 if self.s[self.p] == '}' {
199 self.p++
200 return Node{t: types.V_OBJECT}, 0
201 }
202
203 /* decode each pair */
204 for {
205 var val Node
206 var njs types.JsonState
207 var err types.ParsingError
208
209 /* decode the key */
210 if njs = self.decodeValue(); njs.Vt != types.V_STRING {
211 return Node{}, types.ERR_INVALID_CHAR
212 }
213
214 /* extract the key */
215 idx := self.p - 1
216 key := self.s[njs.Iv:idx]
217
218 /* check for escape sequence */
219 if njs.Ep != -1 {
220 if key, err = unquote(key); err != 0 {
221 return Node{}, err
222 }
223 }
224
225 /* expect a ':' delimiter */
226 if err = self.delim(); err != 0 {
227 return Node{}, err
228 }
229
230
231 if self.skipValue {
232 /* skip the value */
233 var start int
234 if start, err = self.skipFast(); err != 0 {
235 return Node{}, err
236 }
237 if self.p > ns {
238 return Node{}, types.ERR_EOF
239 }
240 t := switchRawType(self.s[start])
241 if t == _V_NONE {
242 return Node{}, types.ERR_INVALID_CHAR
243 }
244 val = newRawNode(self.s[start:self.p], t, false)
245 } else {
246 /* decode the value */
247 if val, err = self.Parse(); err != 0 {
248 return Node{}, err
249 }
250 }
251
252 /* add the value to result */
253 // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
254 ret.Push(NewPair(key, val))
255 self.p = self.lspace(self.p)
256
257 /* check for EOF */
258 if self.p >= ns {
259 return Node{}, types.ERR_EOF
260 }
261
262 /* check for the next character */
263 switch self.s[self.p] {
264 case ',' : self.p++
265 case '}' : self.p++; return newObject(ret), 0
266 default:
267 // if val.isLazy() {
268 // return newLazyObject(self, ret), 0
269 // }
270 return Node{}, types.ERR_INVALID_CHAR
271 }
272 }
273 }
274
275 func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
276 p := self.p - 1
277 s := self.s[iv:p]
278
279 /* fast path: no escape sequence */
280 if ep == -1 {
281 return NewString(s), 0
282 }
283
284 /* unquote the string */
285 out, err := unquote(s)
286
287 /* check for errors */
288 if err != 0 {
289 return Node{}, err
290 } else {
291 return newBytes(rt.Str2Mem(out)), 0
292 }
293 }
294
295 /** Parser Interface **/
296
297 func (self *Parser) Pos() int {
298 return self.p
299 }
300
301
302 // Parse returns a ast.Node representing the parser's JSON.
303 // NOTICE: the specific parsing lazy dependens parser's option
304 // It only parse first layer and first child for Object or Array be default
305 func (self *Parser) Parse() (Node, types.ParsingError) {
306 switch val := self.decodeValue(); val.Vt {
307 case types.V_EOF : return Node{}, types.ERR_EOF
308 case types.V_NULL : return nullNode, 0
309 case types.V_TRUE : return trueNode, 0
310 case types.V_FALSE : return falseNode, 0
311 case types.V_STRING : return self.decodeString(val.Iv, val.Ep)
312 case types.V_ARRAY:
313 s := self.p - 1;
314 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
315 self.p = p + 1
316 return Node{t: types.V_ARRAY}, 0
317 }
318 if self.noLazy {
319 if self.loadOnce {
320 self.noLazy = false
321 }
322 return self.decodeArray(new(linkedNodes))
323 }
324 // NOTICE: loadOnce always keep raw json for object or array
325 if self.loadOnce {
326 self.p = s
327 s, e := self.skipFast()
328 if e != 0 {
329 return Node{}, e
330 }
331 return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0
332 }
333 return newLazyArray(self), 0
334 case types.V_OBJECT:
335 s := self.p - 1;
336 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
337 self.p = p + 1
338 return Node{t: types.V_OBJECT}, 0
339 }
340 // NOTICE: loadOnce always keep raw json for object or array
341 if self.noLazy {
342 if self.loadOnce {
343 self.noLazy = false
344 }
345 return self.decodeObject(new(linkedPairs))
346 }
347 if self.loadOnce {
348 self.p = s
349 s, e := self.skipFast()
350 if e != 0 {
351 return Node{}, e
352 }
353 return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0
354 }
355 return newLazyObject(self), 0
356 case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0
357 case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
358 default : return Node{}, types.ParsingError(-val.Vt)
359 }
360 }
361
362 func (self *Parser) searchKey(match string) types.ParsingError {
363 ns := len(self.s)
364 if err := self.object(); err != 0 {
365 return err
366 }
367
368 /* check for EOF */
369 if self.p = self.lspace(self.p); self.p >= ns {
370 return types.ERR_EOF
371 }
372
373 /* check for empty object */
374 if self.s[self.p] == '}' {
375 self.p++
376 return _ERR_NOT_FOUND
377 }
378
379 var njs types.JsonState
380 var err types.ParsingError
381 /* decode each pair */
382 for {
383
384 /* decode the key */
385 if njs = self.decodeValue(); njs.Vt != types.V_STRING {
386 return types.ERR_INVALID_CHAR
387 }
388
389 /* extract the key */
390 idx := self.p - 1
391 key := self.s[njs.Iv:idx]
392
393 /* check for escape sequence */
394 if njs.Ep != -1 {
395 if key, err = unquote(key); err != 0 {
396 return err
397 }
398 }
399
400 /* expect a ':' delimiter */
401 if err = self.delim(); err != 0 {
402 return err
403 }
404
405 /* skip value */
406 if key != match {
407 if _, err = self.skipFast(); err != 0 {
408 return err
409 }
410 } else {
411 return 0
412 }
413
414 /* check for EOF */
415 self.p = self.lspace(self.p)
416 if self.p >= ns {
417 return types.ERR_EOF
418 }
419
420 /* check for the next character */
421 switch self.s[self.p] {
422 case ',':
423 self.p++
424 case '}':
425 self.p++
426 return _ERR_NOT_FOUND
427 default:
428 return types.ERR_INVALID_CHAR
429 }
430 }
431 }
432
433 func (self *Parser) searchIndex(idx int) types.ParsingError {
434 ns := len(self.s)
435 if err := self.array(); err != 0 {
436 return err
437 }
438
439 /* check for EOF */
440 if self.p = self.lspace(self.p); self.p >= ns {
441 return types.ERR_EOF
442 }
443
444 /* check for empty array */
445 if self.s[self.p] == ']' {
446 self.p++
447 return _ERR_NOT_FOUND
448 }
449
450 var err types.ParsingError
451 /* allocate array space and parse every element */
452 for i := 0; i < idx; i++ {
453
454 /* decode the value */
455 if _, err = self.skipFast(); err != 0 {
456 return err
457 }
458
459 /* check for EOF */
460 self.p = self.lspace(self.p)
461 if self.p >= ns {
462 return types.ERR_EOF
463 }
464
465 /* check for the next character */
466 switch self.s[self.p] {
467 case ',':
468 self.p++
469 case ']':
470 self.p++
471 return _ERR_NOT_FOUND
472 default:
473 return types.ERR_INVALID_CHAR
474 }
475 }
476
477 return 0
478 }
479
480 func (self *Node) skipNextNode() *Node {
481 if !self.isLazy() {
482 return nil
483 }
484
485 parser, stack := self.getParserAndArrayStack()
486 ret := &stack.v
487 sp := parser.p
488 ns := len(parser.s)
489
490 /* check for EOF */
491 if parser.p = parser.lspace(sp); parser.p >= ns {
492 return newSyntaxError(parser.syntaxError(types.ERR_EOF))
493 }
494
495 /* check for empty array */
496 if parser.s[parser.p] == ']' {
497 parser.p++
498 self.setArray(ret)
499 return nil
500 }
501
502 var val Node
503 /* skip the value */
504 if start, err := parser.skipFast(); err != 0 {
505 return newSyntaxError(parser.syntaxError(err))
506 } else {
507 t := switchRawType(parser.s[start])
508 if t == _V_NONE {
509 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
510 }
511 val = newRawNode(parser.s[start:parser.p], t, false)
512 }
513
514 /* add the value to result */
515 ret.Push(val)
516 self.l++
517 parser.p = parser.lspace(parser.p)
518
519 /* check for EOF */
520 if parser.p >= ns {
521 return newSyntaxError(parser.syntaxError(types.ERR_EOF))
522 }
523
524 /* check for the next character */
525 switch parser.s[parser.p] {
526 case ',':
527 parser.p++
528 return ret.At(ret.Len()-1)
529 case ']':
530 parser.p++
531 self.setArray(ret)
532 return ret.At(ret.Len()-1)
533 default:
534 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
535 }
536 }
537
538 func (self *Node) skipNextPair() (*Pair) {
539 if !self.isLazy() {
540 return nil
541 }
542
543 parser, stack := self.getParserAndObjectStack()
544 ret := &stack.v
545 sp := parser.p
546 ns := len(parser.s)
547
548 /* check for EOF */
549 if parser.p = parser.lspace(sp); parser.p >= ns {
550 return newErrorPair(parser.syntaxError(types.ERR_EOF))
551 }
552
553 /* check for empty object */
554 if parser.s[parser.p] == '}' {
555 parser.p++
556 self.setObject(ret)
557 return nil
558 }
559
560 /* decode one pair */
561 var val Node
562 var njs types.JsonState
563 var err types.ParsingError
564
565 /* decode the key */
566 if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
567 return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
568 }
569
570 /* extract the key */
571 idx := parser.p - 1
572 key := parser.s[njs.Iv:idx]
573
574 /* check for escape sequence */
575 if njs.Ep != -1 {
576 if key, err = unquote(key); err != 0 {
577 return newErrorPair(parser.syntaxError(err))
578 }
579 }
580
581 /* expect a ':' delimiter */
582 if err = parser.delim(); err != 0 {
583 return newErrorPair(parser.syntaxError(err))
584 }
585
586 /* skip the value */
587 if start, err := parser.skipFast(); err != 0 {
588 return newErrorPair(parser.syntaxError(err))
589 } else {
590 t := switchRawType(parser.s[start])
591 if t == _V_NONE {
592 return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
593 }
594 val = newRawNode(parser.s[start:parser.p], t, false)
595 }
596
597 /* add the value to result */
598 ret.Push(NewPair(key, val))
599 self.l++
600 parser.p = parser.lspace(parser.p)
601
602 /* check for EOF */
603 if parser.p >= ns {
604 return newErrorPair(parser.syntaxError(types.ERR_EOF))
605 }
606
607 /* check for the next character */
608 switch parser.s[parser.p] {
609 case ',':
610 parser.p++
611 return ret.At(ret.Len()-1)
612 case '}':
613 parser.p++
614 self.setObject(ret)
615 return ret.At(ret.Len()-1)
616 default:
617 return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
618 }
619 }
620
621
622 /** Parser Factory **/
623
624 // Loads parse all json into interface{}
625 func Loads(src string) (int, interface{}, error) {
626 ps := &Parser{s: src}
627 np, err := ps.Parse()
628
629 /* check for errors */
630 if err != 0 {
631 return 0, nil, ps.ExportError(err)
632 } else {
633 x, err := np.Interface()
634 if err != nil {
635 return 0, nil, err
636 }
637 return ps.Pos(), x, nil
638 }
639 }
640
641 // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
642 func LoadsUseNumber(src string) (int, interface{}, error) {
643 ps := &Parser{s: src}
644 np, err := ps.Parse()
645
646 /* check for errors */
647 if err != 0 {
648 return 0, nil, err
649 } else {
650 x, err := np.InterfaceUseNumber()
651 if err != nil {
652 return 0, nil, err
653 }
654 return ps.Pos(), x, nil
655 }
656 }
657
658 // NewParser returns pointer of new allocated parser
659 func NewParser(src string) *Parser {
660 return &Parser{s: src}
661 }
662
663 // NewParser returns new allocated parser
664 func NewParserObj(src string) Parser {
665 return Parser{s: src}
666 }
667
668 // decodeNumber controls if parser decodes the number values instead of skip them
669 // WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser
670 // otherwise the memory CANNOT be reused
671 func (self *Parser) decodeNumber(decode bool) {
672 if !decode && self.dbuf != nil {
673 types.FreeDbuf(self.dbuf)
674 self.dbuf = nil
675 return
676 }
677 if decode && self.dbuf == nil {
678 self.dbuf = types.NewDbuf()
679 }
680 }
681
682 // ExportError converts types.ParsingError to std Error
683 func (self *Parser) ExportError(err types.ParsingError) error {
684 if err == _ERR_NOT_FOUND {
685 return ErrNotExist
686 }
687 return fmt.Errorf("%q", SyntaxError{
688 Pos : self.p,
689 Src : self.s,
690 Code: err,
691 }.Description())
692 }
693
694 func backward(src string, i int) int {
695 for ; i>=0 && isSpace(src[i]); i-- {}
696 return i
697 }
698
699
700 func newRawNode(str string, typ types.ValueType, lock bool) Node {
701 ret := Node{
702 t: typ | _V_RAW,
703 p: rt.StrPtr(str),
704 l: uint(len(str)),
705 }
706 if lock {
707 ret.m = new(sync.RWMutex)
708 }
709 return ret
710 }
711
// typeJumpTable maps the first byte of a raw JSON value to its value type:
// '"' for strings, '-' and the digits for numbers, '[' and '{' for containers,
// and 'f'/'n'/'t' for the false/null/true literals. Every byte not listed
// keeps the array's zero value.
// NOTE(review): callers compare the lookup result against _V_NONE, so _V_NONE
// is presumably that zero value -- confirm at its declaration.
var typeJumpTable = [256]types.ValueType{
	'"' : types.V_STRING,
	'-' : _V_NUMBER,
	'0' : _V_NUMBER,
	'1' : _V_NUMBER,
	'2' : _V_NUMBER,
	'3' : _V_NUMBER,
	'4' : _V_NUMBER,
	'5' : _V_NUMBER,
	'6' : _V_NUMBER,
	'7' : _V_NUMBER,
	'8' : _V_NUMBER,
	'9' : _V_NUMBER,
	'[' : types.V_ARRAY,
	'f' : types.V_FALSE,
	'n' : types.V_NULL,
	't' : types.V_TRUE,
	'{' : types.V_OBJECT,
}
731
732 func switchRawType(c byte) types.ValueType {
733 return typeJumpTable[c]
734 }
735
736 func (self *Node) loadt() types.ValueType {
737 return (types.ValueType)(atomic.LoadInt64(&self.t))
738 }
739
740 func (self *Node) lock() bool {
741 if m := self.m; m != nil {
742 m.Lock()
743 return true
744 }
745 return false
746 }
747
748 func (self *Node) unlock() {
749 if m := self.m; m != nil {
750 m.Unlock()
751 }
752 }
753
754 func (self *Node) rlock() bool {
755 if m := self.m; m != nil {
756 m.RLock()
757 return true
758 }
759 return false
760 }
761
762 func (self *Node) runlock() {
763 if m := self.m; m != nil {
764 m.RUnlock()
765 }
766 }
767