parser.go raw

   1  /*
   2   * Copyright 2021 ByteDance Inc.
   3   *
   4   * Licensed under the Apache License, Version 2.0 (the "License");
   5   * you may not use this file except in compliance with the License.
   6   * You may obtain a copy of the License at
   7   *
   8   *     http://www.apache.org/licenses/LICENSE-2.0
   9   *
  10   * Unless required by applicable law or agreed to in writing, software
  11   * distributed under the License is distributed on an "AS IS" BASIS,
  12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13   * See the License for the specific language governing permissions and
  14   * limitations under the License.
  15   */
  16  
  17  package ast
  18  
  19  import (
  20  	"fmt"
  21  	"sync"
  22  	"sync/atomic"
  23  
  24  	"github.com/bytedance/sonic/internal/native/types"
  25  	"github.com/bytedance/sonic/internal/rt"
  26  )
  27  
  28  const (
  29      _DEFAULT_NODE_CAP int = 16
  30      _APPEND_GROW_SHIFT = 1
  31  )
  32  
  33  const (
  34      _ERR_NOT_FOUND      types.ParsingError = 33
  35      _ERR_UNSUPPORT_TYPE types.ParsingError = 34
  36  )
  37  
  38  var (
  39      // ErrNotExist means both key and value doesn't exist 
  40      ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")
  41  
  42      // ErrUnsupportType means API on the node is unsupported
  43      ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
  44  )
  45  
  46  type Parser struct {
  47      p           int
  48      s           string
  49      noLazy      bool
  50      loadOnce  bool
  51      skipValue   bool
  52      dbuf        *byte
  53  }
  54  
  55  /** Parser Private Methods **/
  56  
  57  func (self *Parser) delim() types.ParsingError {
  58      n := len(self.s)
  59      p := self.lspace(self.p)
  60  
  61      /* check for EOF */
  62      if p >= n {
  63          return types.ERR_EOF
  64      }
  65  
  66      /* check for the delimtier */
  67      if self.s[p] != ':' {
  68          return types.ERR_INVALID_CHAR
  69      }
  70  
  71      /* update the read pointer */
  72      self.p = p + 1
  73      return 0
  74  }
  75  
  76  func (self *Parser) object() types.ParsingError {
  77      n := len(self.s)
  78      p := self.lspace(self.p)
  79  
  80      /* check for EOF */
  81      if p >= n {
  82          return types.ERR_EOF
  83      }
  84  
  85      /* check for the delimtier */
  86      if self.s[p] != '{' {
  87          return types.ERR_INVALID_CHAR
  88      }
  89  
  90      /* update the read pointer */
  91      self.p = p + 1
  92      return 0
  93  }
  94  
  95  func (self *Parser) array() types.ParsingError {
  96      n := len(self.s)
  97      p := self.lspace(self.p)
  98  
  99      /* check for EOF */
 100      if p >= n {
 101          return types.ERR_EOF
 102      }
 103  
 104      /* check for the delimtier */
 105      if self.s[p] != '[' {
 106          return types.ERR_INVALID_CHAR
 107      }
 108  
 109      /* update the read pointer */
 110      self.p = p + 1
 111      return 0
 112  }
 113  
 114  func (self *Parser) lspace(sp int) int {
 115      ns := len(self.s)
 116      for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
 117  
 118      return sp
 119  }
 120  
 121  func (self *Parser) backward() {
 122      for ; self.p >= 0 && isSpace(self.s[self.p]); self.p-=1 {}
 123  }
 124  
 125  func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
 126      sp := self.p
 127      ns := len(self.s)
 128  
 129      /* check for EOF */
 130      if self.p = self.lspace(sp); self.p >= ns {
 131          return Node{}, types.ERR_EOF
 132      }
 133  
 134      /* check for empty array */
 135      if self.s[self.p] == ']' {
 136          self.p++
 137          return Node{t: types.V_ARRAY}, 0
 138      }
 139  
 140      /* allocate array space and parse every element */
 141      for {
 142          var val Node
 143          var err types.ParsingError
 144  
 145          if self.skipValue {
 146              /* skip the value */
 147              var start int
 148              if start, err = self.skipFast(); err != 0 {
 149                  return Node{}, err
 150              }
 151              if self.p > ns {
 152                  return Node{}, types.ERR_EOF
 153              }
 154              t := switchRawType(self.s[start])
 155              if t == _V_NONE {
 156                  return Node{}, types.ERR_INVALID_CHAR
 157              }
 158              val = newRawNode(self.s[start:self.p], t, false)
 159          }else{
 160              /* decode the value */
 161              if val, err = self.Parse(); err != 0 {
 162                  return Node{}, err
 163              }
 164          }
 165  
 166          /* add the value to result */
 167          ret.Push(val)
 168          self.p = self.lspace(self.p)
 169  
 170          /* check for EOF */
 171          if self.p >= ns {
 172              return Node{}, types.ERR_EOF
 173          }
 174  
 175          /* check for the next character */
 176          switch self.s[self.p] {
 177              case ',' : self.p++
 178              case ']' : self.p++; return newArray(ret), 0
 179              default:
 180                  // if val.isLazy() {
 181                  //     return newLazyArray(self, ret), 0
 182                  // }
 183                  return Node{}, types.ERR_INVALID_CHAR
 184          }
 185      }
 186  }
 187  
 188  func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
 189      sp := self.p
 190      ns := len(self.s)
 191  
 192      /* check for EOF */
 193      if self.p = self.lspace(sp); self.p >= ns {
 194          return Node{}, types.ERR_EOF
 195      }
 196  
 197      /* check for empty object */
 198      if self.s[self.p] == '}' {
 199          self.p++
 200          return Node{t: types.V_OBJECT}, 0
 201      }
 202  
 203      /* decode each pair */
 204      for {
 205          var val Node
 206          var njs types.JsonState
 207          var err types.ParsingError
 208  
 209          /* decode the key */
 210          if njs = self.decodeValue(); njs.Vt != types.V_STRING {
 211              return Node{}, types.ERR_INVALID_CHAR
 212          }
 213  
 214          /* extract the key */
 215          idx := self.p - 1
 216          key := self.s[njs.Iv:idx]
 217  
 218          /* check for escape sequence */
 219          if njs.Ep != -1 {
 220              if key, err = unquote(key); err != 0 {
 221                  return Node{}, err
 222              }
 223          }
 224  
 225          /* expect a ':' delimiter */
 226          if err = self.delim(); err != 0 {
 227              return Node{}, err
 228          }
 229  
 230          
 231          if self.skipValue {
 232              /* skip the value */
 233              var start int
 234              if start, err = self.skipFast(); err != 0 {
 235                  return Node{}, err
 236              }
 237              if self.p > ns {
 238                  return Node{}, types.ERR_EOF
 239              }
 240              t := switchRawType(self.s[start])
 241              if t == _V_NONE {
 242                  return Node{}, types.ERR_INVALID_CHAR
 243              }
 244              val = newRawNode(self.s[start:self.p], t, false)
 245          } else {
 246              /* decode the value */
 247              if val, err = self.Parse(); err != 0 {
 248                  return Node{}, err
 249              }
 250          }
 251  
 252          /* add the value to result */
 253          // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
 254          ret.Push(NewPair(key, val))
 255          self.p = self.lspace(self.p)
 256  
 257          /* check for EOF */
 258          if self.p >= ns {
 259              return Node{}, types.ERR_EOF
 260          }
 261  
 262          /* check for the next character */
 263          switch self.s[self.p] {
 264              case ',' : self.p++
 265              case '}' : self.p++; return newObject(ret), 0
 266          default:
 267              // if val.isLazy() {
 268              //     return newLazyObject(self, ret), 0
 269              // }
 270              return Node{}, types.ERR_INVALID_CHAR
 271          }
 272      }
 273  }
 274  
 275  func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
 276      p := self.p - 1
 277      s := self.s[iv:p]
 278  
 279      /* fast path: no escape sequence */
 280      if ep == -1 {
 281          return NewString(s), 0
 282      }
 283  
 284      /* unquote the string */
 285      out, err := unquote(s)
 286  
 287      /* check for errors */
 288      if err != 0 {
 289          return Node{}, err
 290      } else {
 291          return newBytes(rt.Str2Mem(out)), 0
 292      }
 293  }
 294  
 295  /** Parser Interface **/
 296  
 297  func (self *Parser) Pos() int {
 298      return self.p
 299  }
 300  
 301  
 302  // Parse returns a ast.Node representing the parser's JSON.
 303  // NOTICE: the specific parsing lazy dependens parser's option
 304  // It only parse first layer and first child for Object or Array be default
 305  func (self *Parser) Parse() (Node, types.ParsingError) {
 306      switch val := self.decodeValue(); val.Vt {
 307          case types.V_EOF     : return Node{}, types.ERR_EOF
 308          case types.V_NULL    : return nullNode, 0
 309          case types.V_TRUE    : return trueNode, 0
 310          case types.V_FALSE   : return falseNode, 0
 311          case types.V_STRING  : return self.decodeString(val.Iv, val.Ep)
 312          case types.V_ARRAY:
 313              s := self.p - 1;
 314              if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
 315                  self.p = p + 1
 316                  return Node{t: types.V_ARRAY}, 0
 317              }
 318              if self.noLazy {
 319                  if self.loadOnce {
 320                      self.noLazy = false
 321                  }
 322                  return self.decodeArray(new(linkedNodes))
 323              }
 324              // NOTICE: loadOnce always keep raw json for object or array
 325              if self.loadOnce {
 326                  self.p = s
 327                  s, e := self.skipFast()
 328                  if e != 0 {
 329                      return Node{}, e
 330                  }
 331                  return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0
 332              }
 333              return newLazyArray(self), 0
 334          case types.V_OBJECT:
 335              s := self.p - 1;
 336              if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
 337                  self.p = p + 1
 338                  return Node{t: types.V_OBJECT}, 0
 339              }
 340              // NOTICE: loadOnce always keep raw json for object or array
 341              if self.noLazy {
 342                  if self.loadOnce {
 343                      self.noLazy = false
 344                  }
 345                  return self.decodeObject(new(linkedPairs))
 346              }
 347              if self.loadOnce {
 348                  self.p = s
 349                  s, e := self.skipFast()
 350                  if e != 0 {
 351                      return Node{}, e
 352                  }
 353                  return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0
 354              }
 355              return newLazyObject(self), 0
 356          case types.V_DOUBLE  : return NewNumber(self.s[val.Ep:self.p]), 0
 357          case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
 358          default              : return Node{}, types.ParsingError(-val.Vt)
 359      }
 360  }
 361  
 362  func (self *Parser) searchKey(match string) types.ParsingError {
 363      ns := len(self.s)
 364      if err := self.object(); err != 0 {
 365          return err
 366      }
 367  
 368      /* check for EOF */
 369      if self.p = self.lspace(self.p); self.p >= ns {
 370          return types.ERR_EOF
 371      }
 372  
 373      /* check for empty object */
 374      if self.s[self.p] == '}' {
 375          self.p++
 376          return _ERR_NOT_FOUND
 377      }
 378  
 379      var njs types.JsonState
 380      var err types.ParsingError
 381      /* decode each pair */
 382      for {
 383  
 384          /* decode the key */
 385          if njs = self.decodeValue(); njs.Vt != types.V_STRING {
 386              return types.ERR_INVALID_CHAR
 387          }
 388  
 389          /* extract the key */
 390          idx := self.p - 1
 391          key := self.s[njs.Iv:idx]
 392  
 393          /* check for escape sequence */
 394          if njs.Ep != -1 {
 395              if key, err = unquote(key); err != 0 {
 396                  return err
 397              }
 398          }
 399  
 400          /* expect a ':' delimiter */
 401          if err = self.delim(); err != 0 {
 402              return err
 403          }
 404  
 405          /* skip value */
 406          if key != match {
 407              if _, err = self.skipFast(); err != 0 {
 408                  return err
 409              }
 410          } else {
 411              return 0
 412          }
 413  
 414          /* check for EOF */
 415          self.p = self.lspace(self.p)
 416          if self.p >= ns {
 417              return types.ERR_EOF
 418          }
 419  
 420          /* check for the next character */
 421          switch self.s[self.p] {
 422          case ',':
 423              self.p++
 424          case '}':
 425              self.p++
 426              return _ERR_NOT_FOUND
 427          default:
 428              return types.ERR_INVALID_CHAR
 429          }
 430      }
 431  }
 432  
 433  func (self *Parser) searchIndex(idx int) types.ParsingError {
 434      ns := len(self.s)
 435      if err := self.array(); err != 0 {
 436          return err
 437      }
 438  
 439      /* check for EOF */
 440      if self.p = self.lspace(self.p); self.p >= ns {
 441          return types.ERR_EOF
 442      }
 443  
 444      /* check for empty array */
 445      if self.s[self.p] == ']' {
 446          self.p++
 447          return _ERR_NOT_FOUND
 448      }
 449  
 450      var err types.ParsingError
 451      /* allocate array space and parse every element */
 452      for i := 0; i < idx; i++ {
 453  
 454          /* decode the value */
 455          if _, err = self.skipFast(); err != 0 {
 456              return err
 457          }
 458  
 459          /* check for EOF */
 460          self.p = self.lspace(self.p)
 461          if self.p >= ns {
 462              return types.ERR_EOF
 463          }
 464  
 465          /* check for the next character */
 466          switch self.s[self.p] {
 467          case ',':
 468              self.p++
 469          case ']':
 470              self.p++
 471              return _ERR_NOT_FOUND
 472          default:
 473              return types.ERR_INVALID_CHAR
 474          }
 475      }
 476  
 477      return 0
 478  }
 479  
 480  func (self *Node) skipNextNode() *Node {
 481      if !self.isLazy() {
 482          return nil
 483      }
 484  
 485      parser, stack := self.getParserAndArrayStack()
 486      ret := &stack.v
 487      sp := parser.p
 488      ns := len(parser.s)
 489  
 490      /* check for EOF */
 491      if parser.p = parser.lspace(sp); parser.p >= ns {
 492          return newSyntaxError(parser.syntaxError(types.ERR_EOF))
 493      }
 494  
 495      /* check for empty array */
 496      if parser.s[parser.p] == ']' {
 497          parser.p++
 498          self.setArray(ret)
 499          return nil
 500      }
 501  
 502      var val Node
 503      /* skip the value */
 504      if start, err := parser.skipFast(); err != 0 {
 505          return newSyntaxError(parser.syntaxError(err))
 506      } else {
 507          t := switchRawType(parser.s[start])
 508          if t == _V_NONE {
 509              return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
 510          }
 511          val = newRawNode(parser.s[start:parser.p], t, false)
 512      }
 513  
 514      /* add the value to result */
 515      ret.Push(val)
 516      self.l++
 517      parser.p = parser.lspace(parser.p)
 518  
 519      /* check for EOF */
 520      if parser.p >= ns {
 521          return newSyntaxError(parser.syntaxError(types.ERR_EOF))
 522      }
 523  
 524      /* check for the next character */
 525      switch parser.s[parser.p] {
 526      case ',':
 527          parser.p++
 528          return ret.At(ret.Len()-1)
 529      case ']':
 530          parser.p++
 531          self.setArray(ret)
 532          return ret.At(ret.Len()-1)
 533      default:
 534          return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
 535      }
 536  }
 537  
 538  func (self *Node) skipNextPair() (*Pair) {
 539      if !self.isLazy() {
 540          return nil
 541      }
 542  
 543      parser, stack := self.getParserAndObjectStack()
 544      ret := &stack.v
 545      sp := parser.p
 546      ns := len(parser.s)
 547  
 548      /* check for EOF */
 549      if parser.p = parser.lspace(sp); parser.p >= ns {
 550          return newErrorPair(parser.syntaxError(types.ERR_EOF))
 551      }
 552  
 553      /* check for empty object */
 554      if parser.s[parser.p] == '}' {
 555          parser.p++
 556          self.setObject(ret)
 557          return nil
 558      }
 559  
 560      /* decode one pair */
 561      var val Node
 562      var njs types.JsonState
 563      var err types.ParsingError
 564  
 565      /* decode the key */
 566      if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
 567          return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
 568      }
 569  
 570      /* extract the key */
 571      idx := parser.p - 1
 572      key := parser.s[njs.Iv:idx]
 573  
 574      /* check for escape sequence */
 575      if njs.Ep != -1 {
 576          if key, err = unquote(key); err != 0 {
 577              return newErrorPair(parser.syntaxError(err))
 578          }
 579      }
 580  
 581      /* expect a ':' delimiter */
 582      if err = parser.delim(); err != 0 {
 583          return newErrorPair(parser.syntaxError(err))
 584      }
 585  
 586      /* skip the value */
 587      if start, err := parser.skipFast(); err != 0 {
 588          return newErrorPair(parser.syntaxError(err))
 589      } else {
 590          t := switchRawType(parser.s[start])
 591          if t == _V_NONE {
 592              return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
 593          }
 594          val = newRawNode(parser.s[start:parser.p], t, false)
 595      }
 596  
 597      /* add the value to result */
 598      ret.Push(NewPair(key, val))
 599      self.l++
 600      parser.p = parser.lspace(parser.p)
 601  
 602      /* check for EOF */
 603      if parser.p >= ns {
 604          return newErrorPair(parser.syntaxError(types.ERR_EOF))
 605      }
 606  
 607      /* check for the next character */
 608      switch parser.s[parser.p] {
 609      case ',':
 610          parser.p++
 611          return ret.At(ret.Len()-1)
 612      case '}':
 613          parser.p++
 614          self.setObject(ret)
 615          return ret.At(ret.Len()-1)
 616      default:
 617          return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
 618      }
 619  }
 620  
 621  
 622  /** Parser Factory **/
 623  
 624  // Loads parse all json into interface{}
 625  func Loads(src string) (int, interface{}, error) {
 626      ps := &Parser{s: src}
 627      np, err := ps.Parse()
 628  
 629      /* check for errors */
 630      if err != 0 {
 631          return 0, nil, ps.ExportError(err)
 632      } else {
 633          x, err := np.Interface()
 634          if err != nil {
 635              return 0, nil, err
 636          }
 637          return ps.Pos(), x, nil
 638      }
 639  }
 640  
 641  // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
 642  func LoadsUseNumber(src string) (int, interface{}, error) {
 643      ps := &Parser{s: src}
 644      np, err := ps.Parse()
 645  
 646      /* check for errors */
 647      if err != 0 {
 648          return 0, nil, err
 649      } else {
 650          x, err := np.InterfaceUseNumber()
 651          if err != nil {
 652              return 0, nil, err
 653          }
 654          return ps.Pos(), x, nil
 655      }
 656  }
 657  
 658  // NewParser returns pointer of new allocated parser
 659  func NewParser(src string) *Parser {
 660      return &Parser{s: src}
 661  }
 662  
 663  // NewParser returns new allocated parser
 664  func NewParserObj(src string) Parser {
 665      return Parser{s: src}
 666  }
 667  
 668  // decodeNumber controls if parser decodes the number values instead of skip them
 669  //   WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser 
 670  //   otherwise the memory CANNOT be reused
 671  func (self *Parser) decodeNumber(decode bool) {
 672      if !decode && self.dbuf != nil {
 673          types.FreeDbuf(self.dbuf)
 674          self.dbuf = nil
 675          return
 676      }
 677      if decode && self.dbuf == nil {
 678          self.dbuf = types.NewDbuf()
 679      }
 680  }
 681  
 682  // ExportError converts types.ParsingError to std Error
 683  func (self *Parser) ExportError(err types.ParsingError) error {
 684      if err == _ERR_NOT_FOUND {
 685          return ErrNotExist
 686      }
 687      return fmt.Errorf("%q", SyntaxError{
 688          Pos : self.p,
 689          Src : self.s,
 690          Code: err,
 691      }.Description())
 692  }
 693  
 694  func backward(src string, i int) int {
 695      for ; i>=0 && isSpace(src[i]); i-- {}
 696      return i
 697  }
 698  
 699  
 700  func newRawNode(str string, typ types.ValueType, lock bool) Node {
 701      ret := Node{
 702          t: typ | _V_RAW,
 703          p: rt.StrPtr(str),
 704          l: uint(len(str)),
 705      }
 706      if lock {
 707          ret.m = new(sync.RWMutex)
 708      }
 709      return ret
 710  }
 711  
 712  var typeJumpTable = [256]types.ValueType{
 713      '"' : types.V_STRING,
 714      '-' : _V_NUMBER,
 715      '0' : _V_NUMBER,
 716      '1' : _V_NUMBER,
 717      '2' : _V_NUMBER,
 718      '3' : _V_NUMBER,
 719      '4' : _V_NUMBER,
 720      '5' : _V_NUMBER,
 721      '6' : _V_NUMBER,
 722      '7' : _V_NUMBER,
 723      '8' : _V_NUMBER,
 724      '9' : _V_NUMBER,
 725      '[' : types.V_ARRAY,
 726      'f' : types.V_FALSE,
 727      'n' : types.V_NULL,
 728      't' : types.V_TRUE,
 729      '{' : types.V_OBJECT,
 730  }
 731  
 732  func switchRawType(c byte) types.ValueType {
 733      return typeJumpTable[c]
 734  }
 735  
 736  func (self *Node) loadt() types.ValueType {
 737      return (types.ValueType)(atomic.LoadInt64(&self.t))
 738  }
 739  
 740  func (self *Node) lock() bool {
 741      if m := self.m; m != nil {
 742          m.Lock()
 743          return true
 744      }
 745      return false
 746  }
 747  
 748  func (self *Node) unlock() {
 749      if m := self.m; m != nil {
 750          m.Unlock()
 751      }
 752  }
 753  
 754  func (self *Node) rlock() bool {
 755      if m := self.m; m != nil {
 756          m.RLock()
 757          return true
 758      }
 759      return false
 760  }
 761  
 762  func (self *Node) runlock() {
 763      if m := self.m; m != nil {
 764          m.RUnlock()
 765      }
 766  }
 767