decode.go raw

   1  /*
   2   * Copyright 2022 ByteDance Inc.
   3   *
   4   * Licensed under the Apache License, Version 2.0 (the "License");
   5   * you may not use this file except in compliance with the License.
   6   * You may obtain a copy of the License at
   7   *
   8   *     http://www.apache.org/licenses/LICENSE-2.0
   9   *
  10   * Unless required by applicable law or agreed to in writing, software
  11   * distributed under the License is distributed on an "AS IS" BASIS,
  12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13   * See the License for the specific language governing permissions and
  14   * limitations under the License.
  15   */
  16  
  17  package ast
  18  
  19  import (
  20  	"encoding/base64"
  21  	"runtime"
  22  	"strconv"
  23  	"unsafe"
  24  
  25  	"github.com/bytedance/sonic/internal/native/types"
  26  	"github.com/bytedance/sonic/internal/rt"
  27  	"github.com/bytedance/sonic/internal/utils"
  28  )
  29  
// Hack: this constant serves two purposes.
//
//  1. It is the whitespace bitmask tested by isSpace: bit c is set for each of
//     the characters ' ', '\t', '\r' and '\n'.
//  2. Because ' ' is 32, the mask has bit 32 set and therefore does not fit in
//     a 32-bit int. isSpace combines it with an int, so the package fails to
//     compile where int is 32 bits wide; the descriptive name is meant to make
//     that compile error self-explanatory.
const _Sonic_Not_Support_32Bit_Arch__Checking_32Bit_Arch_Here = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
  32  
// bytesNull is the JSON literal null as a byte slice.
var bytesNull   = []byte("null")

// Texts of fixed JSON values. Despite the "bytes" prefix, every name in this
// group is an untyped string constant, not a []byte.
const (
    // strNull is the literal compared against by decodeNull.
    strNull   = "null"
    // bytesTrue is the literal compared against by decodeTrue.
    bytesTrue   = "true"
    // bytesFalse is the literal compared against by decodeFalse.
    bytesFalse  = "false"
    // bytesObject is the text of an empty JSON object.
    bytesObject = "{}"
    // bytesArray is the text of an empty JSON array.
    bytesArray  = "[]"
)
  42  
  43  func isSpace(c byte) bool {
  44      return (int(1<<c) & _Sonic_Not_Support_32Bit_Arch__Checking_32Bit_Arch_Here) != 0
  45  }
  46  
  47  //go:nocheckptr
  48  func skipBlank(src string, pos int) int {
  49      se := uintptr(rt.IndexChar(src, len(src)))
  50      sp := uintptr(rt.IndexChar(src, pos))
  51  
  52      for sp < se {
  53          if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
  54              break
  55          }
  56          sp += 1
  57      }
  58      if sp >= se {
  59          return -int(types.ERR_EOF)
  60      }
  61      runtime.KeepAlive(src)
  62      return int(sp - uintptr(rt.IndexChar(src, 0)))
  63  }
  64  
  65  func decodeNull(src string, pos int) (ret int) {
  66      ret = pos + 4
  67      if ret > len(src) {
  68          return -int(types.ERR_EOF)
  69      }
  70      if src[pos:ret] == strNull {
  71          return ret
  72      } else {
  73          return -int(types.ERR_INVALID_CHAR)
  74      }
  75  }
  76  
  77  func decodeTrue(src string, pos int) (ret int) {
  78      ret = pos + 4
  79      if ret > len(src) {
  80          return -int(types.ERR_EOF)
  81      }
  82      if src[pos:ret] == bytesTrue {
  83          return ret
  84      } else {
  85          return -int(types.ERR_INVALID_CHAR)
  86      }
  87  
  88  }
  89  
  90  func decodeFalse(src string, pos int) (ret int) {
  91      ret = pos + 5
  92      if ret > len(src) {
  93          return -int(types.ERR_EOF)
  94      }
  95      if src[pos:ret] == bytesFalse {
  96          return ret
  97      }
  98      return -int(types.ERR_INVALID_CHAR)
  99  }
 100  
 101  //go:nocheckptr
 102  func decodeString(src string, pos int) (ret int, v string) {
 103      ret, ep := skipString(src, pos)
 104      if ep == -1 {
 105          (*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1)
 106          (*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2
 107          return ret, v
 108      }
 109  
 110      vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
 111      if !ok {
 112          return -int(types.ERR_INVALID_CHAR), ""
 113      }
 114  
 115      runtime.KeepAlive(src)
 116      return ret, rt.Mem2Str(vv)
 117  }
 118  
 119  func decodeBinary(src string, pos int) (ret int, v []byte) {
 120      var vv string
 121      ret, vv = decodeString(src, pos)
 122      if ret < 0 {
 123          return ret, nil
 124      }
 125      var err error
 126      v, err = base64.StdEncoding.DecodeString(vv)
 127      if err != nil {
 128          return -int(types.ERR_INVALID_CHAR), nil
 129      }
 130      return ret, v
 131  }
 132  
 133  func isDigit(c byte) bool {
 134      return c >= '0' && c <= '9'
 135  }
 136  
 137  //go:nocheckptr
 138  func decodeInt64(src string, pos int) (ret int, v int64, err error) {
 139      sp := uintptr(rt.IndexChar(src, pos))
 140      ss := uintptr(sp)
 141      se := uintptr(rt.IndexChar(src, len(src)))
 142      if uintptr(sp) >= se {
 143          return -int(types.ERR_EOF), 0, nil
 144      }
 145  
 146      if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
 147          sp += 1
 148      }
 149      if sp == se {
 150          return -int(types.ERR_EOF), 0, nil
 151      }
 152  
 153      for ; sp < se; sp += uintptr(1) {
 154          if !isDigit(*(*byte)(unsafe.Pointer(sp))) {
 155              break
 156          }
 157      }
 158  
 159      if sp < se {
 160          if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' {
 161              return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil
 162          }
 163      }
 164  
 165      var vv string
 166      ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
 167      (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
 168      (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
 169  
 170      v, err = strconv.ParseInt(vv, 10, 64)
 171      if err != nil {
 172          //NOTICE: allow overflow here
 173          if err.(*strconv.NumError).Err == strconv.ErrRange {
 174              return ret, 0, err
 175          }
 176          return -int(types.ERR_INVALID_CHAR), 0, err
 177      }
 178  
 179      runtime.KeepAlive(src)
 180      return ret, v, nil
 181  }
 182  
 183  func isNumberChars(c byte) bool {
 184      return (c >= '0' && c <= '9') || c == '+' || c == '-' || c == 'e' || c == 'E' || c == '.'
 185  }
 186  
 187  //go:nocheckptr
 188  func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
 189      sp := uintptr(rt.IndexChar(src, pos))
 190      ss := uintptr(sp)
 191      se := uintptr(rt.IndexChar(src, len(src)))
 192      if uintptr(sp) >= se {
 193          return -int(types.ERR_EOF), 0, nil
 194      }
 195  
 196      if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
 197          sp += 1
 198      }
 199      if sp == se {
 200          return -int(types.ERR_EOF), 0, nil
 201      }
 202  
 203      for ; sp < se; sp += uintptr(1) {
 204          if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) {
 205              break
 206          }
 207      }
 208  
 209      var vv string
 210      ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
 211      (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
 212      (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
 213  
 214      v, err = strconv.ParseFloat(vv, 64)
 215      if err != nil {
 216          //NOTICE: allow overflow here
 217          if err.(*strconv.NumError).Err == strconv.ErrRange {
 218              return ret, 0, err
 219          }
 220          return -int(types.ERR_INVALID_CHAR), 0, err
 221      }
 222  
 223      runtime.KeepAlive(src)
 224      return ret, v, nil
 225  }
 226  
 227  func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState) {
 228      pos = skipBlank(src, pos)
 229      if pos < 0 {
 230          return pos, types.JsonState{Vt: types.ValueType(pos)}
 231      }
 232      switch c := src[pos]; c {
 233      case 'n':
 234          ret = decodeNull(src, pos)
 235          if ret < 0 {
 236              return ret, types.JsonState{Vt: types.ValueType(ret)}
 237          }
 238          return ret, types.JsonState{Vt: types.V_NULL}
 239      case '"':
 240          var ep int
 241          ret, ep = skipString(src, pos)
 242          if ret < 0 {
 243              return ret, types.JsonState{Vt: types.ValueType(ret)}
 244          }
 245          return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep}
 246      case '{':
 247          return pos + 1, types.JsonState{Vt: types.V_OBJECT}
 248      case '[':
 249          return pos + 1, types.JsonState{Vt: types.V_ARRAY}
 250      case 't':
 251          ret = decodeTrue(src, pos)
 252          if ret < 0 {
 253              return ret, types.JsonState{Vt: types.ValueType(ret)}
 254          }
 255          return ret, types.JsonState{Vt: types.V_TRUE}
 256      case 'f':
 257          ret = decodeFalse(src, pos)
 258          if ret < 0 {
 259              return ret, types.JsonState{Vt: types.ValueType(ret)}
 260          }
 261          return ret, types.JsonState{Vt: types.V_FALSE}
 262      case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 263          if skipnum {
 264              ret = skipNumber(src, pos)
 265              if ret >= 0 {
 266                  return ret, types.JsonState{Vt: types.V_DOUBLE, Iv: 0, Ep: pos}
 267              } else {
 268                  return ret, types.JsonState{Vt: types.ValueType(ret)}
 269              }
 270          } else {
 271              var iv int64
 272              ret, iv, _ = decodeInt64(src, pos)
 273              if ret >= 0 {
 274                  return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
 275              } else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
 276                  return ret, types.JsonState{Vt: types.ValueType(ret)}
 277              }
 278              var fv float64
 279              ret, fv, _ = decodeFloat64(src, pos)
 280              if ret >= 0 {
 281                  return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
 282              } else {
 283                  return ret, types.JsonState{Vt: types.ValueType(ret)}
 284              }
 285          }
 286          
 287      default:
 288          return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)}
 289      }
 290  }
 291  
 292  //go:nocheckptr
 293  func skipNumber(src string, pos int) (ret int) {
 294      return utils.SkipNumber(src, pos)
 295  }
 296  
 297  //go:nocheckptr
 298  func skipString(src string, pos int) (ret int, ep int) {
 299      if pos+1 >= len(src) {
 300          return -int(types.ERR_EOF), -1
 301      }
 302  
 303      sp := uintptr(rt.IndexChar(src, pos))
 304      se := uintptr(rt.IndexChar(src, len(src)))
 305  
 306      // not start with quote
 307      if *(*byte)(unsafe.Pointer(sp)) != '"' {
 308          return -int(types.ERR_INVALID_CHAR), -1
 309      }
 310      sp += 1
 311  
 312      ep = -1
 313      for sp < se {
 314          c := *(*byte)(unsafe.Pointer(sp))
 315          if c == '\\' {
 316              if ep == -1 {
 317                  ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
 318              }
 319              sp += 2
 320              continue
 321          }
 322          sp += 1
 323          if c == '"' {
 324              return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep
 325          }
 326      }
 327  
 328      runtime.KeepAlive(src)
 329      // not found the closed quote until EOF
 330      return -int(types.ERR_EOF), -1
 331  }
 332  
 333  //go:nocheckptr
 334  func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) {
 335      if pos+1 >= len(src) {
 336          return -int(types.ERR_EOF)
 337      }
 338  
 339      sp := uintptr(rt.IndexChar(src, pos))
 340      se := uintptr(rt.IndexChar(src, len(src)))
 341  
 342      if *(*byte)(unsafe.Pointer(sp)) != lchar {
 343          return -int(types.ERR_INVALID_CHAR)
 344      }
 345  
 346      sp += 1
 347      nbrace := 1
 348      inquote := false
 349  
 350      for sp < se {
 351          c := *(*byte)(unsafe.Pointer(sp))
 352          if c == '\\' {
 353              sp += 2
 354              continue
 355          } else if c == '"' {
 356              inquote = !inquote
 357          } else if c == lchar {
 358              if !inquote {
 359                  nbrace += 1
 360              }
 361          } else if c == rchar {
 362              if !inquote {
 363                  nbrace -= 1
 364                  if nbrace == 0 {
 365                      sp += 1
 366                      break
 367                  }
 368              }
 369          }
 370          sp += 1
 371      }
 372  
 373      if nbrace != 0 {
 374          return -int(types.ERR_INVALID_CHAR)
 375      }
 376  
 377      runtime.KeepAlive(src)
 378      return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
 379  }
 380  
 381  func skipValueFast(src string, pos int) (ret int, start int) {
 382      pos = skipBlank(src, pos)
 383      if pos < 0 {
 384          return pos, -1
 385      }
 386      switch c := src[pos]; c {
 387      case 'n':
 388          ret = decodeNull(src, pos)
 389      case '"':
 390          ret, _ = skipString(src, pos)
 391      case '{':
 392          ret = skipPair(src, pos, '{', '}')
 393      case '[':
 394          ret = skipPair(src, pos, '[', ']')
 395      case 't':
 396          ret = decodeTrue(src, pos)
 397      case 'f':
 398          ret = decodeFalse(src, pos)
 399      case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 400          ret = skipNumber(src, pos)
 401      default:
 402          ret = -int(types.ERR_INVALID_CHAR)
 403      }
 404      return ret, pos
 405  }
 406  
 407  func skipValue(src string, pos int) (ret int, start int) {
 408      pos = skipBlank(src, pos)
 409      if pos < 0 {
 410          return pos, -1
 411      }
 412      switch c := src[pos]; c {
 413      case 'n':
 414          ret = decodeNull(src, pos)
 415      case '"':
 416          ret, _ = skipString(src, pos)
 417      case '{':
 418          ret, _ = skipObject(src, pos)
 419      case '[':
 420          ret, _ = skipArray(src, pos)
 421      case 't':
 422          ret = decodeTrue(src, pos)
 423      case 'f':
 424          ret = decodeFalse(src, pos)
 425      case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 426          ret = skipNumber(src, pos)
 427      default:
 428          ret = -int(types.ERR_INVALID_CHAR)
 429      }
 430      return ret, pos
 431  }
 432  
 433  func skipObject(src string, pos int) (ret int, start int) {
 434      start = skipBlank(src, pos)
 435      if start < 0 {
 436          return start, -1
 437      }
 438  
 439      if src[start] != '{' {
 440          return -int(types.ERR_INVALID_CHAR), -1
 441      }
 442  
 443      pos = start + 1
 444      pos = skipBlank(src, pos)
 445      if pos < 0 {
 446          return pos, -1
 447      }
 448      if src[pos] == '}' {
 449          return pos + 1, start
 450      }
 451  
 452      for {
 453          pos, _ = skipString(src, pos)
 454          if pos < 0 {
 455              return pos, -1
 456          }
 457  
 458          pos = skipBlank(src, pos)
 459          if pos < 0 {
 460              return pos, -1
 461          }
 462          if src[pos] != ':' {
 463              return -int(types.ERR_INVALID_CHAR), -1
 464          }
 465  
 466          pos++
 467          pos, _ = skipValue(src, pos)
 468          if pos < 0 {
 469              return pos, -1
 470          }
 471  
 472          pos = skipBlank(src, pos)
 473          if pos < 0 {
 474              return pos, -1
 475          }
 476          if src[pos] == '}' {
 477              return pos + 1, start
 478          }
 479          if src[pos] != ',' {
 480              return -int(types.ERR_INVALID_CHAR), -1
 481          }
 482  
 483          pos++
 484          pos = skipBlank(src, pos)
 485          if pos < 0 {
 486              return pos, -1
 487          }
 488  
 489      }
 490  }
 491  
 492  func skipArray(src string, pos int) (ret int, start int) {
 493      start = skipBlank(src, pos)
 494      if start < 0 {
 495          return start, -1
 496      }
 497  
 498      if src[start] != '[' {
 499          return -int(types.ERR_INVALID_CHAR), -1
 500      }
 501  
 502      pos = start + 1
 503      pos = skipBlank(src, pos)
 504      if pos < 0 {
 505          return pos, -1
 506      }
 507      if src[pos] == ']' {
 508          return pos + 1, start
 509      }
 510  
 511      for {
 512          pos, _ = skipValue(src, pos)
 513          if pos < 0 {
 514              return pos, -1
 515          }
 516  
 517          pos = skipBlank(src, pos)
 518          if pos < 0 {
 519              return pos, -1
 520          }
 521          if src[pos] == ']' {
 522              return pos + 1, start
 523          }
 524          if src[pos] != ',' {
 525              return -int(types.ERR_INVALID_CHAR), -1
 526          }
 527          pos++
 528      }
 529  }
 530  
 531  // DecodeString decodes a JSON string from pos and return golang string.
 532  //   - needEsc indicates if to unescaped escaping chars
 533  //   - hasEsc tells if the returned string has escaping chars
 534  //   - validStr enables validating UTF8 charset
 535  //
 536  func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
 537      p := NewParserObj(src)
 538      p.p = pos
 539      switch val := p.decodeValue(); val.Vt {
 540      case types.V_STRING:
 541          str := p.s[val.Iv : p.p-1]
 542          if validStr && !validate_utf8(str) {
 543             return "", -int(types.ERR_INVALID_UTF8), false
 544          }
 545          /* fast path: no escape sequence */
 546          if val.Ep == -1 {
 547              return str, p.p, false
 548          } else if !needEsc {
 549              return str, p.p, true
 550          }
 551          /* unquote the string */
 552          out, err := unquote(str)
 553          /* check for errors */
 554          if err != 0 {
 555              return "", -int(err), true
 556          } else {
 557              return out, p.p, true
 558          }
 559      default:
 560          return "", -int(_ERR_UNSUPPORT_TYPE), false
 561      }
 562  }
 563