encoder.go raw

   1  /*
   2   * Copyright 2021 ByteDance Inc.
   3   *
   4   * Licensed under the Apache License, Version 2.0 (the "License");
   5   * you may not use this file except in compliance with the License.
   6   * You may obtain a copy of the License at
   7   *
   8   *     http://www.apache.org/licenses/LICENSE-2.0
   9   *
  10   * Unless required by applicable law or agreed to in writing, software
  11   * distributed under the License is distributed on an "AS IS" BASIS,
  12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13   * See the License for the specific language governing permissions and
  14   * limitations under the License.
  15   */
  16  
  17  package encoder
  18  
  19  import (
  20  	"bytes"
  21  	"encoding/json"
  22  	"reflect"
  23  	"runtime"
  24  	"unsafe"
  25  
  26  	"github.com/bytedance/sonic/utf8"
  27  	"github.com/bytedance/sonic/internal/encoder/alg"
  28  	"github.com/bytedance/sonic/internal/encoder/vars"
  29  	"github.com/bytedance/sonic/internal/rt"
  30  	"github.com/bytedance/sonic/option"
  31  )
  32  
// Options is a set of encoding options, stored as a bit mask.
// Individual options occupy one bit each and can be combined with bitwise OR.
type Options uint64
  35  
const (
    // SortMapKeys indicates that the keys of a map need to be sorted
    // before serializing into JSON.
    // WARNING: This hurts performance A LOT, USE WITH CARE.
    SortMapKeys          Options = 1 << alg.BitSortMapKeys

    // EscapeHTML indicates that the encoder should escape all HTML characters
    // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
    // WARNING: This hurts performance A LOT, USE WITH CARE.
    EscapeHTML           Options = 1 << alg.BitEscapeHTML

    // CompactMarshaler indicates that the output JSON from json.Marshaler
    // is always compact and needs no validation.
    CompactMarshaler     Options = 1 << alg.BitCompactMarshaler

    // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler
    // is always an escaped string and needs no quoting.
    NoQuoteTextMarshaler Options = 1 << alg.BitNoQuoteTextMarshaler

    // NoNullSliceOrMap indicates that all empty Arrays or Objects are encoded as '[]' or '{}',
    // instead of 'null'.
    // NOTE: The priority of this option is lower than the json tag `omitempty`.
    NoNullSliceOrMap     Options = 1 << alg.BitNoNullSliceOrMap

    // ValidateString indicates that the encoder should validate the input string
    // before encoding it into JSON.
    ValidateString       Options = 1 << alg.BitValidateString

    // NoValidateJSONMarshaler indicates that the encoder should not validate the output string
    // after encoding the JSONMarshaler to JSON.
    NoValidateJSONMarshaler Options = 1 << alg.BitNoValidateJSONMarshaler

    // NoEncoderNewline indicates that the encoder should not add a newline after every message.
    NoEncoderNewline Options = 1 << alg.BitNoEncoderNewline

    // CompatibleWithStd is the combination of SortMapKeys, EscapeHTML and
    // CompactMarshaler, used to be compatible with the std encoder.
    CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler

    // EncodeNullForInfOrNan indicates that Infinity or NaN floats are encoded
    // into `null`, instead of returning an error.
    EncodeNullForInfOrNan Options = 1 << alg.BitEncodeNullForInfOrNan
)
  77  
// Encoder represents a specific set of encoder configurations.
type Encoder struct {
    // Opts holds the option flags that the Encode method forwards to the
    // package-level encoding functions.
    Opts Options
    // prefix is the line prefix forwarded to EncodeIndented; set by SetIndent.
    prefix string
    // indent is the indentation string forwarded to EncodeIndented; set by SetIndent.
    indent string
}
  84  
  85  // Encode returns the JSON encoding of v.
  86  func (self *Encoder) Encode(v interface{}) ([]byte, error) {
  87      if self.indent != "" || self.prefix != "" { 
  88          return EncodeIndented(v, self.prefix, self.indent, self.Opts)
  89      }
  90      return Encode(v, self.Opts)
  91  }
  92  
  93  // SortKeys enables the SortMapKeys option.
  94  func (self *Encoder) SortKeys() *Encoder {
  95      self.Opts |= SortMapKeys
  96      return self
  97  }
  98  
  99  // SetEscapeHTML specifies if option EscapeHTML opens
 100  func (self *Encoder) SetEscapeHTML(f bool) {
 101      if f {
 102          self.Opts |= EscapeHTML
 103      } else {
 104          self.Opts &= ^EscapeHTML
 105      }
 106  }
 107  
 108  // SetValidateString specifies if option ValidateString opens
 109  func (self *Encoder) SetValidateString(f bool) {
 110      if f {
 111          self.Opts |= ValidateString
 112      } else {
 113          self.Opts &= ^ValidateString
 114      }
 115  }
 116  
 117  // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
 118  func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
 119      if f {
 120          self.Opts |= NoValidateJSONMarshaler
 121      } else {
 122          self.Opts &= ^NoValidateJSONMarshaler
 123      }
 124  }
 125  
 126  // SetNoEncoderNewline specifies if option NoEncoderNewline opens
 127  func (self *Encoder) SetNoEncoderNewline(f bool) {
 128      if f {
 129          self.Opts |= NoEncoderNewline
 130      } else {
 131          self.Opts &= ^NoEncoderNewline
 132      }
 133  }
 134  
 135  
 136  // SetCompactMarshaler specifies if option CompactMarshaler opens
 137  func (self *Encoder) SetCompactMarshaler(f bool) {
 138      if f {
 139          self.Opts |= CompactMarshaler
 140      } else {
 141          self.Opts &= ^CompactMarshaler
 142      }
 143  }
 144  
 145  // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
 146  func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
 147      if f {
 148          self.Opts |= NoQuoteTextMarshaler
 149      } else {
 150          self.Opts &= ^NoQuoteTextMarshaler
 151      }
 152  }
 153  
 154  // SetIndent instructs the encoder to format each subsequent encoded
 155  // value as if indented by the package-level function EncodeIndent().
 156  // Calling SetIndent("", "") disables indentation.
 157  func (enc *Encoder) SetIndent(prefix, indent string) {
 158      enc.prefix = prefix
 159      enc.indent = indent
 160  }
 161  
 162  // Quote returns the JSON-quoted version of s.
 163  func Quote(s string) string {
 164      buf := make([]byte, 0, len(s)+2)
 165      buf = alg.Quote(buf, s, false)
 166      return rt.Mem2Str(buf)
 167  }
 168  
 169  // Encode returns the JSON encoding of val, encoded with opts.
 170  func Encode(val interface{}, opts Options) ([]byte, error) {
 171      var ret []byte
 172  
 173      buf := vars.NewBytes()
 174      err := encodeIntoCheckRace(buf, val, opts)
 175  
 176      /* check for errors */
 177      if err != nil {
 178          vars.FreeBytes(buf)
 179          return nil, err
 180      }
 181  
 182      /* htmlescape or correct UTF-8 if opts enable */
 183      old := buf
 184      *buf = encodeFinish(*old, opts)
 185      pbuf := ((*rt.GoSlice)(unsafe.Pointer(buf))).Ptr
 186      pold := ((*rt.GoSlice)(unsafe.Pointer(old))).Ptr
 187  
 188      /* return when allocated a new buffer */
 189      if pbuf != pold {
 190          vars.FreeBytes(old)
 191          return *buf, nil
 192      }
 193  
 194      /* make a copy of the result */
 195      if rt.CanSizeResue(cap(*buf)) {
 196          ret = make([]byte, len(*buf))
 197          copy(ret, *buf)
 198          vars.FreeBytes(buf)
 199      } else {
 200          ret = *buf
 201      }
 202      
 203      /* return the buffer into pool */
 204      return ret, nil
 205  }
 206  
 207  // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
 208  // a new one.
 209  func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
 210      err := encodeIntoCheckRace(buf, val, opts)
 211      if err != nil {
 212          return err
 213      }
 214      *buf = encodeFinish(*buf, opts)
 215      return err
 216  }
 217  
 218  func encodeInto(buf *[]byte, val interface{}, opts Options) error {
 219      stk := vars.NewStack()
 220      efv := rt.UnpackEface(val)
 221      err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
 222  
 223      /* return the stack into pool */
 224      if err != nil {
 225          vars.ResetStack(stk)
 226      }
 227      vars.FreeStack(stk)
 228  
 229      /* avoid GC ahead */
 230      runtime.KeepAlive(buf)
 231      runtime.KeepAlive(efv)
 232      return err
 233  }
 234  
 235  func encodeFinish(buf []byte, opts Options) []byte {
 236      if opts & EscapeHTML != 0 {
 237          buf = HTMLEscape(nil, buf)
 238      }
 239      if (opts & ValidateString != 0) && !utf8.Validate(buf) {
 240          buf = utf8.CorrectWith(nil, buf, `\ufffd`)
 241      }
 242      return buf
 243  }
 244  
 245  
 246  // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
 247  // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
 248  // so that the JSON will be safe to embed inside HTML <script> tags.
 249  // For historical reasons, web browsers don't honor standard HTML
 250  // escaping within <script> tags, so an alternative JSON encoding must
 251  // be used.
 252  func HTMLEscape(dst []byte, src []byte) []byte {
 253      return alg.HtmlEscape(dst, src)
 254  }
 255  
 256  // EncodeIndented is like Encode but applies Indent to format the output.
 257  // Each JSON element in the output will begin on a new line beginning with prefix
 258  // followed by one or more copies of indent according to the indentation nesting.
 259  func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
 260      var err error
 261      var buf *bytes.Buffer
 262  
 263      /* encode into the buffer */
 264      out := vars.NewBytes()
 265      err = EncodeInto(out, val, opts)
 266  
 267      /* check for errors */
 268      if err != nil {
 269          vars.FreeBytes(out)
 270          return nil, err
 271      }
 272  
 273      /* indent the JSON */
 274      buf = vars.NewBuffer()
 275      err = json.Indent(buf, *out, prefix, indent)
 276      vars.FreeBytes(out)
 277  
 278      /* check for errors */
 279      if err != nil {
 280          vars.FreeBuffer(buf)
 281          return nil, err
 282      }
 283  
 284      /* copy to the result buffer */
 285      var ret []byte
 286      if rt.CanSizeResue(cap(buf.Bytes())) {
 287          ret = make([]byte, buf.Len())
 288          copy(ret, buf.Bytes())
 289          /* return the buffers into pool */
 290          vars.FreeBuffer(buf)
 291      } else {
 292          ret = buf.Bytes()
 293      }
 294      
 295      return ret, nil
 296  }
 297  
 298  // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
 299  // order to reduce the first-hit latency.
 300  //
 301  // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
 302  // a compile option to set the depth of recursive compile for the nested struct type.
 303  func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
 304      cfg := option.DefaultCompileOptions()
 305      for _, opt := range opts {
 306          opt(&cfg)
 307      }
 308      return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
 309  }
 310  
 311  // Valid validates json and returns first non-blank character position,
 312  // if it is only one valid json value.
 313  // Otherwise returns invalid character position using start.
 314  //
 315  // Note: it does not check for the invalid UTF-8 characters.
 316  func Valid(data []byte) (ok bool, start int) {
 317      return alg.Valid(data)
 318  }
 319