encoder.go raw
1 /*
2 * Copyright 2021 ByteDance Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package encoder
18
19 import (
20 "bytes"
21 "encoding/json"
22 "reflect"
23 "runtime"
24 "unsafe"
25
26 "github.com/bytedance/sonic/utf8"
27 "github.com/bytedance/sonic/internal/encoder/alg"
28 "github.com/bytedance/sonic/internal/encoder/vars"
29 "github.com/bytedance/sonic/internal/rt"
30 "github.com/bytedance/sonic/option"
31 )
32
// Options is a bit set of encoding flags. Individual flags are combined
// with bitwise OR and passed to the package-level encode functions.
type Options uint64
35
36 const (
37 // SortMapKeys indicates that the keys of a map needs to be sorted
38 // before serializing into JSON.
39 // WARNING: This hurts performance A LOT, USE WITH CARE.
40 SortMapKeys Options = 1 << alg.BitSortMapKeys
41
42 // EscapeHTML indicates encoder to escape all HTML characters
43 // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
44 // WARNING: This hurts performance A LOT, USE WITH CARE.
45 EscapeHTML Options = 1 << alg.BitEscapeHTML
46
47 // CompactMarshaler indicates that the output JSON from json.Marshaler
48 // is always compact and needs no validation
49 CompactMarshaler Options = 1 << alg.BitCompactMarshaler
50
51 // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler
52 // is always escaped string and needs no quoting
53 NoQuoteTextMarshaler Options = 1 << alg.BitNoQuoteTextMarshaler
54
55 // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}',
56 // instead of 'null'.
57 // NOTE: The priority of this option is lower than json tag `omitempty`.
58 NoNullSliceOrMap Options = 1 << alg.BitNoNullSliceOrMap
59
60 // ValidateString indicates that encoder should validate the input string
61 // before encoding it into JSON.
62 ValidateString Options = 1 << alg.BitValidateString
63
64 // NoValidateJSONMarshaler indicates that the encoder should not validate the output string
65 // after encoding the JSONMarshaler to JSON.
66 NoValidateJSONMarshaler Options = 1 << alg.BitNoValidateJSONMarshaler
67
68 // NoEncoderNewline indicates that the encoder should not add a newline after every message
69 NoEncoderNewline Options = 1 << alg.BitNoEncoderNewline
70
71 // CompatibleWithStd is used to be compatible with std encoder.
72 CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
73
74 // Encode Infinity or Nan float into `null`, instead of returning an error.
75 EncodeNullForInfOrNan Options = 1 << alg.BitEncodeNullForInfOrNan
76 )
77
78 // Encoder represents a specific set of encoder configurations.
79 type Encoder struct {
80 Opts Options
81 prefix string
82 indent string
83 }
84
85 // Encode returns the JSON encoding of v.
86 func (self *Encoder) Encode(v interface{}) ([]byte, error) {
87 if self.indent != "" || self.prefix != "" {
88 return EncodeIndented(v, self.prefix, self.indent, self.Opts)
89 }
90 return Encode(v, self.Opts)
91 }
92
93 // SortKeys enables the SortMapKeys option.
94 func (self *Encoder) SortKeys() *Encoder {
95 self.Opts |= SortMapKeys
96 return self
97 }
98
99 // SetEscapeHTML specifies if option EscapeHTML opens
100 func (self *Encoder) SetEscapeHTML(f bool) {
101 if f {
102 self.Opts |= EscapeHTML
103 } else {
104 self.Opts &= ^EscapeHTML
105 }
106 }
107
108 // SetValidateString specifies if option ValidateString opens
109 func (self *Encoder) SetValidateString(f bool) {
110 if f {
111 self.Opts |= ValidateString
112 } else {
113 self.Opts &= ^ValidateString
114 }
115 }
116
117 // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
118 func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
119 if f {
120 self.Opts |= NoValidateJSONMarshaler
121 } else {
122 self.Opts &= ^NoValidateJSONMarshaler
123 }
124 }
125
126 // SetNoEncoderNewline specifies if option NoEncoderNewline opens
127 func (self *Encoder) SetNoEncoderNewline(f bool) {
128 if f {
129 self.Opts |= NoEncoderNewline
130 } else {
131 self.Opts &= ^NoEncoderNewline
132 }
133 }
134
135
136 // SetCompactMarshaler specifies if option CompactMarshaler opens
137 func (self *Encoder) SetCompactMarshaler(f bool) {
138 if f {
139 self.Opts |= CompactMarshaler
140 } else {
141 self.Opts &= ^CompactMarshaler
142 }
143 }
144
145 // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
146 func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
147 if f {
148 self.Opts |= NoQuoteTextMarshaler
149 } else {
150 self.Opts &= ^NoQuoteTextMarshaler
151 }
152 }
153
154 // SetIndent instructs the encoder to format each subsequent encoded
155 // value as if indented by the package-level function EncodeIndent().
156 // Calling SetIndent("", "") disables indentation.
157 func (enc *Encoder) SetIndent(prefix, indent string) {
158 enc.prefix = prefix
159 enc.indent = indent
160 }
161
162 // Quote returns the JSON-quoted version of s.
163 func Quote(s string) string {
164 buf := make([]byte, 0, len(s)+2)
165 buf = alg.Quote(buf, s, false)
166 return rt.Mem2Str(buf)
167 }
168
169 // Encode returns the JSON encoding of val, encoded with opts.
170 func Encode(val interface{}, opts Options) ([]byte, error) {
171 var ret []byte
172
173 buf := vars.NewBytes()
174 err := encodeIntoCheckRace(buf, val, opts)
175
176 /* check for errors */
177 if err != nil {
178 vars.FreeBytes(buf)
179 return nil, err
180 }
181
182 /* htmlescape or correct UTF-8 if opts enable */
183 old := buf
184 *buf = encodeFinish(*old, opts)
185 pbuf := ((*rt.GoSlice)(unsafe.Pointer(buf))).Ptr
186 pold := ((*rt.GoSlice)(unsafe.Pointer(old))).Ptr
187
188 /* return when allocated a new buffer */
189 if pbuf != pold {
190 vars.FreeBytes(old)
191 return *buf, nil
192 }
193
194 /* make a copy of the result */
195 if rt.CanSizeResue(cap(*buf)) {
196 ret = make([]byte, len(*buf))
197 copy(ret, *buf)
198 vars.FreeBytes(buf)
199 } else {
200 ret = *buf
201 }
202
203 /* return the buffer into pool */
204 return ret, nil
205 }
206
207 // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
208 // a new one.
209 func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
210 err := encodeIntoCheckRace(buf, val, opts)
211 if err != nil {
212 return err
213 }
214 *buf = encodeFinish(*buf, opts)
215 return err
216 }
217
218 func encodeInto(buf *[]byte, val interface{}, opts Options) error {
219 stk := vars.NewStack()
220 efv := rt.UnpackEface(val)
221 err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
222
223 /* return the stack into pool */
224 if err != nil {
225 vars.ResetStack(stk)
226 }
227 vars.FreeStack(stk)
228
229 /* avoid GC ahead */
230 runtime.KeepAlive(buf)
231 runtime.KeepAlive(efv)
232 return err
233 }
234
235 func encodeFinish(buf []byte, opts Options) []byte {
236 if opts & EscapeHTML != 0 {
237 buf = HTMLEscape(nil, buf)
238 }
239 if (opts & ValidateString != 0) && !utf8.Validate(buf) {
240 buf = utf8.CorrectWith(nil, buf, `\ufffd`)
241 }
242 return buf
243 }
244
245
246 // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
247 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
248 // so that the JSON will be safe to embed inside HTML <script> tags.
249 // For historical reasons, web browsers don't honor standard HTML
250 // escaping within <script> tags, so an alternative JSON encoding must
251 // be used.
252 func HTMLEscape(dst []byte, src []byte) []byte {
253 return alg.HtmlEscape(dst, src)
254 }
255
256 // EncodeIndented is like Encode but applies Indent to format the output.
257 // Each JSON element in the output will begin on a new line beginning with prefix
258 // followed by one or more copies of indent according to the indentation nesting.
259 func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
260 var err error
261 var buf *bytes.Buffer
262
263 /* encode into the buffer */
264 out := vars.NewBytes()
265 err = EncodeInto(out, val, opts)
266
267 /* check for errors */
268 if err != nil {
269 vars.FreeBytes(out)
270 return nil, err
271 }
272
273 /* indent the JSON */
274 buf = vars.NewBuffer()
275 err = json.Indent(buf, *out, prefix, indent)
276 vars.FreeBytes(out)
277
278 /* check for errors */
279 if err != nil {
280 vars.FreeBuffer(buf)
281 return nil, err
282 }
283
284 /* copy to the result buffer */
285 var ret []byte
286 if rt.CanSizeResue(cap(buf.Bytes())) {
287 ret = make([]byte, buf.Len())
288 copy(ret, buf.Bytes())
289 /* return the buffers into pool */
290 vars.FreeBuffer(buf)
291 } else {
292 ret = buf.Bytes()
293 }
294
295 return ret, nil
296 }
297
298 // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
299 // order to reduce the first-hit latency.
300 //
301 // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
302 // a compile option to set the depth of recursive compile for the nested struct type.
303 func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
304 cfg := option.DefaultCompileOptions()
305 for _, opt := range opts {
306 opt(&cfg)
307 }
308 return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
309 }
310
311 // Valid validates json and returns first non-blank character position,
312 // if it is only one valid json value.
313 // Otherwise returns invalid character position using start.
314 //
315 // Note: it does not check for the invalid UTF-8 characters.
316 func Valid(data []byte) (ok bool, start int) {
317 return alg.Valid(data)
318 }
319