decode.go raw
1 /*
2 * Copyright 2022 ByteDance Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package ast
18
19 import (
20 "encoding/base64"
21 "runtime"
22 "strconv"
23 "unsafe"
24
25 "github.com/bytedance/sonic/internal/native/types"
26 "github.com/bytedance/sonic/internal/rt"
27 "github.com/bytedance/sonic/internal/utils"
28 )
29
// Hack: this is used for both checking space and cause friendly compile errors in 32-bit arch.
//
// The value is a bitmask with one bit set for each of the bytes ' ', '\t',
// '\r' and '\n'. The bit for ' ' is bit 32, so the constant does not fit in a
// 32-bit int; isSpace combines it with an int operand, which is where a 32-bit
// build is expected to fail with this identifier in the error message.
const _Sonic_Not_Support_32Bit_Arch__Checking_32Bit_Arch_Here = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')

// bytesNull is the JSON literal "null" as a byte slice.
var bytesNull = []byte("null")

// Spellings of JSON literals and empty containers.
// NOTE: despite the "bytes" prefix, these are untyped string constants.
const (
	strNull = "null"
	bytesTrue = "true"
	bytesFalse = "false"
	bytesObject = "{}"
	bytesArray = "[]"
)
42
43 func isSpace(c byte) bool {
44 return (int(1<<c) & _Sonic_Not_Support_32Bit_Arch__Checking_32Bit_Arch_Here) != 0
45 }
46
47 //go:nocheckptr
48 func skipBlank(src string, pos int) int {
49 se := uintptr(rt.IndexChar(src, len(src)))
50 sp := uintptr(rt.IndexChar(src, pos))
51
52 for sp < se {
53 if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
54 break
55 }
56 sp += 1
57 }
58 if sp >= se {
59 return -int(types.ERR_EOF)
60 }
61 runtime.KeepAlive(src)
62 return int(sp - uintptr(rt.IndexChar(src, 0)))
63 }
64
65 func decodeNull(src string, pos int) (ret int) {
66 ret = pos + 4
67 if ret > len(src) {
68 return -int(types.ERR_EOF)
69 }
70 if src[pos:ret] == strNull {
71 return ret
72 } else {
73 return -int(types.ERR_INVALID_CHAR)
74 }
75 }
76
77 func decodeTrue(src string, pos int) (ret int) {
78 ret = pos + 4
79 if ret > len(src) {
80 return -int(types.ERR_EOF)
81 }
82 if src[pos:ret] == bytesTrue {
83 return ret
84 } else {
85 return -int(types.ERR_INVALID_CHAR)
86 }
87
88 }
89
90 func decodeFalse(src string, pos int) (ret int) {
91 ret = pos + 5
92 if ret > len(src) {
93 return -int(types.ERR_EOF)
94 }
95 if src[pos:ret] == bytesFalse {
96 return ret
97 }
98 return -int(types.ERR_INVALID_CHAR)
99 }
100
101 //go:nocheckptr
102 func decodeString(src string, pos int) (ret int, v string) {
103 ret, ep := skipString(src, pos)
104 if ep == -1 {
105 (*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1)
106 (*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2
107 return ret, v
108 }
109
110 vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
111 if !ok {
112 return -int(types.ERR_INVALID_CHAR), ""
113 }
114
115 runtime.KeepAlive(src)
116 return ret, rt.Mem2Str(vv)
117 }
118
119 func decodeBinary(src string, pos int) (ret int, v []byte) {
120 var vv string
121 ret, vv = decodeString(src, pos)
122 if ret < 0 {
123 return ret, nil
124 }
125 var err error
126 v, err = base64.StdEncoding.DecodeString(vv)
127 if err != nil {
128 return -int(types.ERR_INVALID_CHAR), nil
129 }
130 return ret, v
131 }
132
// isDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
136
137 //go:nocheckptr
138 func decodeInt64(src string, pos int) (ret int, v int64, err error) {
139 sp := uintptr(rt.IndexChar(src, pos))
140 ss := uintptr(sp)
141 se := uintptr(rt.IndexChar(src, len(src)))
142 if uintptr(sp) >= se {
143 return -int(types.ERR_EOF), 0, nil
144 }
145
146 if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
147 sp += 1
148 }
149 if sp == se {
150 return -int(types.ERR_EOF), 0, nil
151 }
152
153 for ; sp < se; sp += uintptr(1) {
154 if !isDigit(*(*byte)(unsafe.Pointer(sp))) {
155 break
156 }
157 }
158
159 if sp < se {
160 if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' {
161 return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil
162 }
163 }
164
165 var vv string
166 ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
167 (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
168 (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
169
170 v, err = strconv.ParseInt(vv, 10, 64)
171 if err != nil {
172 //NOTICE: allow overflow here
173 if err.(*strconv.NumError).Err == strconv.ErrRange {
174 return ret, 0, err
175 }
176 return -int(types.ERR_INVALID_CHAR), 0, err
177 }
178
179 runtime.KeepAlive(src)
180 return ret, v, nil
181 }
182
// isNumberChars reports whether c may appear inside a number literal as
// scanned by decodeFloat64: an ASCII digit, a sign, an exponent marker or a
// decimal point.
func isNumberChars(c byte) bool {
	switch c {
	case '+', '-', '.', 'e', 'E':
		return true
	}
	return '0' <= c && c <= '9'
}
186
187 //go:nocheckptr
188 func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
189 sp := uintptr(rt.IndexChar(src, pos))
190 ss := uintptr(sp)
191 se := uintptr(rt.IndexChar(src, len(src)))
192 if uintptr(sp) >= se {
193 return -int(types.ERR_EOF), 0, nil
194 }
195
196 if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
197 sp += 1
198 }
199 if sp == se {
200 return -int(types.ERR_EOF), 0, nil
201 }
202
203 for ; sp < se; sp += uintptr(1) {
204 if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) {
205 break
206 }
207 }
208
209 var vv string
210 ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
211 (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
212 (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
213
214 v, err = strconv.ParseFloat(vv, 64)
215 if err != nil {
216 //NOTICE: allow overflow here
217 if err.(*strconv.NumError).Err == strconv.ErrRange {
218 return ret, 0, err
219 }
220 return -int(types.ERR_INVALID_CHAR), 0, err
221 }
222
223 runtime.KeepAlive(src)
224 return ret, v, nil
225 }
226
227 func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState) {
228 pos = skipBlank(src, pos)
229 if pos < 0 {
230 return pos, types.JsonState{Vt: types.ValueType(pos)}
231 }
232 switch c := src[pos]; c {
233 case 'n':
234 ret = decodeNull(src, pos)
235 if ret < 0 {
236 return ret, types.JsonState{Vt: types.ValueType(ret)}
237 }
238 return ret, types.JsonState{Vt: types.V_NULL}
239 case '"':
240 var ep int
241 ret, ep = skipString(src, pos)
242 if ret < 0 {
243 return ret, types.JsonState{Vt: types.ValueType(ret)}
244 }
245 return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep}
246 case '{':
247 return pos + 1, types.JsonState{Vt: types.V_OBJECT}
248 case '[':
249 return pos + 1, types.JsonState{Vt: types.V_ARRAY}
250 case 't':
251 ret = decodeTrue(src, pos)
252 if ret < 0 {
253 return ret, types.JsonState{Vt: types.ValueType(ret)}
254 }
255 return ret, types.JsonState{Vt: types.V_TRUE}
256 case 'f':
257 ret = decodeFalse(src, pos)
258 if ret < 0 {
259 return ret, types.JsonState{Vt: types.ValueType(ret)}
260 }
261 return ret, types.JsonState{Vt: types.V_FALSE}
262 case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
263 if skipnum {
264 ret = skipNumber(src, pos)
265 if ret >= 0 {
266 return ret, types.JsonState{Vt: types.V_DOUBLE, Iv: 0, Ep: pos}
267 } else {
268 return ret, types.JsonState{Vt: types.ValueType(ret)}
269 }
270 } else {
271 var iv int64
272 ret, iv, _ = decodeInt64(src, pos)
273 if ret >= 0 {
274 return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
275 } else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
276 return ret, types.JsonState{Vt: types.ValueType(ret)}
277 }
278 var fv float64
279 ret, fv, _ = decodeFloat64(src, pos)
280 if ret >= 0 {
281 return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
282 } else {
283 return ret, types.JsonState{Vt: types.ValueType(ret)}
284 }
285 }
286
287 default:
288 return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)}
289 }
290 }
291
// skipNumber delegates to utils.SkipNumber for the number literal at src[pos]
// and returns its result unchanged. Callers in this file treat a negative
// result as an error code and a non-negative one as the position after the
// number (presumably just past the literal; confirm against utils.SkipNumber).
//
//go:nocheckptr
func skipNumber(src string, pos int) (ret int) {
	return utils.SkipNumber(src, pos)
}
296
297 //go:nocheckptr
298 func skipString(src string, pos int) (ret int, ep int) {
299 if pos+1 >= len(src) {
300 return -int(types.ERR_EOF), -1
301 }
302
303 sp := uintptr(rt.IndexChar(src, pos))
304 se := uintptr(rt.IndexChar(src, len(src)))
305
306 // not start with quote
307 if *(*byte)(unsafe.Pointer(sp)) != '"' {
308 return -int(types.ERR_INVALID_CHAR), -1
309 }
310 sp += 1
311
312 ep = -1
313 for sp < se {
314 c := *(*byte)(unsafe.Pointer(sp))
315 if c == '\\' {
316 if ep == -1 {
317 ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
318 }
319 sp += 2
320 continue
321 }
322 sp += 1
323 if c == '"' {
324 return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep
325 }
326 }
327
328 runtime.KeepAlive(src)
329 // not found the closed quote until EOF
330 return -int(types.ERR_EOF), -1
331 }
332
333 //go:nocheckptr
334 func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) {
335 if pos+1 >= len(src) {
336 return -int(types.ERR_EOF)
337 }
338
339 sp := uintptr(rt.IndexChar(src, pos))
340 se := uintptr(rt.IndexChar(src, len(src)))
341
342 if *(*byte)(unsafe.Pointer(sp)) != lchar {
343 return -int(types.ERR_INVALID_CHAR)
344 }
345
346 sp += 1
347 nbrace := 1
348 inquote := false
349
350 for sp < se {
351 c := *(*byte)(unsafe.Pointer(sp))
352 if c == '\\' {
353 sp += 2
354 continue
355 } else if c == '"' {
356 inquote = !inquote
357 } else if c == lchar {
358 if !inquote {
359 nbrace += 1
360 }
361 } else if c == rchar {
362 if !inquote {
363 nbrace -= 1
364 if nbrace == 0 {
365 sp += 1
366 break
367 }
368 }
369 }
370 sp += 1
371 }
372
373 if nbrace != 0 {
374 return -int(types.ERR_INVALID_CHAR)
375 }
376
377 runtime.KeepAlive(src)
378 return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
379 }
380
381 func skipValueFast(src string, pos int) (ret int, start int) {
382 pos = skipBlank(src, pos)
383 if pos < 0 {
384 return pos, -1
385 }
386 switch c := src[pos]; c {
387 case 'n':
388 ret = decodeNull(src, pos)
389 case '"':
390 ret, _ = skipString(src, pos)
391 case '{':
392 ret = skipPair(src, pos, '{', '}')
393 case '[':
394 ret = skipPair(src, pos, '[', ']')
395 case 't':
396 ret = decodeTrue(src, pos)
397 case 'f':
398 ret = decodeFalse(src, pos)
399 case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
400 ret = skipNumber(src, pos)
401 default:
402 ret = -int(types.ERR_INVALID_CHAR)
403 }
404 return ret, pos
405 }
406
407 func skipValue(src string, pos int) (ret int, start int) {
408 pos = skipBlank(src, pos)
409 if pos < 0 {
410 return pos, -1
411 }
412 switch c := src[pos]; c {
413 case 'n':
414 ret = decodeNull(src, pos)
415 case '"':
416 ret, _ = skipString(src, pos)
417 case '{':
418 ret, _ = skipObject(src, pos)
419 case '[':
420 ret, _ = skipArray(src, pos)
421 case 't':
422 ret = decodeTrue(src, pos)
423 case 'f':
424 ret = decodeFalse(src, pos)
425 case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
426 ret = skipNumber(src, pos)
427 default:
428 ret = -int(types.ERR_INVALID_CHAR)
429 }
430 return ret, pos
431 }
432
433 func skipObject(src string, pos int) (ret int, start int) {
434 start = skipBlank(src, pos)
435 if start < 0 {
436 return start, -1
437 }
438
439 if src[start] != '{' {
440 return -int(types.ERR_INVALID_CHAR), -1
441 }
442
443 pos = start + 1
444 pos = skipBlank(src, pos)
445 if pos < 0 {
446 return pos, -1
447 }
448 if src[pos] == '}' {
449 return pos + 1, start
450 }
451
452 for {
453 pos, _ = skipString(src, pos)
454 if pos < 0 {
455 return pos, -1
456 }
457
458 pos = skipBlank(src, pos)
459 if pos < 0 {
460 return pos, -1
461 }
462 if src[pos] != ':' {
463 return -int(types.ERR_INVALID_CHAR), -1
464 }
465
466 pos++
467 pos, _ = skipValue(src, pos)
468 if pos < 0 {
469 return pos, -1
470 }
471
472 pos = skipBlank(src, pos)
473 if pos < 0 {
474 return pos, -1
475 }
476 if src[pos] == '}' {
477 return pos + 1, start
478 }
479 if src[pos] != ',' {
480 return -int(types.ERR_INVALID_CHAR), -1
481 }
482
483 pos++
484 pos = skipBlank(src, pos)
485 if pos < 0 {
486 return pos, -1
487 }
488
489 }
490 }
491
492 func skipArray(src string, pos int) (ret int, start int) {
493 start = skipBlank(src, pos)
494 if start < 0 {
495 return start, -1
496 }
497
498 if src[start] != '[' {
499 return -int(types.ERR_INVALID_CHAR), -1
500 }
501
502 pos = start + 1
503 pos = skipBlank(src, pos)
504 if pos < 0 {
505 return pos, -1
506 }
507 if src[pos] == ']' {
508 return pos + 1, start
509 }
510
511 for {
512 pos, _ = skipValue(src, pos)
513 if pos < 0 {
514 return pos, -1
515 }
516
517 pos = skipBlank(src, pos)
518 if pos < 0 {
519 return pos, -1
520 }
521 if src[pos] == ']' {
522 return pos + 1, start
523 }
524 if src[pos] != ',' {
525 return -int(types.ERR_INVALID_CHAR), -1
526 }
527 pos++
528 }
529 }
530
531 // DecodeString decodes a JSON string from pos and return golang string.
532 // - needEsc indicates if to unescaped escaping chars
533 // - hasEsc tells if the returned string has escaping chars
534 // - validStr enables validating UTF8 charset
535 //
536 func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
537 p := NewParserObj(src)
538 p.p = pos
539 switch val := p.decodeValue(); val.Vt {
540 case types.V_STRING:
541 str := p.s[val.Iv : p.p-1]
542 if validStr && !validate_utf8(str) {
543 return "", -int(types.ERR_INVALID_UTF8), false
544 }
545 /* fast path: no escape sequence */
546 if val.Ep == -1 {
547 return str, p.p, false
548 } else if !needEsc {
549 return str, p.p, true
550 }
551 /* unquote the string */
552 out, err := unquote(str)
553 /* check for errors */
554 if err != 0 {
555 return "", -int(err), true
556 } else {
557 return out, p.p, true
558 }
559 default:
560 return "", -int(_ERR_UNSUPPORT_TYPE), false
561 }
562 }
563