1 /*
2 * Copyright 2022 ByteDance Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 17 package utf8
18 19 import (
20 `runtime`
21 22 `github.com/bytedance/sonic/internal/rt`
23 `github.com/bytedance/sonic/internal/native/types`
24 `github.com/bytedance/sonic/internal/native`
25 )
26 27 // CorrectWith corrects the invalid utf8 byte with repl string.
28 func CorrectWith(dst []byte, src []byte, repl string) []byte {
29 sstr := rt.Mem2Str(src)
30 sidx := 0
31 32 /* state machine records the invalid positions */
33 m := types.NewStateMachine()
34 m.Sp = 0 // invalid utf8 numbers
35 36 for sidx < len(sstr) {
37 scur := sidx
38 ecode := native.ValidateUTF8(&sstr, &sidx, m)
39 40 if m.Sp != 0 {
41 if m.Sp > len(sstr) {
42 panic("numbers of invalid utf8 exceed the string len!")
43 }
44 }
45 46 for i := 0; i < m.Sp; i++ {
47 ipos := m.Vt[i] // invalid utf8 position
48 dst = append(dst, sstr[scur:ipos]...)
49 dst = append(dst, repl...)
50 scur = m.Vt[i] + 1
51 }
52 /* append the remained valid utf8 bytes */
53 dst = append(dst, sstr[scur:sidx]...)
54 55 /* not enough space, reset and continue */
56 if ecode != 0 {
57 m.Sp = 0
58 }
59 }
60 61 types.FreeStateMachine(m)
62 return dst
63 }
64 65 // Validate is a simd-accelereated drop-in replacement for the standard library's utf8.Valid.
66 func Validate(src []byte) bool {
67 if src == nil {
68 return true
69 }
70 return ValidateString(rt.Mem2Str(src))
71 }
72 73 // ValidateString as Validate, but for string.
74 func ValidateString(src string) bool {
75 if src == "" {
76 return true
77 }
78 ret := native.ValidateUTF8Fast(&src) == 0
79 runtime.KeepAlive(src)
80 return ret
81 }
82