string.mx raw
1 package runtime
2
3 // This file implements functions related to Go strings.
4
5 import (
6 "internal/gclayout"
7 "unsafe"
8 )
9
// _string is the in-memory representation backing the Go string type.
// Moxie: three words laid out like a []byte header (ptr, len, cap).
type _string struct {
	ptr         *byte   // first byte of the string data
	length, cap uintptr // byte length and capacity, in that order
}
17
// stringIterator is the iterator state for a range over a string.
type stringIterator struct {
	// byteindex is the byte offset of the next rune to decode; stringNext
	// advances it by the encoded size of each rune it returns.
	byteindex uintptr
}
22
// stringEqual reports whether x and y hold exactly the same bytes.
//
//go:nobounds
func stringEqual(x, y string) bool {
	n := len(x)
	if n != len(y) {
		return false
	}
	// Walk forward until the first mismatching byte (or the end).
	i := 0
	for i < n && x[i] == y[i] {
		i++
	}
	return i == n
}
37
38 // sliceEqual compares two slices of any comparable element type.
39 // Checks lengths first, then compares raw memory (elemSize * length bytes).
40 func sliceEqual(xPtr, yPtr unsafe.Pointer, xLen, yLen, elemSize uintptr) bool {
41 if xLen != yLen {
42 return false
43 }
44 if xLen == 0 {
45 return true
46 }
47 return memequal(xPtr, yPtr, xLen*elemSize)
48 }
49
// stringLess reports whether x sorts before y in byte-wise lexicographic
// order.
//
//go:nobounds
func stringLess(x, y string) bool {
	// Only the common prefix can be compared byte by byte.
	common := len(x)
	if len(y) < common {
		common = len(y)
	}
	for i := 0; i < common; i++ {
		if a, b := x[i], y[i]; a != b {
			// The first differing byte decides the ordering.
			return a < b
		}
	}
	// One string is a prefix of the other: the shorter one is smaller.
	return len(x) < len(y)
}
68
69 // Concatenate two strings.
70 func stringConcat(x, y _string) _string {
71 if x.length == 0 {
72 return y
73 } else if y.length == 0 {
74 return x
75 }
76 length := x.length + y.length
77 buf := alloc(length, gclayout.NoPtrs.AsPtr())
78 memcpy(buf, unsafe.Pointer(x.ptr), x.length)
79 memcpy(unsafe.Add(buf, x.length), unsafe.Pointer(y.ptr), y.length)
80 result := _string{ptr: (*byte)(buf), length: length, cap: length}
81 return result
82 }
83
84 // Concatenate two byte slices. Used for | pipe concat.
85 func bytesConcat(x, y []byte) []byte {
86 if len(x) == 0 {
87 return y
88 } else if len(y) == 0 {
89 return x
90 }
91 length := len(x) + len(y)
92 buf := alloc(uintptr(length), gclayout.NoPtrs.AsPtr())
93 memcpy(buf, unsafe.Pointer(&x[0]), uintptr(len(x)))
94 memcpy(unsafe.Add(buf, uintptr(len(x))), unsafe.Pointer(&y[0]), uintptr(len(y)))
95 result := (*[1 << 30]byte)(buf)[:length:length]
96 return result
97 }
98
99
100 // Convert a []rune slice to a string.
101 func stringFromRunes(runeSlice []rune) (s _string) {
102 // Count the number of characters that will be in the string.
103 for _, r := range runeSlice {
104 _, numBytes := encodeUTF8(r)
105 s.length += numBytes
106 }
107 s.cap = s.length
108
109 // Allocate memory for the string.
110 s.ptr = (*byte)(alloc(s.length, gclayout.NoPtrs.AsPtr()))
111
112 // Encode runes to UTF-8 and store the resulting bytes in the string.
113 index := uintptr(0)
114 for _, r := range runeSlice {
115 array, numBytes := encodeUTF8(r)
116 for _, c := range array[:numBytes] {
117 *(*byte)(unsafe.Add(unsafe.Pointer(s.ptr), index)) = c
118 index++
119 }
120 }
121
122 return
123 }
124
// stringToRunes converts a string to a []rune slice holding one element per
// rune produced by ranging over s.
func stringToRunes(s string) []rune {
	// Count the runes first so the result can be allocated exactly once.
	count := 0
	for range s {
		count++
	}

	runes := make([]rune, count)
	next := 0
	for _, r := range s {
		runes[next] = r
		next++
	}
	return runes
}
139
140 // Create a string from a Unicode code point.
141 func stringFromUnicode(x rune) _string {
142 array, length := encodeUTF8(x)
143 // Array will be heap allocated.
144 // The heap most likely doesn't work with blocks below 4 bytes, so there's
145 // no point in allocating a smaller buffer for the string here.
146 return _string{ptr: (*byte)(unsafe.Pointer(&array)), length: length, cap: length}
147 }
148
149 // Iterate over a string.
150 // Returns (ok, key, value).
151 func stringNext(s string, it *stringIterator) (bool, int, rune) {
152 if len(s) <= int(it.byteindex) {
153 return false, 0, 0
154 }
155 i := int(it.byteindex)
156 r, length := decodeUTF8(s, it.byteindex)
157 it.byteindex += length
158 return true, i, r
159 }
160
// encodeUTF8 converts a Unicode code point into its UTF-8 encoding.
//
// It returns the encoded bytes in a fixed 4-byte array (unused trailing bytes
// are zero) together with the number of bytes that are actually used (1-4).
// Anything that is not a valid code point - negative values, UTF-16
// surrogates (U+D800..U+DFFF) and values above U+10FFFF - is encoded as
// U+FFFD, the Unicode replacement character.
func encodeUTF8(x rune) ([4]byte, uintptr) {
	// https://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
	// Note: this code can probably be optimized (in size and speed).

	// UTF-8 encoding of U+FFFD.
	replacement := [4]byte{0xef, 0xbf, 0xbd, 0}

	switch {
	case x < 0:
		// rune is a signed type: without this guard a negative value would
		// match the one-byte case below and be emitted as a single raw byte,
		// producing invalid UTF-8.
		return replacement, 3
	case x <= 0x7f:
		return [4]byte{byte(x), 0, 0, 0}, 1
	case x <= 0x7ff:
		b1 := 0xc0 | byte(x>>6)
		b2 := 0x80 | byte(x&0x3f)
		return [4]byte{b1, b2, 0, 0}, 2
	case 0xd800 <= x && x <= 0xdfff:
		// utf-16 surrogates are replaced with "invalid code point"
		return replacement, 3
	case x <= 0xffff:
		b1 := 0xe0 | byte(x>>12)
		b2 := 0x80 | byte((x>>6)&0x3f)
		b3 := 0x80 | byte((x>>0)&0x3f)
		return [4]byte{b1, b2, b3, 0}, 3
	case x <= 0x10ffff:
		b1 := 0xf0 | byte(x>>18)
		b2 := 0x80 | byte((x>>12)&0x3f)
		b3 := 0x80 | byte((x>>6)&0x3f)
		b4 := 0x80 | byte((x>>0)&0x3f)
		return [4]byte{b1, b2, b3, b4}, 4
	default:
		// Invalid Unicode code point (above U+10FFFF).
		return replacement, 3
	}
}
191
192 // Decode a single UTF-8 character from a string.
193 //
194 //go:nobounds
195 func decodeUTF8(s string, index uintptr) (rune, uintptr) {
196 remaining := uintptr(len(s)) - index // must be >= 1 before calling this function
197 x := s[index]
198 switch {
199 case x&0x80 == 0x00: // 0xxxxxxx
200 return rune(x), 1
201 case x&0xe0 == 0xc0: // 110xxxxx
202 if remaining < 2 || !isContinuation(s[index+1]) {
203 return 0xfffd, 1
204 }
205 r := (rune(x&0x1f) << 6) | (rune(s[index+1]) & 0x3f)
206 if r >= 1<<7 {
207 // Check whether the rune really needed to be encoded as a two-byte
208 // sequence. UTF-8 requires every rune to be encoded in the smallest
209 // sequence possible.
210 return r, 2
211 }
212 case x&0xf0 == 0xe0: // 1110xxxx
213 if remaining < 3 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) {
214 return 0xfffd, 1
215 }
216 r := (rune(x&0x0f) << 12) | ((rune(s[index+1]) & 0x3f) << 6) | (rune(s[index+2]) & 0x3f)
217 if r >= 1<<11 && !(r >= 0xD800 && r <= 0xDFFF) {
218 // Check whether the rune really needed to be encoded as a
219 // three-byte sequence and check that this is not a Unicode
220 // surrogate pair (which are not allowed by UTF-8).
221 return r, 3
222 }
223 case x&0xf8 == 0xf0: // 11110xxx
224 if remaining < 4 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) || !isContinuation(s[index+3]) {
225 return 0xfffd, 1
226 }
227 r := (rune(x&0x07) << 18) | ((rune(s[index+1]) & 0x3f) << 12) | ((rune(s[index+2]) & 0x3f) << 6) | (rune(s[index+3]) & 0x3f)
228 if r >= 1<<16 && r <= '\U0010FFFF' {
229 // Check whether this rune really needed to be encoded as a four
230 // byte sequence and check that the resulting rune is in the valid
231 // range (up to at most U+10FFFF).
232 return r, 4
233 }
234 }
235
236 // Failed to decode. Return the Unicode replacement character and a length of 1.
237 return 0xfffd, 1
238 }
239
// isContinuation reports whether b is a UTF-8 continuation byte, i.e. a byte
// of the form 10xxxxxx.
func isContinuation(b byte) bool {
	// Only the two topmost bits matter; they must be exactly 0b10.
	return b>>6 == 0b10
}
246
247 // Functions used in CGo.
248
249 // Convert a Go string to a C string.
250 func cgo_CString(s _string) unsafe.Pointer {
251 buf := malloc(s.length + 1)
252 memcpy(buf, unsafe.Pointer(s.ptr), s.length)
253 *(*byte)(unsafe.Add(buf, s.length)) = 0 // trailing 0 byte
254 return buf
255 }
256
257 // Convert a C string to a Go string.
258 func cgo_GoString(cstr unsafe.Pointer) _string {
259 if cstr == nil {
260 return _string{}
261 }
262 return makeGoString(cstr, strlen(cstr))
263 }
264
// cgo_GoStringN converts a C data buffer to a Go string (that possibly
// contains 0 bytes). The length is given explicitly by the caller instead of
// being derived from a terminating 0 byte; the copy itself is done by
// makeGoString.
func cgo_GoStringN(cstr unsafe.Pointer, length uintptr) _string {
	return makeGoString(cstr, length)
}
269
270 // Make a Go string given a source buffer and a length.
271 func makeGoString(cstr unsafe.Pointer, length uintptr) _string {
272 s := _string{
273 length: length,
274 cap: length,
275 }
276 if s.length != 0 {
277 buf := make([]byte, s.length)
278 s.ptr = &buf[0]
279 memcpy(unsafe.Pointer(s.ptr), cstr, s.length)
280 }
281 return s
282 }
283
284 // Convert a C data buffer to a Go byte slice.
285 func cgo_GoBytes(ptr unsafe.Pointer, length uintptr) []byte {
286 // Note: don't return nil if length is 0, to match the behavior of C.GoBytes
287 // of upstream Go.
288 buf := make([]byte, length)
289 if length != 0 {
290 memcpy(unsafe.Pointer(&buf[0]), ptr, uintptr(length))
291 }
292 return buf
293 }
294
295 func cgo_CBytes(b []byte) unsafe.Pointer {
296 p := malloc(uintptr(len(b)))
297 s := unsafe.Slice((*byte)(p), len(b))
298 copy(s, b)
299 return p
300 }
301