string.mx raw

   1  package runtime
   2  
   3  // This file implements functions related to Go strings.
   4  
   5  import (
   6  	"internal/gclayout"
   7  	"unsafe"
   8  )
   9  
// The underlying struct for the Go string type.
// Moxie: 3-word struct matching []byte layout (ptr, len, cap).
type _string struct {
	ptr    *byte   // address of the first byte of the string data
	length uintptr // number of bytes in the string
	cap    uintptr // capacity word; the constructors in this file set it equal to length
}
  17  
// The iterator state for a range over a string.
type stringIterator struct {
	// byteindex is the byte offset at which stringNext decodes the next rune;
	// stringNext advances it by the size of each decoded rune.
	byteindex uintptr
}
  22  
  23  // Return true iff the strings match.
  24  //
  25  //go:nobounds
  26  func stringEqual(x, y string) bool {
  27  	if len(x) != len(y) {
  28  		return false
  29  	}
  30  	for i := 0; i < len(x); i++ {
  31  		if x[i] != y[i] {
  32  			return false
  33  		}
  34  	}
  35  	return true
  36  }
  37  
  38  // sliceEqual compares two slices of any comparable element type.
  39  // Checks lengths first, then compares raw memory (elemSize * length bytes).
  40  func sliceEqual(xPtr, yPtr unsafe.Pointer, xLen, yLen, elemSize uintptr) bool {
  41  	if xLen != yLen {
  42  		return false
  43  	}
  44  	if xLen == 0 {
  45  		return true
  46  	}
  47  	return memequal(xPtr, yPtr, xLen*elemSize)
  48  }
  49  
  50  // Return true iff x < y.
  51  //
  52  //go:nobounds
  53  func stringLess(x, y string) bool {
  54  	l := len(x)
  55  	if m := len(y); m < l {
  56  		l = m
  57  	}
  58  	for i := 0; i < l; i++ {
  59  		if x[i] < y[i] {
  60  			return true
  61  		}
  62  		if x[i] > y[i] {
  63  			return false
  64  		}
  65  	}
  66  	return len(x) < len(y)
  67  }
  68  
  69  // Concatenate two strings.
  70  func stringConcat(x, y _string) _string {
  71  	if x.length == 0 {
  72  		return y
  73  	} else if y.length == 0 {
  74  		return x
  75  	}
  76  	length := x.length + y.length
  77  	buf := alloc(length, gclayout.NoPtrs.AsPtr())
  78  	memcpy(buf, unsafe.Pointer(x.ptr), x.length)
  79  	memcpy(unsafe.Add(buf, x.length), unsafe.Pointer(y.ptr), y.length)
  80  	result := _string{ptr: (*byte)(buf), length: length, cap: length}
  81  	return result
  82  }
  83  
  84  // Concatenate two byte slices. Used for | pipe concat.
  85  func bytesConcat(x, y []byte) []byte {
  86  	if len(x) == 0 {
  87  		return y
  88  	} else if len(y) == 0 {
  89  		return x
  90  	}
  91  	length := len(x) + len(y)
  92  	buf := alloc(uintptr(length), gclayout.NoPtrs.AsPtr())
  93  	memcpy(buf, unsafe.Pointer(&x[0]), uintptr(len(x)))
  94  	memcpy(unsafe.Add(buf, uintptr(len(x))), unsafe.Pointer(&y[0]), uintptr(len(y)))
  95  	result := (*[1 << 30]byte)(buf)[:length:length]
  96  	return result
  97  }
  98  
  99  
 100  // Convert a []rune slice to a string.
 101  func stringFromRunes(runeSlice []rune) (s _string) {
 102  	// Count the number of characters that will be in the string.
 103  	for _, r := range runeSlice {
 104  		_, numBytes := encodeUTF8(r)
 105  		s.length += numBytes
 106  	}
 107  	s.cap = s.length
 108  
 109  	// Allocate memory for the string.
 110  	s.ptr = (*byte)(alloc(s.length, gclayout.NoPtrs.AsPtr()))
 111  
 112  	// Encode runes to UTF-8 and store the resulting bytes in the string.
 113  	index := uintptr(0)
 114  	for _, r := range runeSlice {
 115  		array, numBytes := encodeUTF8(r)
 116  		for _, c := range array[:numBytes] {
 117  			*(*byte)(unsafe.Add(unsafe.Pointer(s.ptr), index)) = c
 118  			index++
 119  		}
 120  	}
 121  
 122  	return
 123  }
 124  
 125  // Convert a string to []rune slice.
 126  func stringToRunes(s string) []rune {
 127  	var n = 0
 128  	for range s {
 129  		n++
 130  	}
 131  	var r = make([]rune, n)
 132  	n = 0
 133  	for _, e := range s {
 134  		r[n] = e
 135  		n++
 136  	}
 137  	return r
 138  }
 139  
 140  // Create a string from a Unicode code point.
 141  func stringFromUnicode(x rune) _string {
 142  	array, length := encodeUTF8(x)
 143  	// Array will be heap allocated.
 144  	// The heap most likely doesn't work with blocks below 4 bytes, so there's
 145  	// no point in allocating a smaller buffer for the string here.
 146  	return _string{ptr: (*byte)(unsafe.Pointer(&array)), length: length, cap: length}
 147  }
 148  
 149  // Iterate over a string.
 150  // Returns (ok, key, value).
 151  func stringNext(s string, it *stringIterator) (bool, int, rune) {
 152  	if len(s) <= int(it.byteindex) {
 153  		return false, 0, 0
 154  	}
 155  	i := int(it.byteindex)
 156  	r, length := decodeUTF8(s, it.byteindex)
 157  	it.byteindex += length
 158  	return true, i, r
 159  }
 160  
 161  // Convert a Unicode code point into an array of bytes and its length.
 162  func encodeUTF8(x rune) ([4]byte, uintptr) {
 163  	// https://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
 164  	// Note: this code can probably be optimized (in size and speed).
 165  	switch {
 166  	case x <= 0x7f:
 167  		return [4]byte{byte(x), 0, 0, 0}, 1
 168  	case x <= 0x7ff:
 169  		b1 := 0xc0 | byte(x>>6)
 170  		b2 := 0x80 | byte(x&0x3f)
 171  		return [4]byte{b1, b2, 0, 0}, 2
 172  	case 0xd800 <= x && x <= 0xdfff:
 173  		// utf-16 surrogates are replaced with "invalid code point"
 174  		return [4]byte{0xef, 0xbf, 0xbd, 0}, 3
 175  	case x <= 0xffff:
 176  		b1 := 0xe0 | byte(x>>12)
 177  		b2 := 0x80 | byte((x>>6)&0x3f)
 178  		b3 := 0x80 | byte((x>>0)&0x3f)
 179  		return [4]byte{b1, b2, b3, 0}, 3
 180  	case x <= 0x10ffff:
 181  		b1 := 0xf0 | byte(x>>18)
 182  		b2 := 0x80 | byte((x>>12)&0x3f)
 183  		b3 := 0x80 | byte((x>>6)&0x3f)
 184  		b4 := 0x80 | byte((x>>0)&0x3f)
 185  		return [4]byte{b1, b2, b3, b4}, 4
 186  	default:
 187  		// Invalid Unicode code point.
 188  		return [4]byte{0xef, 0xbf, 0xbd, 0}, 3
 189  	}
 190  }
 191  
 192  // Decode a single UTF-8 character from a string.
 193  //
 194  //go:nobounds
 195  func decodeUTF8(s string, index uintptr) (rune, uintptr) {
 196  	remaining := uintptr(len(s)) - index // must be >= 1 before calling this function
 197  	x := s[index]
 198  	switch {
 199  	case x&0x80 == 0x00: // 0xxxxxxx
 200  		return rune(x), 1
 201  	case x&0xe0 == 0xc0: // 110xxxxx
 202  		if remaining < 2 || !isContinuation(s[index+1]) {
 203  			return 0xfffd, 1
 204  		}
 205  		r := (rune(x&0x1f) << 6) | (rune(s[index+1]) & 0x3f)
 206  		if r >= 1<<7 {
 207  			// Check whether the rune really needed to be encoded as a two-byte
 208  			// sequence. UTF-8 requires every rune to be encoded in the smallest
 209  			// sequence possible.
 210  			return r, 2
 211  		}
 212  	case x&0xf0 == 0xe0: // 1110xxxx
 213  		if remaining < 3 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) {
 214  			return 0xfffd, 1
 215  		}
 216  		r := (rune(x&0x0f) << 12) | ((rune(s[index+1]) & 0x3f) << 6) | (rune(s[index+2]) & 0x3f)
 217  		if r >= 1<<11 && !(r >= 0xD800 && r <= 0xDFFF) {
 218  			// Check whether the rune really needed to be encoded as a
 219  			// three-byte sequence and check that this is not a Unicode
 220  			// surrogate pair (which are not allowed by UTF-8).
 221  			return r, 3
 222  		}
 223  	case x&0xf8 == 0xf0: // 11110xxx
 224  		if remaining < 4 || !isContinuation(s[index+1]) || !isContinuation(s[index+2]) || !isContinuation(s[index+3]) {
 225  			return 0xfffd, 1
 226  		}
 227  		r := (rune(x&0x07) << 18) | ((rune(s[index+1]) & 0x3f) << 12) | ((rune(s[index+2]) & 0x3f) << 6) | (rune(s[index+3]) & 0x3f)
 228  		if r >= 1<<16 && r <= '\U0010FFFF' {
 229  			// Check whether this rune really needed to be encoded as a four
 230  			// byte sequence and check that the resulting rune is in the valid
 231  			// range (up to at most U+10FFFF).
 232  			return r, 4
 233  		}
 234  	}
 235  
 236  	// Failed to decode. Return the Unicode replacement character and a length of 1.
 237  	return 0xfffd, 1
 238  }
 239  
 240  // isContinuation returns true if (and only if) this is a UTF-8 continuation
 241  // byte.
 242  func isContinuation(b byte) bool {
 243  	// Continuation bytes have their topmost bits set to 0b10.
 244  	return b&0xc0 == 0x80
 245  }
 246  
 247  // Functions used in CGo.
 248  
 249  // Convert a Go string to a C string.
 250  func cgo_CString(s _string) unsafe.Pointer {
 251  	buf := malloc(s.length + 1)
 252  	memcpy(buf, unsafe.Pointer(s.ptr), s.length)
 253  	*(*byte)(unsafe.Add(buf, s.length)) = 0 // trailing 0 byte
 254  	return buf
 255  }
 256  
 257  // Convert a C string to a Go string.
 258  func cgo_GoString(cstr unsafe.Pointer) _string {
 259  	if cstr == nil {
 260  		return _string{}
 261  	}
 262  	return makeGoString(cstr, strlen(cstr))
 263  }
 264  
// Convert a C data buffer to a Go string (that possibly contains 0 bytes).
//
// Unlike cgo_GoString, the length is supplied by the caller instead of being
// derived from a terminating 0 byte; the call is forwarded unchanged to
// makeGoString. There is no nil check on cstr here.
func cgo_GoStringN(cstr unsafe.Pointer, length uintptr) _string {
	return makeGoString(cstr, length)
}
 269  
 270  // Make a Go string given a source buffer and a length.
 271  func makeGoString(cstr unsafe.Pointer, length uintptr) _string {
 272  	s := _string{
 273  		length: length,
 274  		cap:    length,
 275  	}
 276  	if s.length != 0 {
 277  		buf := make([]byte, s.length)
 278  		s.ptr = &buf[0]
 279  		memcpy(unsafe.Pointer(s.ptr), cstr, s.length)
 280  	}
 281  	return s
 282  }
 283  
 284  // Convert a C data buffer to a Go byte slice.
 285  func cgo_GoBytes(ptr unsafe.Pointer, length uintptr) []byte {
 286  	// Note: don't return nil if length is 0, to match the behavior of C.GoBytes
 287  	// of upstream Go.
 288  	buf := make([]byte, length)
 289  	if length != 0 {
 290  		memcpy(unsafe.Pointer(&buf[0]), ptr, uintptr(length))
 291  	}
 292  	return buf
 293  }
 294  
 295  func cgo_CBytes(b []byte) unsafe.Pointer {
 296  	p := malloc(uintptr(len(b)))
 297  	s := unsafe.Slice((*byte)(p), len(b))
 298  	copy(s, b)
 299  	return p
 300  }
 301