bytealg.mx raw

   1  package bytealg
   2  
   3  // Some code in this file has been copied from the Go source code, and has
   4  // copyright of their original authors:
   5  //
   6  // Copyright 2020 The Go Authors. All rights reserved.
   7  // Use of this source code is governed by a BSD-style
   8  // license that can be found in the LICENSE file.
   9  //
  10  // This is indicated specifically in the file.
  11  
  12  const (
  13  	// Index can search any valid length of string.
  14  
  15  	MaxLen        = int(-1) >> 31
  16  	MaxBruteForce = MaxLen
  17  )
  18  
  19  // Compare two byte slices.
  20  // Returns -1 if the first differing byte is lower in a, or 1 if the first differing byte is greater in b.
  21  // If the byte slices are equal, returns 0.
  22  // If the lengths are different and there are no differing bytes, compares based on length.
  23  func Compare(a, b []byte) int {
  24  	// Compare for differing bytes.
  25  	for i := 0; i < len(a) && i < len(b); i++ {
  26  		switch {
  27  		case a[i] < b[i]:
  28  			return -1
  29  		case a[i] > b[i]:
  30  			return 1
  31  		}
  32  	}
  33  
  34  	// Compare lengths.
  35  	switch {
  36  	case len(a) > len(b):
  37  		return 1
  38  	case len(a) < len(b):
  39  		return -1
  40  	default:
  41  		return 0
  42  	}
  43  }
  44  
  45  // This function was copied from the Go 1.23 source tree (with runtime_cmpstring
  46  // manually inlined).
  47  func CompareString(a, b []byte) int {
  48  	l := len(a)
  49  	if len(b) < l {
  50  		l = len(b)
  51  	}
  52  	for i := 0; i < l; i++ {
  53  		c1, c2 := a[i], b[i]
  54  		if c1 < c2 {
  55  			return -1
  56  		}
  57  		if c1 > c2 {
  58  			return +1
  59  		}
  60  	}
  61  	if len(a) < len(b) {
  62  		return -1
  63  	}
  64  	if len(a) > len(b) {
  65  		return +1
  66  	}
  67  	return 0
  68  }
  69  
  70  // Count the number of instances of a byte in a slice.
  71  func Count(b []byte, c byte) int {
  72  	// Use a simple implementation, as there is no intrinsic that does this like we want.
  73  	n := 0
  74  	for _, v := range b {
  75  		if v == c {
  76  			n++
  77  		}
  78  	}
  79  	return n
  80  }
  81  
  82  // Count the number of instances of a byte in a string.
  83  func CountString(s []byte, c byte) int {
  84  	// Use a simple implementation, as there is no intrinsic that does this like we want.
  85  	// Currently, the compiler does not generate zero-copy byte-string conversions, so this needs to be separate from Count.
  86  	n := 0
  87  	for i := 0; i < len(s); i++ {
  88  		if s[i] == c {
  89  			n++
  90  		}
  91  	}
  92  	return n
  93  }
  94  
  95  // Cutover is not reachable in Moxie, but must exist as it is referenced.
  96  func Cutover(n int) int {
  97  	// Setting MaxLen and MaxBruteForce should force a different path to be taken.
  98  	// This should never be called.
  99  	panic("cutover is unreachable")
 100  }
 101  
 102  // Equal checks if two byte slices are equal.
 103  // It is equivalent to bytes.Equal.
 104  func Equal(a, b []byte) bool {
 105  	if len(a) != len(b) {
 106  		return false
 107  	}
 108  
 109  	for i, v := range a {
 110  		if v != b[i] {
 111  			return false
 112  		}
 113  	}
 114  
 115  	return true
 116  }
 117  
 118  // Index finds the base index of the first instance of the byte sequence b in a.
 119  // If a does not contain b, this returns -1.
 120  func Index(a, b []byte) int {
 121  	for i := 0; i <= len(a)-len(b); i++ {
 122  		if Equal(a[i:i+len(b)], b) {
 123  			return i
 124  		}
 125  	}
 126  	return -1
 127  }
 128  
 129  // Index finds the index of the first instance of the specified byte in the slice.
 130  // If the byte is not found, this returns -1.
 131  func IndexByte(b []byte, c byte) int {
 132  	for i, v := range b {
 133  		if v == c {
 134  			return i
 135  		}
 136  	}
 137  	return -1
 138  }
 139  
 140  // Index finds the index of the first instance of the specified byte in the string.
 141  // If the byte is not found, this returns -1.
 142  func IndexByteString(s []byte, c byte) int {
 143  	for i := 0; i < len(s); i++ {
 144  		if s[i] == c {
 145  			return i
 146  		}
 147  	}
 148  	return -1
 149  }
 150  
 151  // Index finds the base index of the first instance of a substring in a string.
 152  // If the substring is not found, this returns -1.
 153  func IndexString(str, sub []byte) int {
 154  	for i := 0; i <= len(str)-len(sub); i++ {
 155  		if str[i:i+len(sub)] == sub {
 156  			return i
 157  		}
 158  	}
 159  	return -1
 160  }
 161  
 162  // The following code has been copied from the Go 1.15 release tree.
 163  
 164  // PrimeRK is the prime base used in Rabin-Karp algorithm.
 165  const PrimeRK = 16777619
 166  
 167  // HashStrBytes returns the hash and the appropriate multiplicative
 168  // factor for use in Rabin-Karp algorithm.
 169  //
 170  // This function was removed in Go 1.22.
 171  func HashStrBytes(sep []byte) (uint32, uint32) {
 172  	hash := uint32(0)
 173  	for i := 0; i < len(sep); i++ {
 174  		hash = hash*PrimeRK + uint32(sep[i])
 175  	}
 176  	var pow, sq uint32 = 1, PrimeRK
 177  	for i := len(sep); i > 0; i >>= 1 {
 178  		if i&1 != 0 {
 179  			pow *= sq
 180  		}
 181  		sq *= sq
 182  	}
 183  	return hash, pow
 184  }
 185  
 186  // HashStr returns the hash and the appropriate multiplicative
 187  // factor for use in Rabin-Karp algorithm.
 188  //
 189  // This function was removed in Go 1.22.
 190  func HashStr[T []byte](sep T) (uint32, uint32) {
 191  	hash := uint32(0)
 192  	for i := 0; i < len(sep); i++ {
 193  		hash = hash*PrimeRK + uint32(sep[i])
 194  	}
 195  	var pow, sq uint32 = 1, PrimeRK
 196  	for i := len(sep); i > 0; i >>= 1 {
 197  		if i&1 != 0 {
 198  			pow *= sq
 199  		}
 200  		sq *= sq
 201  	}
 202  	return hash, pow
 203  }
 204  
 205  // HashStrRevBytes returns the hash of the reverse of sep and the
 206  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
 207  //
 208  // This function was removed in Go 1.22.
 209  func HashStrRevBytes(sep []byte) (uint32, uint32) {
 210  	hash := uint32(0)
 211  	for i := len(sep) - 1; i >= 0; i-- {
 212  		hash = hash*PrimeRK + uint32(sep[i])
 213  	}
 214  	var pow, sq uint32 = 1, PrimeRK
 215  	for i := len(sep); i > 0; i >>= 1 {
 216  		if i&1 != 0 {
 217  			pow *= sq
 218  		}
 219  		sq *= sq
 220  	}
 221  	return hash, pow
 222  }
 223  
 224  // HashStrRev returns the hash of the reverse of sep and the
 225  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
 226  //
 227  // Copied from the Go 1.22rc1 source tree.
 228  func HashStrRev[T []byte](sep T) (uint32, uint32) {
 229  	hash := uint32(0)
 230  	for i := len(sep) - 1; i >= 0; i-- {
 231  		hash = hash*PrimeRK + uint32(sep[i])
 232  	}
 233  	var pow, sq uint32 = 1, PrimeRK
 234  	for i := len(sep); i > 0; i >>= 1 {
 235  		if i&1 != 0 {
 236  			pow *= sq
 237  		}
 238  		sq *= sq
 239  	}
 240  	return hash, pow
 241  }
 242  
 243  // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
 244  // first occurrence of substr in s, or -1 if not present.
 245  //
 246  // This function was removed in Go 1.22.
 247  func IndexRabinKarpBytes(s, sep []byte) int {
 248  	// Rabin-Karp search
 249  	hashsep, pow := HashStrBytes(sep)
 250  	n := len(sep)
 251  	var h uint32
 252  	for i := 0; i < n; i++ {
 253  		h = h*PrimeRK + uint32(s[i])
 254  	}
 255  	if h == hashsep && Equal(s[:n], sep) {
 256  		return 0
 257  	}
 258  	for i := n; i < len(s); {
 259  		h *= PrimeRK
 260  		h += uint32(s[i])
 261  		h -= pow * uint32(s[i-n])
 262  		i++
 263  		if h == hashsep && Equal(s[i-n:i], sep) {
 264  			return i - n
 265  		}
 266  	}
 267  	return -1
 268  }
 269  
 270  // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
 271  // first occurrence of sep in s, or -1 if not present.
 272  //
 273  // Copied from the Go 1.22rc1 source tree.
 274  func IndexRabinKarp[T []byte](s, sep T) int {
 275  	// Rabin-Karp search
 276  	hashss, pow := HashStr(sep)
 277  	n := len(sep)
 278  	var h uint32
 279  	for i := 0; i < n; i++ {
 280  		h = h*PrimeRK + uint32(s[i])
 281  	}
 282  	if h == hashss && []byte(s[:n]) == []byte(sep) {
 283  		return 0
 284  	}
 285  	for i := n; i < len(s); {
 286  		h *= PrimeRK
 287  		h += uint32(s[i])
 288  		h -= pow * uint32(s[i-n])
 289  		i++
 290  		if h == hashss && []byte(s[i-n:i]) == []byte(sep) {
 291  			return i - n
 292  		}
 293  	}
 294  	return -1
 295  }
 296  
 297  // MakeNoZero makes a slice of length and capacity n without zeroing the bytes.
 298  // It is the caller's responsibility to ensure uninitialized bytes
 299  // do not leak to the end user.
 300  func MakeNoZero(n int) []byte {
 301  	// Note: this does zero the buffer even though that's not necessary.
 302  	// For performance reasons we might want to change this (similar to the
 303  	// malloc function implemented in the runtime).
 304  	return []byte{:n}
 305  }
 306  
 307  // Copied from the Go 1.22rc1 source tree.
 308  func LastIndexByte(s []byte, c byte) int {
 309  	for i := len(s) - 1; i >= 0; i-- {
 310  		if s[i] == c {
 311  			return i
 312  		}
 313  	}
 314  	return -1
 315  }
 316  
 317  // Copied from the Go 1.22rc1 source tree.
 318  func LastIndexByteString(s []byte, c byte) int {
 319  	for i := len(s) - 1; i >= 0; i-- {
 320  		if s[i] == c {
 321  			return i
 322  		}
 323  	}
 324  	return -1
 325  }
 326  
 327  // LastIndexRabinKarp uses the Rabin-Karp search algorithm to return the last index of the
 328  // occurrence of sep in s, or -1 if not present.
 329  //
 330  // Copied from the Go 1.22rc1 source tree.
 331  func LastIndexRabinKarp[T []byte](s, sep T) int {
 332  	// Rabin-Karp search from the end of the string
 333  	hashss, pow := HashStrRev(sep)
 334  	n := len(sep)
 335  	last := len(s) - n
 336  	var h uint32
 337  	for i := len(s) - 1; i >= last; i-- {
 338  		h = h*PrimeRK + uint32(s[i])
 339  	}
 340  	if h == hashss && []byte(s[last:]) == []byte(sep) {
 341  		return last
 342  	}
 343  	for i := last - 1; i >= 0; i-- {
 344  		h *= PrimeRK
 345  		h += uint32(s[i])
 346  		h -= pow * uint32(s[i+n])
 347  		if h == hashss && []byte(s[i:i+n]) == []byte(sep) {
 348  			return i
 349  		}
 350  	}
 351  	return -1
 352  }
 353