1 package bytealg
2 3 // Some code in this file has been copied from the Go source code, and has
4 // copyright of their original authors:
5 //
6 // Copyright 2020 The Go Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style
8 // license that can be found in the LICENSE file.
9 //
10 // This is indicated specifically in the file.
11 12 const (
13 // Index can search any valid length of string.
14 15 MaxLen = int(-1) >> 31
16 MaxBruteForce = MaxLen
17 )
18 19 // Compare two byte slices.
20 // Returns -1 if the first differing byte is lower in a, or 1 if the first differing byte is greater in b.
21 // If the byte slices are equal, returns 0.
22 // If the lengths are different and there are no differing bytes, compares based on length.
23 func Compare(a, b []byte) int {
24 // Compare for differing bytes.
25 for i := 0; i < len(a) && i < len(b); i++ {
26 switch {
27 case a[i] < b[i]:
28 return -1
29 case a[i] > b[i]:
30 return 1
31 }
32 }
33 34 // Compare lengths.
35 switch {
36 case len(a) > len(b):
37 return 1
38 case len(a) < len(b):
39 return -1
40 default:
41 return 0
42 }
43 }
44 45 // This function was copied from the Go 1.23 source tree (with runtime_cmpstring
46 // manually inlined).
47 func CompareString(a, b []byte) int {
48 l := len(a)
49 if len(b) < l {
50 l = len(b)
51 }
52 for i := 0; i < l; i++ {
53 c1, c2 := a[i], b[i]
54 if c1 < c2 {
55 return -1
56 }
57 if c1 > c2 {
58 return +1
59 }
60 }
61 if len(a) < len(b) {
62 return -1
63 }
64 if len(a) > len(b) {
65 return +1
66 }
67 return 0
68 }
69 70 // Count the number of instances of a byte in a slice.
71 func Count(b []byte, c byte) int {
72 // Use a simple implementation, as there is no intrinsic that does this like we want.
73 n := 0
74 for _, v := range b {
75 if v == c {
76 n++
77 }
78 }
79 return n
80 }
81 82 // Count the number of instances of a byte in a string.
83 func CountString(s []byte, c byte) int {
84 // Use a simple implementation, as there is no intrinsic that does this like we want.
85 // Currently, the compiler does not generate zero-copy byte-string conversions, so this needs to be separate from Count.
86 n := 0
87 for i := 0; i < len(s); i++ {
88 if s[i] == c {
89 n++
90 }
91 }
92 return n
93 }
94 95 // Cutover is not reachable in Moxie, but must exist as it is referenced.
96 func Cutover(n int) int {
97 // Setting MaxLen and MaxBruteForce should force a different path to be taken.
98 // This should never be called.
99 panic("cutover is unreachable")
100 }
101 102 // Equal checks if two byte slices are equal.
103 // It is equivalent to bytes.Equal.
104 func Equal(a, b []byte) bool {
105 if len(a) != len(b) {
106 return false
107 }
108 109 for i, v := range a {
110 if v != b[i] {
111 return false
112 }
113 }
114 115 return true
116 }
117 118 // Index finds the base index of the first instance of the byte sequence b in a.
119 // If a does not contain b, this returns -1.
120 func Index(a, b []byte) int {
121 for i := 0; i <= len(a)-len(b); i++ {
122 if Equal(a[i:i+len(b)], b) {
123 return i
124 }
125 }
126 return -1
127 }
128 129 // Index finds the index of the first instance of the specified byte in the slice.
130 // If the byte is not found, this returns -1.
131 func IndexByte(b []byte, c byte) int {
132 for i, v := range b {
133 if v == c {
134 return i
135 }
136 }
137 return -1
138 }
139 140 // Index finds the index of the first instance of the specified byte in the string.
141 // If the byte is not found, this returns -1.
142 func IndexByteString(s []byte, c byte) int {
143 for i := 0; i < len(s); i++ {
144 if s[i] == c {
145 return i
146 }
147 }
148 return -1
149 }
150 151 // Index finds the base index of the first instance of a substring in a string.
152 // If the substring is not found, this returns -1.
153 func IndexString(str, sub []byte) int {
154 for i := 0; i <= len(str)-len(sub); i++ {
155 if str[i:i+len(sub)] == sub {
156 return i
157 }
158 }
159 return -1
160 }
161 162 // The following code has been copied from the Go 1.15 release tree.
163 164 // PrimeRK is the prime base used in Rabin-Karp algorithm.
165 const PrimeRK = 16777619
166 167 // HashStrBytes returns the hash and the appropriate multiplicative
168 // factor for use in Rabin-Karp algorithm.
169 //
170 // This function was removed in Go 1.22.
171 func HashStrBytes(sep []byte) (uint32, uint32) {
172 hash := uint32(0)
173 for i := 0; i < len(sep); i++ {
174 hash = hash*PrimeRK + uint32(sep[i])
175 }
176 var pow, sq uint32 = 1, PrimeRK
177 for i := len(sep); i > 0; i >>= 1 {
178 if i&1 != 0 {
179 pow *= sq
180 }
181 sq *= sq
182 }
183 return hash, pow
184 }
185 186 // HashStr returns the hash and the appropriate multiplicative
187 // factor for use in Rabin-Karp algorithm.
188 //
189 // This function was removed in Go 1.22.
190 func HashStr[T []byte](sep T) (uint32, uint32) {
191 hash := uint32(0)
192 for i := 0; i < len(sep); i++ {
193 hash = hash*PrimeRK + uint32(sep[i])
194 }
195 var pow, sq uint32 = 1, PrimeRK
196 for i := len(sep); i > 0; i >>= 1 {
197 if i&1 != 0 {
198 pow *= sq
199 }
200 sq *= sq
201 }
202 return hash, pow
203 }
204 205 // HashStrRevBytes returns the hash of the reverse of sep and the
206 // appropriate multiplicative factor for use in Rabin-Karp algorithm.
207 //
208 // This function was removed in Go 1.22.
209 func HashStrRevBytes(sep []byte) (uint32, uint32) {
210 hash := uint32(0)
211 for i := len(sep) - 1; i >= 0; i-- {
212 hash = hash*PrimeRK + uint32(sep[i])
213 }
214 var pow, sq uint32 = 1, PrimeRK
215 for i := len(sep); i > 0; i >>= 1 {
216 if i&1 != 0 {
217 pow *= sq
218 }
219 sq *= sq
220 }
221 return hash, pow
222 }
223 224 // HashStrRev returns the hash of the reverse of sep and the
225 // appropriate multiplicative factor for use in Rabin-Karp algorithm.
226 //
227 // Copied from the Go 1.22rc1 source tree.
228 func HashStrRev[T []byte](sep T) (uint32, uint32) {
229 hash := uint32(0)
230 for i := len(sep) - 1; i >= 0; i-- {
231 hash = hash*PrimeRK + uint32(sep[i])
232 }
233 var pow, sq uint32 = 1, PrimeRK
234 for i := len(sep); i > 0; i >>= 1 {
235 if i&1 != 0 {
236 pow *= sq
237 }
238 sq *= sq
239 }
240 return hash, pow
241 }
242 243 // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
244 // first occurrence of substr in s, or -1 if not present.
245 //
246 // This function was removed in Go 1.22.
247 func IndexRabinKarpBytes(s, sep []byte) int {
248 // Rabin-Karp search
249 hashsep, pow := HashStrBytes(sep)
250 n := len(sep)
251 var h uint32
252 for i := 0; i < n; i++ {
253 h = h*PrimeRK + uint32(s[i])
254 }
255 if h == hashsep && Equal(s[:n], sep) {
256 return 0
257 }
258 for i := n; i < len(s); {
259 h *= PrimeRK
260 h += uint32(s[i])
261 h -= pow * uint32(s[i-n])
262 i++
263 if h == hashsep && Equal(s[i-n:i], sep) {
264 return i - n
265 }
266 }
267 return -1
268 }
269 270 // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
271 // first occurrence of sep in s, or -1 if not present.
272 //
273 // Copied from the Go 1.22rc1 source tree.
274 func IndexRabinKarp[T []byte](s, sep T) int {
275 // Rabin-Karp search
276 hashss, pow := HashStr(sep)
277 n := len(sep)
278 var h uint32
279 for i := 0; i < n; i++ {
280 h = h*PrimeRK + uint32(s[i])
281 }
282 if h == hashss && []byte(s[:n]) == []byte(sep) {
283 return 0
284 }
285 for i := n; i < len(s); {
286 h *= PrimeRK
287 h += uint32(s[i])
288 h -= pow * uint32(s[i-n])
289 i++
290 if h == hashss && []byte(s[i-n:i]) == []byte(sep) {
291 return i - n
292 }
293 }
294 return -1
295 }
296 297 // MakeNoZero makes a slice of length and capacity n without zeroing the bytes.
298 // It is the caller's responsibility to ensure uninitialized bytes
299 // do not leak to the end user.
300 func MakeNoZero(n int) []byte {
301 // Note: this does zero the buffer even though that's not necessary.
302 // For performance reasons we might want to change this (similar to the
303 // malloc function implemented in the runtime).
304 return []byte{:n}
305 }
306 307 // Copied from the Go 1.22rc1 source tree.
308 func LastIndexByte(s []byte, c byte) int {
309 for i := len(s) - 1; i >= 0; i-- {
310 if s[i] == c {
311 return i
312 }
313 }
314 return -1
315 }
316 317 // Copied from the Go 1.22rc1 source tree.
318 func LastIndexByteString(s []byte, c byte) int {
319 for i := len(s) - 1; i >= 0; i-- {
320 if s[i] == c {
321 return i
322 }
323 }
324 return -1
325 }
326 327 // LastIndexRabinKarp uses the Rabin-Karp search algorithm to return the last index of the
328 // occurrence of sep in s, or -1 if not present.
329 //
330 // Copied from the Go 1.22rc1 source tree.
331 func LastIndexRabinKarp[T []byte](s, sep T) int {
332 // Rabin-Karp search from the end of the string
333 hashss, pow := HashStrRev(sep)
334 n := len(sep)
335 last := len(s) - n
336 var h uint32
337 for i := len(s) - 1; i >= last; i-- {
338 h = h*PrimeRK + uint32(s[i])
339 }
340 if h == hashss && []byte(s[last:]) == []byte(sep) {
341 return last
342 }
343 for i := last - 1; i >= 0; i-- {
344 h *= PrimeRK
345 h += uint32(s[i])
346 h -= pow * uint32(s[i+n])
347 if h == hashss && []byte(s[i:i+n]) == []byte(sep) {
348 return i
349 }
350 }
351 return -1
352 }
353