strings.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package strings implements simple functions to manipulate UTF-8 encoded strings.
   6  //
   7  // For information about UTF-8 strings in Go, see https://blog.golang.org/strings.
   8  package strings
   9  
  10  import (
  11  	"internal/bytealg"
  12  	"internal/stringslite"
  13  	"math/bits"
  14  	"unicode"
  15  	"unicode/utf8"
  16  )
  17  
  18  const maxInt = int(^uint(0) >> 1)
  19  
  20  // explode splits s into a slice of UTF-8 strings,
  21  // one string per Unicode character up to a maximum of n (n < 0 means no limit).
  22  // Invalid UTF-8 bytes are sliced individually.
  23  func explode(s []byte, n int) [][]byte {
  24  	l := utf8.RuneCountInString(s)
  25  	if n < 0 || n > l {
  26  		n = l
  27  	}
  28  	a := [][]byte{:n}
  29  	for i := 0; i < n-1; i++ {
  30  		_, size := utf8.DecodeRuneInString(s)
  31  		a[i] = s[:size]
  32  		s = s[size:]
  33  	}
  34  	if n > 0 {
  35  		a[n-1] = s
  36  	}
  37  	return a
  38  }
  39  
  40  // Count counts the number of non-overlapping instances of substr in s.
  41  // If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
  42  func Count(s, substr []byte) int {
  43  	// special case
  44  	if len(substr) == 0 {
  45  		return utf8.RuneCountInString(s) + 1
  46  	}
  47  	if len(substr) == 1 {
  48  		return bytealg.CountString(s, substr[0])
  49  	}
  50  	n := 0
  51  	for {
  52  		i := Index(s, substr)
  53  		if i == -1 {
  54  			return n
  55  		}
  56  		n++
  57  		s = s[i+len(substr):]
  58  	}
  59  }
  60  
  61  // Contains reports whether substr is within s.
  62  func Contains(s, substr []byte) bool {
  63  	return Index(s, substr) >= 0
  64  }
  65  
  66  // ContainsAny reports whether any Unicode code points in chars are within s.
  67  func ContainsAny(s, chars []byte) bool {
  68  	return IndexAny(s, chars) >= 0
  69  }
  70  
  71  // ContainsRune reports whether the Unicode code point r is within s.
  72  func ContainsRune(s []byte, r rune) bool {
  73  	return IndexRune(s, r) >= 0
  74  }
  75  
  76  // ContainsFunc reports whether any Unicode code points r within s satisfy f(r).
  77  func ContainsFunc(s []byte, f func(rune) bool) bool {
  78  	return IndexFunc(s, f) >= 0
  79  }
  80  
  81  // LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
  82  func LastIndex(s, substr []byte) int {
  83  	n := len(substr)
  84  	switch {
  85  	case n == 0:
  86  		return len(s)
  87  	case n == 1:
  88  		return bytealg.LastIndexByteString(s, substr[0])
  89  	case n == len(s):
  90  		if substr == s {
  91  			return 0
  92  		}
  93  		return -1
  94  	case n > len(s):
  95  		return -1
  96  	}
  97  	// Rabin-Karp search from the end of the string
  98  	hashss, pow := bytealg.HashStrRev(substr)
  99  	last := len(s) - n
 100  	var h uint32
 101  	for i := len(s) - 1; i >= last; i-- {
 102  		h = h*bytealg.PrimeRK + uint32(s[i])
 103  	}
 104  	if h == hashss && s[last:] == substr {
 105  		return last
 106  	}
 107  	for i := last - 1; i >= 0; i-- {
 108  		h *= bytealg.PrimeRK
 109  		h += uint32(s[i])
 110  		h -= pow * uint32(s[i+n])
 111  		if h == hashss && s[i:i+n] == substr {
 112  			return i
 113  		}
 114  	}
 115  	return -1
 116  }
 117  
 118  // IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
 119  func IndexByte(s []byte, c byte) int {
 120  	return stringslite.IndexByte(s, c)
 121  }
 122  
 123  // IndexRune returns the index of the first instance of the Unicode code point
 124  // r, or -1 if rune is not present in s.
 125  // If r is [utf8.RuneError], it returns the first instance of any
 126  // invalid UTF-8 byte sequence.
 127  func IndexRune(s []byte, r rune) int {
 128  	const haveFastIndex = bytealg.MaxBruteForce > 0
 129  	switch {
 130  	case 0 <= r && r < utf8.RuneSelf:
 131  		return IndexByte(s, byte(r))
 132  	case r == utf8.RuneError:
 133  		for i, r := range s {
 134  			if r == utf8.RuneError {
 135  				return i
 136  			}
 137  		}
 138  		return -1
 139  	case !utf8.ValidRune(r):
 140  		return -1
 141  	default:
 142  		// Search for rune r using the last byte of its UTF-8 encoded form.
 143  		// The distribution of the last byte is more uniform compared to the
 144  		// first byte which has a 78% chance of being [240, 243, 244].
 145  		rs := []byte(r)
 146  		last := len(rs) - 1
 147  		i := last
 148  		fails := 0
 149  		for i < len(s) {
 150  			if s[i] != rs[last] {
 151  				o := IndexByte(s[i+1:], rs[last])
 152  				if o < 0 {
 153  					return -1
 154  				}
 155  				i += o + 1
 156  			}
 157  			// Step backwards comparing bytes.
 158  			for j := 1; j < len(rs); j++ {
 159  				if s[i-j] != rs[last-j] {
 160  					goto next
 161  				}
 162  			}
 163  			return i - last
 164  		next:
 165  			fails++
 166  			i++
 167  			if (haveFastIndex && fails > bytealg.Cutover(i)) && i < len(s) ||
 168  				(!haveFastIndex && fails >= 4+i>>4 && i < len(s)) {
 169  				goto fallback
 170  			}
 171  		}
 172  		return -1
 173  
 174  	fallback:
 175  		// see comment in ../bytes/bytes.go
 176  		if haveFastIndex {
 177  			if j := bytealg.IndexString(s[i-last:], []byte(r)); j >= 0 {
 178  				return i + j - last
 179  			}
 180  		} else {
 181  			c0 := rs[last]
 182  			c1 := rs[last-1]
 183  		loop:
 184  			for ; i < len(s); i++ {
 185  				if s[i] == c0 && s[i-1] == c1 {
 186  					for k := 2; k < len(rs); k++ {
 187  						if s[i-k] != rs[last-k] {
 188  							continue loop
 189  						}
 190  					}
 191  					return i - last
 192  				}
 193  			}
 194  		}
 195  		return -1
 196  	}
 197  }
 198  
 199  // IndexAny returns the index of the first instance of any Unicode code point
 200  // from chars in s, or -1 if no Unicode code point from chars is present in s.
 201  func IndexAny(s, chars []byte) int {
 202  	if chars == "" {
 203  		// Avoid scanning all of s.
 204  		return -1
 205  	}
 206  	if len(chars) == 1 {
 207  		// Avoid scanning all of s.
 208  		r := rune(chars[0])
 209  		if r >= utf8.RuneSelf {
 210  			r = utf8.RuneError
 211  		}
 212  		return IndexRune(s, r)
 213  	}
 214  	if len(s) > 8 {
 215  		if as, isASCII := makeASCIISet(chars); isASCII {
 216  			for i := 0; i < len(s); i++ {
 217  				if as.contains(s[i]) {
 218  					return i
 219  				}
 220  			}
 221  			return -1
 222  		}
 223  	}
 224  	for i, c := range s {
 225  		if IndexRune(chars, c) >= 0 {
 226  			return i
 227  		}
 228  	}
 229  	return -1
 230  }
 231  
 232  // LastIndexAny returns the index of the last instance of any Unicode code
 233  // point from chars in s, or -1 if no Unicode code point from chars is
 234  // present in s.
 235  func LastIndexAny(s, chars []byte) int {
 236  	if chars == "" {
 237  		// Avoid scanning all of s.
 238  		return -1
 239  	}
 240  	if len(s) == 1 {
 241  		rc := rune(s[0])
 242  		if rc >= utf8.RuneSelf {
 243  			rc = utf8.RuneError
 244  		}
 245  		if IndexRune(chars, rc) >= 0 {
 246  			return 0
 247  		}
 248  		return -1
 249  	}
 250  	if len(s) > 8 {
 251  		if as, isASCII := makeASCIISet(chars); isASCII {
 252  			for i := len(s) - 1; i >= 0; i-- {
 253  				if as.contains(s[i]) {
 254  					return i
 255  				}
 256  			}
 257  			return -1
 258  		}
 259  	}
 260  	if len(chars) == 1 {
 261  		rc := rune(chars[0])
 262  		if rc >= utf8.RuneSelf {
 263  			rc = utf8.RuneError
 264  		}
 265  		for i := len(s); i > 0; {
 266  			r, size := utf8.DecodeLastRuneInString(s[:i])
 267  			i -= size
 268  			if rc == r {
 269  				return i
 270  			}
 271  		}
 272  		return -1
 273  	}
 274  	for i := len(s); i > 0; {
 275  		r, size := utf8.DecodeLastRuneInString(s[:i])
 276  		i -= size
 277  		if IndexRune(chars, r) >= 0 {
 278  			return i
 279  		}
 280  	}
 281  	return -1
 282  }
 283  
 284  // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
 285  func LastIndexByte(s []byte, c byte) int {
 286  	return bytealg.LastIndexByteString(s, c)
 287  }
 288  
 289  // Generic split: splits after each instance of sep,
 290  // including sepSave bytes of sep in the subarrays.
 291  func genSplit(s, sep []byte, sepSave, n int) [][]byte {
 292  	if n == 0 {
 293  		return nil
 294  	}
 295  	if sep == "" {
 296  		return explode(s, n)
 297  	}
 298  	if n < 0 {
 299  		n = Count(s, sep) + 1
 300  	}
 301  
 302  	if n > len(s)+1 {
 303  		n = len(s) + 1
 304  	}
 305  	a := [][]byte{:n}
 306  	n--
 307  	i := 0
 308  	for i < n {
 309  		m := Index(s, sep)
 310  		if m < 0 {
 311  			break
 312  		}
 313  		a[i] = s[:m+sepSave]
 314  		s = s[m+len(sep):]
 315  		i++
 316  	}
 317  	a[i] = s
 318  	return a[:i+1]
 319  }
 320  
 321  // SplitN slices s into substrings separated by sep and returns a slice of
 322  // the substrings between those separators.
 323  //
 324  // The count determines the number of substrings to return:
 325  //   - n > 0: at most n substrings; the last substring will be the unsplit remainder;
 326  //   - n == 0: the result is nil (zero substrings);
 327  //   - n < 0: all substrings.
 328  //
 329  // Edge cases for s and sep (for example, empty strings) are handled
 330  // as described in the documentation for [Split].
 331  //
 332  // To split around the first instance of a separator, see [Cut].
 333  func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
 334  
 335  // SplitAfterN slices s into substrings after each instance of sep and
 336  // returns a slice of those substrings.
 337  //
 338  // The count determines the number of substrings to return:
 339  //   - n > 0: at most n substrings; the last substring will be the unsplit remainder;
 340  //   - n == 0: the result is nil (zero substrings);
 341  //   - n < 0: all substrings.
 342  //
 343  // Edge cases for s and sep (for example, empty strings) are handled
 344  // as described in the documentation for [SplitAfter].
 345  func SplitAfterN(s, sep []byte, n int) [][]byte {
 346  	return genSplit(s, sep, len(sep), n)
 347  }
 348  
 349  // Split slices s into all substrings separated by sep and returns a slice of
 350  // the substrings between those separators.
 351  //
 352  // If s does not contain sep and sep is not empty, Split returns a
 353  // slice of length 1 whose only element is s.
 354  //
 355  // If sep is empty, Split splits after each UTF-8 sequence. If both s
 356  // and sep are empty, Split returns an empty slice.
 357  //
 358  // It is equivalent to [SplitN] with a count of -1.
 359  //
 360  // To split around the first instance of a separator, see [Cut].
 361  func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
 362  
 363  // SplitAfter slices s into all substrings after each instance of sep and
 364  // returns a slice of those substrings.
 365  //
 366  // If s does not contain sep and sep is not empty, SplitAfter returns
 367  // a slice of length 1 whose only element is s.
 368  //
 369  // If sep is empty, SplitAfter splits after each UTF-8 sequence. If
 370  // both s and sep are empty, SplitAfter returns an empty slice.
 371  //
 372  // It is equivalent to [SplitAfterN] with a count of -1.
 373  func SplitAfter(s, sep []byte) [][]byte {
 374  	return genSplit(s, sep, len(sep), -1)
 375  }
 376  
 377  var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
 378  
 379  // Fields splits the string s around each instance of one or more consecutive white space
 380  // characters, as defined by [unicode.IsSpace], returning a slice of substrings of s or an
 381  // empty slice if s contains only white space. Every element of the returned slice is
 382  // non-empty. Unlike [Split], leading and trailing runs runs of white space characters
 383  // are discarded.
 384  func Fields(s []byte) [][]byte {
 385  	// First count the fields.
 386  	// This is an exact count if s is ASCII, otherwise it is an approximation.
 387  	n := 0
 388  	wasSpace := 1
 389  	// setBits is used to track which bits are set in the bytes of s.
 390  	setBits := uint8(0)
 391  	for i := 0; i < len(s); i++ {
 392  		r := s[i]
 393  		setBits |= r
 394  		isSpace := int(asciiSpace[r])
 395  		n += wasSpace & ^isSpace
 396  		wasSpace = isSpace
 397  	}
 398  
 399  	if setBits >= utf8.RuneSelf {
 400  		// Some runes in the input string are not ASCII.
 401  		return FieldsFunc(s, unicode.IsSpace)
 402  	}
 403  	// ASCII fast path
 404  	a := [][]byte{:n}
 405  	na := 0
 406  	fieldStart := 0
 407  	i := 0
 408  	// Skip spaces in the front of the input.
 409  	for i < len(s) && asciiSpace[s[i]] != 0 {
 410  		i++
 411  	}
 412  	fieldStart = i
 413  	for i < len(s) {
 414  		if asciiSpace[s[i]] == 0 {
 415  			i++
 416  			continue
 417  		}
 418  		a[na] = s[fieldStart:i]
 419  		na++
 420  		i++
 421  		// Skip spaces in between fields.
 422  		for i < len(s) && asciiSpace[s[i]] != 0 {
 423  			i++
 424  		}
 425  		fieldStart = i
 426  	}
 427  	if fieldStart < len(s) { // Last field might end at EOF.
 428  		a[na] = s[fieldStart:]
 429  	}
 430  	return a
 431  }
 432  
 433  // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
 434  // and returns an array of slices of s. If all code points in s satisfy f(c) or the
 435  // string is empty, an empty slice is returned. Every element of the returned slice is
 436  // non-empty. Unlike [SplitFunc], leading and trailing runs of code points satisfying f(c)
 437  // are discarded.
 438  //
 439  // FieldsFunc makes no guarantees about the order in which it calls f(c)
 440  // and assumes that f always returns the same value for a given c.
 441  func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
 442  	// A span is used to record a slice of s of the form s[start:end].
 443  	// The start index is inclusive and the end index is exclusive.
 444  	type span struct {
 445  		start int
 446  		end   int
 447  	}
 448  	spans := []span{:0:32}
 449  
 450  	// Find the field start and end indices.
 451  	// Doing this in a separate pass (rather than slicing the string s
 452  	// and collecting the result substrings right away) is significantly
 453  	// more efficient, possibly due to cache effects.
 454  	start := -1 // valid span start if >= 0
 455  	for end, rune := range s {
 456  		if f(rune) {
 457  			if start >= 0 {
 458  				spans = append(spans, span{start, end})
 459  				// Set start to a negative value.
 460  				// Note: using -1 here consistently and reproducibly
 461  				// slows down this code by a several percent on amd64.
 462  				start = ^start
 463  			}
 464  		} else {
 465  			if start < 0 {
 466  				start = end
 467  			}
 468  		}
 469  	}
 470  
 471  	// Last field might end at EOF.
 472  	if start >= 0 {
 473  		spans = append(spans, span{start, len(s)})
 474  	}
 475  
 476  	// Create strings from recorded field indices.
 477  	a := [][]byte{:len(spans)}
 478  	for i, span := range spans {
 479  		a[i] = s[span.start:span.end]
 480  	}
 481  
 482  	return a
 483  }
 484  
 485  // Join concatenates the elements of its first argument to create a single string. The separator
 486  // string sep is placed between elements in the resulting string.
 487  func Join(elems [][]byte, sep []byte) []byte {
 488  	switch len(elems) {
 489  	case 0:
 490  		return ""
 491  	case 1:
 492  		return elems[0]
 493  	}
 494  
 495  	var n int
 496  	if len(sep) > 0 {
 497  		if len(sep) >= maxInt/(len(elems)-1) {
 498  			panic("strings: Join output length overflow")
 499  		}
 500  		n += len(sep) * (len(elems) - 1)
 501  	}
 502  	for _, elem := range elems {
 503  		if len(elem) > maxInt-n {
 504  			panic("strings: Join output length overflow")
 505  		}
 506  		n += len(elem)
 507  	}
 508  
 509  	var b Builder
 510  	b.Grow(n)
 511  	b.WriteString(elems[0])
 512  	for _, s := range elems[1:] {
 513  		b.WriteString(sep)
 514  		b.WriteString(s)
 515  	}
 516  	return b.String()
 517  }
 518  
 519  // HasPrefix reports whether the string s begins with prefix.
 520  func HasPrefix(s, prefix []byte) bool {
 521  	return stringslite.HasPrefix(s, prefix)
 522  }
 523  
 524  // HasSuffix reports whether the string s ends with suffix.
 525  func HasSuffix(s, suffix []byte) bool {
 526  	return stringslite.HasSuffix(s, suffix)
 527  }
 528  
 529  // Map returns a copy of the string s with all its characters modified
 530  // according to the mapping function. If mapping returns a negative value, the character is
 531  // dropped from the string with no replacement.
 532  func Map(mapping func(rune) rune, s []byte) []byte {
 533  	// In the worst case, the string can grow when mapped, making
 534  	// things unpleasant. But it's so rare we barge in assuming it's
 535  	// fine. It could also shrink but that falls out naturally.
 536  
 537  	// The output buffer b is initialized on demand, the first
 538  	// time a character differs.
 539  	var b Builder
 540  
 541  	for i, c := range s {
 542  		r := mapping(c)
 543  		if r == c && c != utf8.RuneError {
 544  			continue
 545  		}
 546  
 547  		var width int
 548  		if c == utf8.RuneError {
 549  			c, width = utf8.DecodeRuneInString(s[i:])
 550  			if width != 1 && r == c {
 551  				continue
 552  			}
 553  		} else {
 554  			width = utf8.RuneLen(c)
 555  		}
 556  
 557  		b.Grow(len(s) + utf8.UTFMax)
 558  		b.WriteString(s[:i])
 559  		if r >= 0 {
 560  			b.WriteRune(r)
 561  		}
 562  
 563  		s = s[i+width:]
 564  		break
 565  	}
 566  
 567  	// Fast path for unchanged input
 568  	if b.Cap() == 0 { // didn't call b.Grow above
 569  		return s
 570  	}
 571  
 572  	for _, c := range s {
 573  		r := mapping(c)
 574  
 575  		if r >= 0 {
 576  			// common case
 577  			// Due to inlining, it is more performant to determine if WriteByte should be
 578  			// invoked rather than always call WriteRune
 579  			if r < utf8.RuneSelf {
 580  				b.WriteByte(byte(r))
 581  			} else {
 582  				// r is not an ASCII rune.
 583  				b.WriteRune(r)
 584  			}
 585  		}
 586  	}
 587  
 588  	return b.String()
 589  }
 590  
 591  // According to static analysis, spaces, dashes, zeros, equals, and tabs
 592  // are the most commonly repeated string literal,
 593  // often used for display on fixed-width terminal windows.
 594  // Pre-declare constants for these for O(1) repetition in the common-case.
 595  const (
 596  	repeatedSpaces = "" +
 597  		"                                                                " +
 598  		"                                                                "
 599  	repeatedDashes = "" +
 600  		"----------------------------------------------------------------" +
 601  		"----------------------------------------------------------------"
 602  	repeatedZeroes = "" +
 603  		"0000000000000000000000000000000000000000000000000000000000000000"
 604  	repeatedEquals = "" +
 605  		"================================================================" +
 606  		"================================================================"
 607  	repeatedTabs = "" +
 608  		"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" +
 609  		"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
 610  )
 611  
 612  // Repeat returns a new string consisting of count copies of the string s.
 613  //
 614  // It panics if count is negative or if the result of (len(s) * count)
 615  // overflows.
 616  func Repeat(s []byte, count int) []byte {
 617  	switch count {
 618  	case 0:
 619  		return ""
 620  	case 1:
 621  		return s
 622  	}
 623  
 624  	// Since we cannot return an error on overflow,
 625  	// we should panic if the repeat will generate an overflow.
 626  	// See golang.org/issue/16237.
 627  	if count < 0 {
 628  		panic("strings: negative Repeat count")
 629  	}
 630  	hi, lo := bits.Mul(uint(len(s)), uint(count))
 631  	if hi > 0 || lo > uint(maxInt) {
 632  		panic("strings: Repeat output length overflow")
 633  	}
 634  	n := int(lo) // lo = len(s) * count
 635  
 636  	if len(s) == 0 {
 637  		return ""
 638  	}
 639  
 640  	// Optimize for commonly repeated strings of relatively short length.
 641  	switch s[0] {
 642  	case ' ', '-', '0', '=', '\t':
 643  		switch {
 644  		case n <= len(repeatedSpaces) && HasPrefix(repeatedSpaces, s):
 645  			return repeatedSpaces[:n]
 646  		case n <= len(repeatedDashes) && HasPrefix(repeatedDashes, s):
 647  			return repeatedDashes[:n]
 648  		case n <= len(repeatedZeroes) && HasPrefix(repeatedZeroes, s):
 649  			return repeatedZeroes[:n]
 650  		case n <= len(repeatedEquals) && HasPrefix(repeatedEquals, s):
 651  			return repeatedEquals[:n]
 652  		case n <= len(repeatedTabs) && HasPrefix(repeatedTabs, s):
 653  			return repeatedTabs[:n]
 654  		}
 655  	}
 656  
 657  	// Past a certain chunk size it is counterproductive to use
 658  	// larger chunks as the source of the write, as when the source
 659  	// is too large we are basically just thrashing the CPU D-cache.
 660  	// So if the result length is larger than an empirically-found
 661  	// limit (8KB), we stop growing the source string once the limit
 662  	// is reached and keep reusing the same source string - that
 663  	// should therefore be always resident in the L1 cache - until we
 664  	// have completed the construction of the result.
 665  	// This yields significant speedups (up to +100%) in cases where
 666  	// the result length is large (roughly, over L2 cache size).
 667  	const chunkLimit = 8 * 1024
 668  	chunkMax := n
 669  	if n > chunkLimit {
 670  		chunkMax = chunkLimit / len(s) * len(s)
 671  		if chunkMax == 0 {
 672  			chunkMax = len(s)
 673  		}
 674  	}
 675  
 676  	var b Builder
 677  	b.Grow(n)
 678  	b.WriteString(s)
 679  	for b.Len() < n {
 680  		chunk := min(n-b.Len(), b.Len(), chunkMax)
 681  		b.WriteString(b.String()[:chunk])
 682  	}
 683  	return b.String()
 684  }
 685  
 686  // ToUpper returns s with all Unicode letters mapped to their upper case.
 687  func ToUpper(s []byte) []byte {
 688  	isASCII, hasLower := true, false
 689  	for i := 0; i < len(s); i++ {
 690  		c := s[i]
 691  		if c >= utf8.RuneSelf {
 692  			isASCII = false
 693  			break
 694  		}
 695  		hasLower = hasLower || ('a' <= c && c <= 'z')
 696  	}
 697  
 698  	if isASCII { // optimize for ASCII-only strings.
 699  		if !hasLower {
 700  			return s
 701  		}
 702  		var (
 703  			b   Builder
 704  			pos int
 705  		)
 706  		b.Grow(len(s))
 707  		for i := 0; i < len(s); i++ {
 708  			c := s[i]
 709  			if 'a' <= c && c <= 'z' {
 710  				c -= 'a' - 'A'
 711  				if pos < i {
 712  					b.WriteString(s[pos:i])
 713  				}
 714  				b.WriteByte(c)
 715  				pos = i + 1
 716  			}
 717  		}
 718  		if pos < len(s) {
 719  			b.WriteString(s[pos:])
 720  		}
 721  		return b.String()
 722  	}
 723  	return Map(unicode.ToUpper, s)
 724  }
 725  
 726  // ToLower returns s with all Unicode letters mapped to their lower case.
 727  func ToLower(s []byte) []byte {
 728  	isASCII, hasUpper := true, false
 729  	for i := 0; i < len(s); i++ {
 730  		c := s[i]
 731  		if c >= utf8.RuneSelf {
 732  			isASCII = false
 733  			break
 734  		}
 735  		hasUpper = hasUpper || ('A' <= c && c <= 'Z')
 736  	}
 737  
 738  	if isASCII { // optimize for ASCII-only strings.
 739  		if !hasUpper {
 740  			return s
 741  		}
 742  		var (
 743  			b   Builder
 744  			pos int
 745  		)
 746  		b.Grow(len(s))
 747  		for i := 0; i < len(s); i++ {
 748  			c := s[i]
 749  			if 'A' <= c && c <= 'Z' {
 750  				c += 'a' - 'A'
 751  				if pos < i {
 752  					b.WriteString(s[pos:i])
 753  				}
 754  				b.WriteByte(c)
 755  				pos = i + 1
 756  			}
 757  		}
 758  		if pos < len(s) {
 759  			b.WriteString(s[pos:])
 760  		}
 761  		return b.String()
 762  	}
 763  	return Map(unicode.ToLower, s)
 764  }
 765  
 766  // ToTitle returns a copy of the string s with all Unicode letters mapped to
 767  // their Unicode title case.
 768  func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
 769  
 770  // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
 771  // upper case using the case mapping specified by c.
 772  func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
 773  	return Map(c.ToUpper, s)
 774  }
 775  
 776  // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
 777  // lower case using the case mapping specified by c.
 778  func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
 779  	return Map(c.ToLower, s)
 780  }
 781  
 782  // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
 783  // Unicode title case, giving priority to the special casing rules.
 784  func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
 785  	return Map(c.ToTitle, s)
 786  }
 787  
 788  // ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences
 789  // replaced by the replacement string, which may be empty.
 790  func ToValidUTF8(s, replacement []byte) []byte {
 791  	var b Builder
 792  
 793  	for i, c := range s {
 794  		if c != utf8.RuneError {
 795  			continue
 796  		}
 797  
 798  		_, wid := utf8.DecodeRuneInString(s[i:])
 799  		if wid == 1 {
 800  			b.Grow(len(s) + len(replacement))
 801  			b.WriteString(s[:i])
 802  			s = s[i:]
 803  			break
 804  		}
 805  	}
 806  
 807  	// Fast path for unchanged input
 808  	if b.Cap() == 0 { // didn't call b.Grow above
 809  		return s
 810  	}
 811  
 812  	invalid := false // previous byte was from an invalid UTF-8 sequence
 813  	for i := 0; i < len(s); {
 814  		c := s[i]
 815  		if c < utf8.RuneSelf {
 816  			i++
 817  			invalid = false
 818  			b.WriteByte(c)
 819  			continue
 820  		}
 821  		_, wid := utf8.DecodeRuneInString(s[i:])
 822  		if wid == 1 {
 823  			i++
 824  			if !invalid {
 825  				invalid = true
 826  				b.WriteString(replacement)
 827  			}
 828  			continue
 829  		}
 830  		invalid = false
 831  		b.WriteString(s[i : i+wid])
 832  		i += wid
 833  	}
 834  
 835  	return b.String()
 836  }
 837  
 838  // isSeparator reports whether the rune could mark a word boundary.
 839  // TODO: update when package unicode captures more of the properties.
 840  func isSeparator(r rune) bool {
 841  	// ASCII alphanumerics and underscore are not separators
 842  	if r <= 0x7F {
 843  		switch {
 844  		case '0' <= r && r <= '9':
 845  			return false
 846  		case 'a' <= r && r <= 'z':
 847  			return false
 848  		case 'A' <= r && r <= 'Z':
 849  			return false
 850  		case r == '_':
 851  			return false
 852  		}
 853  		return true
 854  	}
 855  	// Letters and digits are not separators
 856  	if unicode.IsLetter(r) || unicode.IsDigit(r) {
 857  		return false
 858  	}
 859  	// Otherwise, all we can do for now is treat spaces as separators.
 860  	return unicode.IsSpace(r)
 861  }
 862  
 863  // Title returns a copy of the string s with all Unicode letters that begin words
 864  // mapped to their Unicode title case.
 865  //
 866  // Deprecated: The rule Title uses for word boundaries does not handle Unicode
 867  // punctuation properly. Use golang.org/x/text/cases instead.
 868  func Title(s []byte) []byte {
 869  	// Use a closure here to remember state.
 870  	// Hackish but effective. Depends on Map scanning in order and calling
 871  	// the closure once per rune.
 872  	prev := ' '
 873  	return Map(
 874  		func(r rune) rune {
 875  			if isSeparator(prev) {
 876  				prev = r
 877  				return unicode.ToTitle(r)
 878  			}
 879  			prev = r
 880  			return r
 881  		},
 882  		s)
 883  }
 884  
 885  // TrimLeftFunc returns a slice of the string s with all leading
 886  // Unicode code points c satisfying f(c) removed.
 887  func TrimLeftFunc(s []byte, f func(rune) bool) []byte {
 888  	i := indexFunc(s, f, false)
 889  	if i == -1 {
 890  		return ""
 891  	}
 892  	return s[i:]
 893  }
 894  
 895  // TrimRightFunc returns a slice of the string s with all trailing
 896  // Unicode code points c satisfying f(c) removed.
 897  func TrimRightFunc(s []byte, f func(rune) bool) []byte {
 898  	i := lastIndexFunc(s, f, false)
 899  	if i >= 0 && s[i] >= utf8.RuneSelf {
 900  		_, wid := utf8.DecodeRuneInString(s[i:])
 901  		i += wid
 902  	} else {
 903  		i++
 904  	}
 905  	return s[0:i]
 906  }
 907  
 908  // TrimFunc returns a slice of the string s with all leading
 909  // and trailing Unicode code points c satisfying f(c) removed.
 910  func TrimFunc(s []byte, f func(rune) bool) []byte {
 911  	return TrimRightFunc(TrimLeftFunc(s, f), f)
 912  }
 913  
 914  // IndexFunc returns the index into s of the first Unicode
 915  // code point satisfying f(c), or -1 if none do.
 916  func IndexFunc(s []byte, f func(rune) bool) int {
 917  	return indexFunc(s, f, true)
 918  }
 919  
 920  // LastIndexFunc returns the index into s of the last
 921  // Unicode code point satisfying f(c), or -1 if none do.
 922  func LastIndexFunc(s []byte, f func(rune) bool) int {
 923  	return lastIndexFunc(s, f, true)
 924  }
 925  
 926  // indexFunc is the same as IndexFunc except that if
 927  // truth==false, the sense of the predicate function is
 928  // inverted.
 929  func indexFunc(s []byte, f func(rune) bool, truth bool) int {
 930  	for i, r := range s {
 931  		if f(r) == truth {
 932  			return i
 933  		}
 934  	}
 935  	return -1
 936  }
 937  
 938  // lastIndexFunc is the same as LastIndexFunc except that if
 939  // truth==false, the sense of the predicate function is
 940  // inverted.
 941  func lastIndexFunc(s []byte, f func(rune) bool, truth bool) int {
 942  	for i := len(s); i > 0; {
 943  		r, size := utf8.DecodeLastRuneInString(s[0:i])
 944  		i -= size
 945  		if f(r) == truth {
 946  			return i
 947  		}
 948  	}
 949  	return -1
 950  }
 951  
 952  // asciiSet is a 32-byte value, where each bit represents the presence of a
 953  // given ASCII character in the set. The 128-bits of the lower 16 bytes,
 954  // starting with the least-significant bit of the lowest word to the
 955  // most-significant bit of the highest word, map to the full range of all
 956  // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
 957  // ensuring that any non-ASCII character will be reported as not in the set.
 958  // This allocates a total of 32 bytes even though the upper half
 959  // is unused to avoid bounds checks in asciiSet.contains.
 960  type asciiSet [8]uint32
 961  
 962  // makeASCIISet creates a set of ASCII characters and reports whether all
 963  // characters in chars are ASCII.
 964  func makeASCIISet(chars []byte) (as asciiSet, ok bool) {
 965  	for i := 0; i < len(chars); i++ {
 966  		c := chars[i]
 967  		if c >= utf8.RuneSelf {
 968  			return as, false
 969  		}
 970  		as[c/32] |= 1 << (c % 32)
 971  	}
 972  	return as, true
 973  }
 974  
 975  // contains reports whether c is inside the set.
 976  func (as *asciiSet) contains(c byte) bool {
 977  	return (as[c/32] & (1 << (c % 32))) != 0
 978  }
 979  
 980  // Trim returns a slice of the string s with all leading and
 981  // trailing Unicode code points contained in cutset removed.
 982  func Trim(s, cutset []byte) []byte {
 983  	if s == "" || cutset == "" {
 984  		return s
 985  	}
 986  	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
 987  		return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0])
 988  	}
 989  	if as, ok := makeASCIISet(cutset); ok {
 990  		return trimLeftASCII(trimRightASCII(s, &as), &as)
 991  	}
 992  	return trimLeftUnicode(trimRightUnicode(s, cutset), cutset)
 993  }
 994  
 995  // TrimLeft returns a slice of the string s with all leading
 996  // Unicode code points contained in cutset removed.
 997  //
 998  // To remove a prefix, use [TrimPrefix] instead.
 999  func TrimLeft(s, cutset []byte) []byte {
1000  	if s == "" || cutset == "" {
1001  		return s
1002  	}
1003  	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
1004  		return trimLeftByte(s, cutset[0])
1005  	}
1006  	if as, ok := makeASCIISet(cutset); ok {
1007  		return trimLeftASCII(s, &as)
1008  	}
1009  	return trimLeftUnicode(s, cutset)
1010  }
1011  
1012  func trimLeftByte(s []byte, c byte) []byte {
1013  	for len(s) > 0 && s[0] == c {
1014  		s = s[1:]
1015  	}
1016  	return s
1017  }
1018  
1019  func trimLeftASCII(s []byte, as *asciiSet) []byte {
1020  	for len(s) > 0 {
1021  		if !as.contains(s[0]) {
1022  			break
1023  		}
1024  		s = s[1:]
1025  	}
1026  	return s
1027  }
1028  
1029  func trimLeftUnicode(s, cutset []byte) []byte {
1030  	for len(s) > 0 {
1031  		r, n := rune(s[0]), 1
1032  		if r >= utf8.RuneSelf {
1033  			r, n = utf8.DecodeRuneInString(s)
1034  		}
1035  		if !ContainsRune(cutset, r) {
1036  			break
1037  		}
1038  		s = s[n:]
1039  	}
1040  	return s
1041  }
1042  
1043  // TrimRight returns a slice of the string s, with all trailing
1044  // Unicode code points contained in cutset removed.
1045  //
1046  // To remove a suffix, use [TrimSuffix] instead.
1047  func TrimRight(s, cutset []byte) []byte {
1048  	if s == "" || cutset == "" {
1049  		return s
1050  	}
1051  	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
1052  		return trimRightByte(s, cutset[0])
1053  	}
1054  	if as, ok := makeASCIISet(cutset); ok {
1055  		return trimRightASCII(s, &as)
1056  	}
1057  	return trimRightUnicode(s, cutset)
1058  }
1059  
1060  func trimRightByte(s []byte, c byte) []byte {
1061  	for len(s) > 0 && s[len(s)-1] == c {
1062  		s = s[:len(s)-1]
1063  	}
1064  	return s
1065  }
1066  
1067  func trimRightASCII(s []byte, as *asciiSet) []byte {
1068  	for len(s) > 0 {
1069  		if !as.contains(s[len(s)-1]) {
1070  			break
1071  		}
1072  		s = s[:len(s)-1]
1073  	}
1074  	return s
1075  }
1076  
1077  func trimRightUnicode(s, cutset []byte) []byte {
1078  	for len(s) > 0 {
1079  		r, n := rune(s[len(s)-1]), 1
1080  		if r >= utf8.RuneSelf {
1081  			r, n = utf8.DecodeLastRuneInString(s)
1082  		}
1083  		if !ContainsRune(cutset, r) {
1084  			break
1085  		}
1086  		s = s[:len(s)-n]
1087  	}
1088  	return s
1089  }
1090  
1091  // TrimSpace returns a slice of the string s, with all leading
1092  // and trailing white space removed, as defined by Unicode.
1093  func TrimSpace(s []byte) []byte {
1094  	// Fast path for ASCII: look for the first ASCII non-space byte
1095  	start := 0
1096  	for ; start < len(s); start++ {
1097  		c := s[start]
1098  		if c >= utf8.RuneSelf {
1099  			// If we run into a non-ASCII byte, fall back to the
1100  			// slower unicode-aware method on the remaining bytes
1101  			return TrimFunc(s[start:], unicode.IsSpace)
1102  		}
1103  		if asciiSpace[c] == 0 {
1104  			break
1105  		}
1106  	}
1107  
1108  	// Now look for the first ASCII non-space byte from the end
1109  	stop := len(s)
1110  	for ; stop > start; stop-- {
1111  		c := s[stop-1]
1112  		if c >= utf8.RuneSelf {
1113  			// start has been already trimmed above, should trim end only
1114  			return TrimRightFunc(s[start:stop], unicode.IsSpace)
1115  		}
1116  		if asciiSpace[c] == 0 {
1117  			break
1118  		}
1119  	}
1120  
1121  	// At this point s[start:stop] starts and ends with an ASCII
1122  	// non-space bytes, so we're done. Non-ASCII cases have already
1123  	// been handled above.
1124  	return s[start:stop]
1125  }
1126  
1127  // TrimPrefix returns s without the provided leading prefix string.
1128  // If s doesn't start with prefix, s is returned unchanged.
1129  func TrimPrefix(s, prefix []byte) []byte {
1130  	return stringslite.TrimPrefix(s, prefix)
1131  }
1132  
1133  // TrimSuffix returns s without the provided trailing suffix string.
1134  // If s doesn't end with suffix, s is returned unchanged.
1135  func TrimSuffix(s, suffix []byte) []byte {
1136  	return stringslite.TrimSuffix(s, suffix)
1137  }
1138  
1139  // Replace returns a copy of the string s with the first n
1140  // non-overlapping instances of old replaced by new.
1141  // If old is empty, it matches at the beginning of the string
1142  // and after each UTF-8 sequence, yielding up to k+1 replacements
1143  // for a k-rune string.
1144  // If n < 0, there is no limit on the number of replacements.
1145  func Replace(s, old, new []byte, n int) []byte {
1146  	if old == new || n == 0 {
1147  		return s // avoid allocation
1148  	}
1149  
1150  	// Compute number of replacements.
1151  	if m := Count(s, old); m == 0 {
1152  		return s // avoid allocation
1153  	} else if n < 0 || m < n {
1154  		n = m
1155  	}
1156  
1157  	// Apply replacements to buffer.
1158  	var b Builder
1159  	b.Grow(len(s) + n*(len(new)-len(old)))
1160  	start := 0
1161  	if len(old) > 0 {
1162  		for range n {
1163  			j := start + Index(s[start:], old)
1164  			b.WriteString(s[start:j])
1165  			b.WriteString(new)
1166  			start = j + len(old)
1167  		}
1168  	} else { // len(old) == 0
1169  		b.WriteString(new)
1170  		for range n - 1 {
1171  			_, wid := utf8.DecodeRuneInString(s[start:])
1172  			j := start + wid
1173  			b.WriteString(s[start:j])
1174  			b.WriteString(new)
1175  			start = j
1176  		}
1177  	}
1178  	b.WriteString(s[start:])
1179  	return b.String()
1180  }
1181  
1182  // ReplaceAll returns a copy of the string s with all
1183  // non-overlapping instances of old replaced by new.
1184  // If old is empty, it matches at the beginning of the string
1185  // and after each UTF-8 sequence, yielding up to k+1 replacements
1186  // for a k-rune string.
1187  func ReplaceAll(s, old, new []byte) []byte {
1188  	return Replace(s, old, new, -1)
1189  }
1190  
1191  // EqualFold reports whether s and t, interpreted as UTF-8 strings,
1192  // are equal under simple Unicode case-folding, which is a more general
1193  // form of case-insensitivity.
1194  func EqualFold(s, t []byte) bool {
1195  	// ASCII fast path
1196  	i := 0
1197  	for n := min(len(s), len(t)); i < n; i++ {
1198  		sr := s[i]
1199  		tr := t[i]
1200  		if sr|tr >= utf8.RuneSelf {
1201  			goto hasUnicode
1202  		}
1203  
1204  		// Easy case.
1205  		if tr == sr {
1206  			continue
1207  		}
1208  
1209  		// Make sr < tr to simplify what follows.
1210  		if tr < sr {
1211  			tr, sr = sr, tr
1212  		}
1213  		// ASCII only, sr/tr must be upper/lower case
1214  		if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
1215  			continue
1216  		}
1217  		return false
1218  	}
1219  	// Check if we've exhausted both strings.
1220  	return len(s) == len(t)
1221  
1222  hasUnicode:
1223  	s = s[i:]
1224  	t = t[i:]
1225  	for _, sr := range s {
1226  		// If t is exhausted the strings are not equal.
1227  		if len(t) == 0 {
1228  			return false
1229  		}
1230  
1231  		// Extract first rune from second string.
1232  		var tr rune
1233  		if t[0] < utf8.RuneSelf {
1234  			tr, t = rune(t[0]), t[1:]
1235  		} else {
1236  			r, size := utf8.DecodeRuneInString(t)
1237  			tr, t = r, t[size:]
1238  		}
1239  
1240  		// If they match, keep going; if not, return false.
1241  
1242  		// Easy case.
1243  		if tr == sr {
1244  			continue
1245  		}
1246  
1247  		// Make sr < tr to simplify what follows.
1248  		if tr < sr {
1249  			tr, sr = sr, tr
1250  		}
1251  		// Fast check for ASCII.
1252  		if tr < utf8.RuneSelf {
1253  			// ASCII only, sr/tr must be upper/lower case
1254  			if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
1255  				continue
1256  			}
1257  			return false
1258  		}
1259  
1260  		// General case. SimpleFold(x) returns the next equivalent rune > x
1261  		// or wraps around to smaller values.
1262  		r := unicode.SimpleFold(sr)
1263  		for r != sr && r < tr {
1264  			r = unicode.SimpleFold(r)
1265  		}
1266  		if r == tr {
1267  			continue
1268  		}
1269  		return false
1270  	}
1271  
1272  	// First string is empty, so check if the second one is also empty.
1273  	return len(t) == 0
1274  }
1275  
1276  // Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
1277  func Index(s, substr []byte) int {
1278  	return stringslite.Index(s, substr)
1279  }
1280  
1281  // Cut slices s around the first instance of sep,
1282  // returning the text before and after sep.
1283  // The found result reports whether sep appears in s.
1284  // If sep does not appear in s, cut returns s, "", false.
1285  func Cut(s, sep []byte) (before, after []byte, found bool) {
1286  	return stringslite.Cut(s, sep)
1287  }
1288  
1289  // CutPrefix returns s without the provided leading prefix string
1290  // and reports whether it found the prefix.
1291  // If s doesn't start with prefix, CutPrefix returns s, false.
1292  // If prefix is the empty string, CutPrefix returns s, true.
1293  func CutPrefix(s, prefix []byte) (after []byte, found bool) {
1294  	return stringslite.CutPrefix(s, prefix)
1295  }
1296  
1297  // CutSuffix returns s without the provided ending suffix string
1298  // and reports whether it found the suffix.
1299  // If s doesn't end with suffix, CutSuffix returns s, false.
1300  // If suffix is the empty string, CutSuffix returns s, true.
1301  func CutSuffix(s, suffix []byte) (before []byte, found bool) {
1302  	return stringslite.CutSuffix(s, suffix)
1303  }
1304