sais2.mx raw

   1  // Copyright 2019 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Code generated by go generate; DO NOT EDIT.
   6  
   7  package suffixarray
   8  
   9  func text_64(text []byte, sa []int64) {
  10  	if int(int64(len(text))) != len(text) || len(text) != len(sa) {
  11  		panic("suffixarray: misuse of text_64")
  12  	}
  13  	sais_8_64(text, 256, sa, []int64{:2*256})
  14  }
  15  
  16  func sais_8_64(text []byte, textMax int, sa, tmp []int64) {
  17  	if len(sa) != len(text) || len(tmp) < textMax {
  18  		panic("suffixarray: misuse of sais_8_64")
  19  	}
  20  
  21  	// Trivial base cases. Sorting 0 or 1 things is easy.
  22  	if len(text) == 0 {
  23  		return
  24  	}
  25  	if len(text) == 1 {
  26  		sa[0] = 0
  27  		return
  28  	}
  29  
  30  	// Establish slices indexed by text character
  31  	// holding character frequency and bucket-sort offsets.
  32  	// If there's only enough tmp for one slice,
  33  	// we make it the bucket offsets and recompute
  34  	// the character frequency each time we need it.
  35  	var freq, bucket []int64
  36  	if len(tmp) >= 2*textMax {
  37  		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
  38  		freq[0] = -1 // mark as uninitialized
  39  	} else {
  40  		freq, bucket = nil, tmp[:textMax]
  41  	}
  42  
  43  	// The SAIS algorithm.
  44  	// Each of these calls makes one scan through sa.
  45  	// See the individual functions for documentation
  46  	// about each's role in the algorithm.
  47  	numLMS := placeLMS_8_64(text, sa, freq, bucket)
  48  	if numLMS <= 1 {
  49  		// 0 or 1 items are already sorted. Do nothing.
  50  	} else {
  51  		induceSubL_8_64(text, sa, freq, bucket)
  52  		induceSubS_8_64(text, sa, freq, bucket)
  53  		length_8_64(text, sa, numLMS)
  54  		maxID := assignID_8_64(text, sa, numLMS)
  55  		if maxID < numLMS {
  56  			map_64(sa, numLMS)
  57  			recurse_64(sa, tmp, numLMS, maxID)
  58  			unmap_8_64(text, sa, numLMS)
  59  		} else {
  60  			// If maxID == numLMS, then each LMS-substring
  61  			// is unique, so the relative ordering of two LMS-suffixes
  62  			// is determined by just the leading LMS-substring.
  63  			// That is, the LMS-suffix sort order matches the
  64  			// (simpler) LMS-substring sort order.
  65  			// Copy the original LMS-substring order into the
  66  			// suffix array destination.
  67  			copy(sa, sa[len(sa)-numLMS:])
  68  		}
  69  		expand_8_64(text, freq, bucket, sa, numLMS)
  70  	}
  71  	induceL_8_64(text, sa, freq, bucket)
  72  	induceS_8_64(text, sa, freq, bucket)
  73  
  74  	// Mark for caller that we overwrote tmp.
  75  	tmp[0] = -1
  76  }
  77  
  78  func sais_32(text []int32, textMax int, sa, tmp []int32) {
  79  	if len(sa) != len(text) || len(tmp) < textMax {
  80  		panic("suffixarray: misuse of sais_32")
  81  	}
  82  
  83  	// Trivial base cases. Sorting 0 or 1 things is easy.
  84  	if len(text) == 0 {
  85  		return
  86  	}
  87  	if len(text) == 1 {
  88  		sa[0] = 0
  89  		return
  90  	}
  91  
  92  	// Establish slices indexed by text character
  93  	// holding character frequency and bucket-sort offsets.
  94  	// If there's only enough tmp for one slice,
  95  	// we make it the bucket offsets and recompute
  96  	// the character frequency each time we need it.
  97  	var freq, bucket []int32
  98  	if len(tmp) >= 2*textMax {
  99  		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
 100  		freq[0] = -1 // mark as uninitialized
 101  	} else {
 102  		freq, bucket = nil, tmp[:textMax]
 103  	}
 104  
 105  	// The SAIS algorithm.
 106  	// Each of these calls makes one scan through sa.
 107  	// See the individual functions for documentation
 108  	// about each's role in the algorithm.
 109  	numLMS := placeLMS_32(text, sa, freq, bucket)
 110  	if numLMS <= 1 {
 111  		// 0 or 1 items are already sorted. Do nothing.
 112  	} else {
 113  		induceSubL_32(text, sa, freq, bucket)
 114  		induceSubS_32(text, sa, freq, bucket)
 115  		length_32(text, sa, numLMS)
 116  		maxID := assignID_32(text, sa, numLMS)
 117  		if maxID < numLMS {
 118  			map_32(sa, numLMS)
 119  			recurse_32(sa, tmp, numLMS, maxID)
 120  			unmap_32(text, sa, numLMS)
 121  		} else {
 122  			// If maxID == numLMS, then each LMS-substring
 123  			// is unique, so the relative ordering of two LMS-suffixes
 124  			// is determined by just the leading LMS-substring.
 125  			// That is, the LMS-suffix sort order matches the
 126  			// (simpler) LMS-substring sort order.
 127  			// Copy the original LMS-substring order into the
 128  			// suffix array destination.
 129  			copy(sa, sa[len(sa)-numLMS:])
 130  		}
 131  		expand_32(text, freq, bucket, sa, numLMS)
 132  	}
 133  	induceL_32(text, sa, freq, bucket)
 134  	induceS_32(text, sa, freq, bucket)
 135  
 136  	// Mark for caller that we overwrote tmp.
 137  	tmp[0] = -1
 138  }
 139  
 140  func sais_64(text []int64, textMax int, sa, tmp []int64) {
 141  	if len(sa) != len(text) || len(tmp) < textMax {
 142  		panic("suffixarray: misuse of sais_64")
 143  	}
 144  
 145  	// Trivial base cases. Sorting 0 or 1 things is easy.
 146  	if len(text) == 0 {
 147  		return
 148  	}
 149  	if len(text) == 1 {
 150  		sa[0] = 0
 151  		return
 152  	}
 153  
 154  	// Establish slices indexed by text character
 155  	// holding character frequency and bucket-sort offsets.
 156  	// If there's only enough tmp for one slice,
 157  	// we make it the bucket offsets and recompute
 158  	// the character frequency each time we need it.
 159  	var freq, bucket []int64
 160  	if len(tmp) >= 2*textMax {
 161  		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
 162  		freq[0] = -1 // mark as uninitialized
 163  	} else {
 164  		freq, bucket = nil, tmp[:textMax]
 165  	}
 166  
 167  	// The SAIS algorithm.
 168  	// Each of these calls makes one scan through sa.
 169  	// See the individual functions for documentation
 170  	// about each's role in the algorithm.
 171  	numLMS := placeLMS_64(text, sa, freq, bucket)
 172  	if numLMS <= 1 {
 173  		// 0 or 1 items are already sorted. Do nothing.
 174  	} else {
 175  		induceSubL_64(text, sa, freq, bucket)
 176  		induceSubS_64(text, sa, freq, bucket)
 177  		length_64(text, sa, numLMS)
 178  		maxID := assignID_64(text, sa, numLMS)
 179  		if maxID < numLMS {
 180  			map_64(sa, numLMS)
 181  			recurse_64(sa, tmp, numLMS, maxID)
 182  			unmap_64(text, sa, numLMS)
 183  		} else {
 184  			// If maxID == numLMS, then each LMS-substring
 185  			// is unique, so the relative ordering of two LMS-suffixes
 186  			// is determined by just the leading LMS-substring.
 187  			// That is, the LMS-suffix sort order matches the
 188  			// (simpler) LMS-substring sort order.
 189  			// Copy the original LMS-substring order into the
 190  			// suffix array destination.
 191  			copy(sa, sa[len(sa)-numLMS:])
 192  		}
 193  		expand_64(text, freq, bucket, sa, numLMS)
 194  	}
 195  	induceL_64(text, sa, freq, bucket)
 196  	induceS_64(text, sa, freq, bucket)
 197  
 198  	// Mark for caller that we overwrote tmp.
 199  	tmp[0] = -1
 200  }
 201  
 202  func freq_8_64(text []byte, freq, bucket []int64) []int64 {
 203  	if freq != nil && freq[0] >= 0 {
 204  		return freq // already computed
 205  	}
 206  	if freq == nil {
 207  		freq = bucket
 208  	}
 209  
 210  	freq = freq[:256] // eliminate bounds check for freq[c] below
 211  	clear(freq)
 212  	for _, c := range text {
 213  		freq[c]++
 214  	}
 215  	return freq
 216  }
 217  
 218  func freq_32(text []int32, freq, bucket []int32) []int32 {
 219  	if freq != nil && freq[0] >= 0 {
 220  		return freq // already computed
 221  	}
 222  	if freq == nil {
 223  		freq = bucket
 224  	}
 225  
 226  	clear(freq)
 227  	for _, c := range text {
 228  		freq[c]++
 229  	}
 230  	return freq
 231  }
 232  
 233  func freq_64(text []int64, freq, bucket []int64) []int64 {
 234  	if freq != nil && freq[0] >= 0 {
 235  		return freq // already computed
 236  	}
 237  	if freq == nil {
 238  		freq = bucket
 239  	}
 240  
 241  	clear(freq)
 242  	for _, c := range text {
 243  		freq[c]++
 244  	}
 245  	return freq
 246  }
 247  
 248  func bucketMin_8_64(text []byte, freq, bucket []int64) {
 249  	freq = freq_8_64(text, freq, bucket)
 250  	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
 251  	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
 252  	total := int64(0)
 253  	for i, n := range freq {
 254  		bucket[i] = total
 255  		total += n
 256  	}
 257  }
 258  
 259  func bucketMin_32(text []int32, freq, bucket []int32) {
 260  	freq = freq_32(text, freq, bucket)
 261  	total := int32(0)
 262  	for i, n := range freq {
 263  		bucket[i] = total
 264  		total += n
 265  	}
 266  }
 267  
 268  func bucketMin_64(text []int64, freq, bucket []int64) {
 269  	freq = freq_64(text, freq, bucket)
 270  	total := int64(0)
 271  	for i, n := range freq {
 272  		bucket[i] = total
 273  		total += n
 274  	}
 275  }
 276  
 277  func bucketMax_8_64(text []byte, freq, bucket []int64) {
 278  	freq = freq_8_64(text, freq, bucket)
 279  	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
 280  	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
 281  	total := int64(0)
 282  	for i, n := range freq {
 283  		total += n
 284  		bucket[i] = total
 285  	}
 286  }
 287  
 288  func bucketMax_32(text []int32, freq, bucket []int32) {
 289  	freq = freq_32(text, freq, bucket)
 290  	total := int32(0)
 291  	for i, n := range freq {
 292  		total += n
 293  		bucket[i] = total
 294  	}
 295  }
 296  
 297  func bucketMax_64(text []int64, freq, bucket []int64) {
 298  	freq = freq_64(text, freq, bucket)
 299  	total := int64(0)
 300  	for i, n := range freq {
 301  		total += n
 302  		bucket[i] = total
 303  	}
 304  }
 305  
 306  func placeLMS_8_64(text []byte, sa, freq, bucket []int64) int {
 307  	bucketMax_8_64(text, freq, bucket)
 308  
 309  	numLMS := 0
 310  	lastB := int64(-1)
 311  	bucket = bucket[:256] // eliminate bounds check for bucket[c1] below
 312  
 313  	// The next stanza of code (until the blank line) loop backward
 314  	// over text, stopping to execute a code body at each position i
 315  	// such that text[i] is an L-character and text[i+1] is an S-character.
 316  	// That is, i+1 is the position of the start of an LMS-substring.
 317  	// These could be hoisted out into a function with a callback,
 318  	// but at a significant speed cost. Instead, we just write these
 319  	// seven lines a few times in this source file. The copies below
 320  	// refer back to the pattern established by this original as the
 321  	// "LMS-substring iterator".
 322  	//
 323  	// In every scan through the text, c0, c1 are successive characters of text.
 324  	// In this backward scan, c0 == text[i] and c1 == text[i+1].
 325  	// By scanning backward, we can keep track of whether the current
 326  	// position is type-S or type-L according to the usual definition:
 327  	//
 328  	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
 329  	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
 330  	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
 331  	//
 332  	// The backward scan lets us maintain the current type,
 333  	// update it when we see c0 != c1, and otherwise leave it alone.
 334  	// We want to identify all S positions with a preceding L.
 335  	// Position len(text) is one such position by definition, but we have
 336  	// nowhere to write it down, so we eliminate it by untruthfully
 337  	// setting isTypeS = false at the start of the loop.
 338  	c0, c1, isTypeS := byte(0), byte(0), false
 339  	for i := len(text) - 1; i >= 0; i-- {
 340  		c0, c1 = text[i], c0
 341  		if c0 < c1 {
 342  			isTypeS = true
 343  		} else if c0 > c1 && isTypeS {
 344  			isTypeS = false
 345  
 346  			// Bucket the index i+1 for the start of an LMS-substring.
 347  			b := bucket[c1] - 1
 348  			bucket[c1] = b
 349  			sa[b] = int64(i + 1)
 350  			lastB = b
 351  			numLMS++
 352  		}
 353  	}
 354  
 355  	// We recorded the LMS-substring starts but really want the ends.
 356  	// Luckily, with two differences, the start indexes and the end indexes are the same.
 357  	// The first difference is that the rightmost LMS-substring's end index is len(text),
 358  	// so the caller must pretend that sa[-1] == len(text), as noted above.
 359  	// The second difference is that the first leftmost LMS-substring start index
 360  	// does not end an earlier LMS-substring, so as an optimization we can omit
 361  	// that leftmost LMS-substring start index (the last one we wrote).
 362  	//
 363  	// Exception: if numLMS <= 1, the caller is not going to bother with
 364  	// the recursion at all and will treat the result as containing LMS-substring starts.
 365  	// In that case, we don't remove the final entry.
 366  	if numLMS > 1 {
 367  		sa[lastB] = 0
 368  	}
 369  	return numLMS
 370  }
 371  
 372  func placeLMS_32(text []int32, sa, freq, bucket []int32) int {
 373  	bucketMax_32(text, freq, bucket)
 374  
 375  	numLMS := 0
 376  	lastB := int32(-1)
 377  
 378  	// The next stanza of code (until the blank line) loop backward
 379  	// over text, stopping to execute a code body at each position i
 380  	// such that text[i] is an L-character and text[i+1] is an S-character.
 381  	// That is, i+1 is the position of the start of an LMS-substring.
 382  	// These could be hoisted out into a function with a callback,
 383  	// but at a significant speed cost. Instead, we just write these
 384  	// seven lines a few times in this source file. The copies below
 385  	// refer back to the pattern established by this original as the
 386  	// "LMS-substring iterator".
 387  	//
 388  	// In every scan through the text, c0, c1 are successive characters of text.
 389  	// In this backward scan, c0 == text[i] and c1 == text[i+1].
 390  	// By scanning backward, we can keep track of whether the current
 391  	// position is type-S or type-L according to the usual definition:
 392  	//
 393  	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
 394  	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
 395  	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
 396  	//
 397  	// The backward scan lets us maintain the current type,
 398  	// update it when we see c0 != c1, and otherwise leave it alone.
 399  	// We want to identify all S positions with a preceding L.
 400  	// Position len(text) is one such position by definition, but we have
 401  	// nowhere to write it down, so we eliminate it by untruthfully
 402  	// setting isTypeS = false at the start of the loop.
 403  	c0, c1, isTypeS := int32(0), int32(0), false
 404  	for i := len(text) - 1; i >= 0; i-- {
 405  		c0, c1 = text[i], c0
 406  		if c0 < c1 {
 407  			isTypeS = true
 408  		} else if c0 > c1 && isTypeS {
 409  			isTypeS = false
 410  
 411  			// Bucket the index i+1 for the start of an LMS-substring.
 412  			b := bucket[c1] - 1
 413  			bucket[c1] = b
 414  			sa[b] = int32(i + 1)
 415  			lastB = b
 416  			numLMS++
 417  		}
 418  	}
 419  
 420  	// We recorded the LMS-substring starts but really want the ends.
 421  	// Luckily, with two differences, the start indexes and the end indexes are the same.
 422  	// The first difference is that the rightmost LMS-substring's end index is len(text),
 423  	// so the caller must pretend that sa[-1] == len(text), as noted above.
 424  	// The second difference is that the first leftmost LMS-substring start index
 425  	// does not end an earlier LMS-substring, so as an optimization we can omit
 426  	// that leftmost LMS-substring start index (the last one we wrote).
 427  	//
 428  	// Exception: if numLMS <= 1, the caller is not going to bother with
 429  	// the recursion at all and will treat the result as containing LMS-substring starts.
 430  	// In that case, we don't remove the final entry.
 431  	if numLMS > 1 {
 432  		sa[lastB] = 0
 433  	}
 434  	return numLMS
 435  }
 436  
 437  func placeLMS_64(text []int64, sa, freq, bucket []int64) int {
 438  	bucketMax_64(text, freq, bucket)
 439  
 440  	numLMS := 0
 441  	lastB := int64(-1)
 442  
 443  	// The next stanza of code (until the blank line) loop backward
 444  	// over text, stopping to execute a code body at each position i
 445  	// such that text[i] is an L-character and text[i+1] is an S-character.
 446  	// That is, i+1 is the position of the start of an LMS-substring.
 447  	// These could be hoisted out into a function with a callback,
 448  	// but at a significant speed cost. Instead, we just write these
 449  	// seven lines a few times in this source file. The copies below
 450  	// refer back to the pattern established by this original as the
 451  	// "LMS-substring iterator".
 452  	//
 453  	// In every scan through the text, c0, c1 are successive characters of text.
 454  	// In this backward scan, c0 == text[i] and c1 == text[i+1].
 455  	// By scanning backward, we can keep track of whether the current
 456  	// position is type-S or type-L according to the usual definition:
 457  	//
 458  	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
 459  	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
 460  	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
 461  	//
 462  	// The backward scan lets us maintain the current type,
 463  	// update it when we see c0 != c1, and otherwise leave it alone.
 464  	// We want to identify all S positions with a preceding L.
 465  	// Position len(text) is one such position by definition, but we have
 466  	// nowhere to write it down, so we eliminate it by untruthfully
 467  	// setting isTypeS = false at the start of the loop.
 468  	c0, c1, isTypeS := int64(0), int64(0), false
 469  	for i := len(text) - 1; i >= 0; i-- {
 470  		c0, c1 = text[i], c0
 471  		if c0 < c1 {
 472  			isTypeS = true
 473  		} else if c0 > c1 && isTypeS {
 474  			isTypeS = false
 475  
 476  			// Bucket the index i+1 for the start of an LMS-substring.
 477  			b := bucket[c1] - 1
 478  			bucket[c1] = b
 479  			sa[b] = int64(i + 1)
 480  			lastB = b
 481  			numLMS++
 482  		}
 483  	}
 484  
 485  	// We recorded the LMS-substring starts but really want the ends.
 486  	// Luckily, with two differences, the start indexes and the end indexes are the same.
 487  	// The first difference is that the rightmost LMS-substring's end index is len(text),
 488  	// so the caller must pretend that sa[-1] == len(text), as noted above.
 489  	// The second difference is that the first leftmost LMS-substring start index
 490  	// does not end an earlier LMS-substring, so as an optimization we can omit
 491  	// that leftmost LMS-substring start index (the last one we wrote).
 492  	//
 493  	// Exception: if numLMS <= 1, the caller is not going to bother with
 494  	// the recursion at all and will treat the result as containing LMS-substring starts.
 495  	// In that case, we don't remove the final entry.
 496  	if numLMS > 1 {
 497  		sa[lastB] = 0
 498  	}
 499  	return numLMS
 500  }
 501  
 502  func induceSubL_8_64(text []byte, sa, freq, bucket []int64) {
 503  	// Initialize positions for left side of character buckets.
 504  	bucketMin_8_64(text, freq, bucket)
 505  	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
 506  
 507  	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
 508  	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
 509  	// Because j-1 is type L, inserting it into sa now will sort it correctly.
 510  	// But we want to distinguish a j-1 with j-2 of type L from type S.
 511  	// We can process the former but want to leave the latter for the caller.
 512  	// We record the difference by negating j-1 if it is preceded by type S.
 513  	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
 514  	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
 515  	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
 516  	// and so on, in sorted but not necessarily adjacent order, until it finds
 517  	// one preceded by an index of type S, at which point it must stop.
 518  	//
 519  	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
 520  	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
 521  	// only the indexes of the leftmost L-type indexes for each LMS-substring.
 522  	//
 523  	// The suffix array sa therefore serves simultaneously as input, output,
 524  	// and a miraculously well-tailored work queue.
 525  
 526  	// placeLMS_8_64 left out the implicit entry sa[-1] == len(text),
 527  	// corresponding to the identified type-L index len(text)-1.
 528  	// Process it before the left-to-right scan of sa proper.
 529  	// See body in loop for commentary.
 530  	k := len(text) - 1
 531  	c0, c1 := text[k-1], text[k]
 532  	if c0 < c1 {
 533  		k = -k
 534  	}
 535  
 536  	// Cache recently used bucket index:
 537  	// we're processing suffixes in sorted order
 538  	// and accessing buckets indexed by the
 539  	// byte before the sorted order, which still
 540  	// has very good locality.
 541  	// Invariant: b is cached, possibly dirty copy of bucket[cB].
 542  	cB := c1
 543  	b := bucket[cB]
 544  	sa[b] = int64(k)
 545  	b++
 546  
 547  	for i := 0; i < len(sa); i++ {
 548  		j := int(sa[i])
 549  		if j == 0 {
 550  			// Skip empty entry.
 551  			continue
 552  		}
 553  		if j < 0 {
 554  			// Leave discovered type-S index for caller.
 555  			sa[i] = int64(-j)
 556  			continue
 557  		}
 558  		sa[i] = 0
 559  
 560  		// Index j was on work queue, meaning k := j-1 is L-type,
 561  		// so we can now place k correctly into sa.
 562  		// If k-1 is L-type, queue k for processing later in this loop.
 563  		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
 564  		k := j - 1
 565  		c0, c1 := text[k-1], text[k]
 566  		if c0 < c1 {
 567  			k = -k
 568  		}
 569  
 570  		if cB != c1 {
 571  			bucket[cB] = b
 572  			cB = c1
 573  			b = bucket[cB]
 574  		}
 575  		sa[b] = int64(k)
 576  		b++
 577  	}
 578  }
 579  
 580  func induceSubL_32(text []int32, sa, freq, bucket []int32) {
 581  	// Initialize positions for left side of character buckets.
 582  	bucketMin_32(text, freq, bucket)
 583  
 584  	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
 585  	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
 586  	// Because j-1 is type L, inserting it into sa now will sort it correctly.
 587  	// But we want to distinguish a j-1 with j-2 of type L from type S.
 588  	// We can process the former but want to leave the latter for the caller.
 589  	// We record the difference by negating j-1 if it is preceded by type S.
 590  	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
 591  	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
 592  	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
 593  	// and so on, in sorted but not necessarily adjacent order, until it finds
 594  	// one preceded by an index of type S, at which point it must stop.
 595  	//
 596  	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
 597  	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
 598  	// only the indexes of the leftmost L-type indexes for each LMS-substring.
 599  	//
 600  	// The suffix array sa therefore serves simultaneously as input, output,
 601  	// and a miraculously well-tailored work queue.
 602  
 603  	// placeLMS_32 left out the implicit entry sa[-1] == len(text),
 604  	// corresponding to the identified type-L index len(text)-1.
 605  	// Process it before the left-to-right scan of sa proper.
 606  	// See body in loop for commentary.
 607  	k := len(text) - 1
 608  	c0, c1 := text[k-1], text[k]
 609  	if c0 < c1 {
 610  		k = -k
 611  	}
 612  
 613  	// Cache recently used bucket index:
 614  	// we're processing suffixes in sorted order
 615  	// and accessing buckets indexed by the
 616  	// int32 before the sorted order, which still
 617  	// has very good locality.
 618  	// Invariant: b is cached, possibly dirty copy of bucket[cB].
 619  	cB := c1
 620  	b := bucket[cB]
 621  	sa[b] = int32(k)
 622  	b++
 623  
 624  	for i := 0; i < len(sa); i++ {
 625  		j := int(sa[i])
 626  		if j == 0 {
 627  			// Skip empty entry.
 628  			continue
 629  		}
 630  		if j < 0 {
 631  			// Leave discovered type-S index for caller.
 632  			sa[i] = int32(-j)
 633  			continue
 634  		}
 635  		sa[i] = 0
 636  
 637  		// Index j was on work queue, meaning k := j-1 is L-type,
 638  		// so we can now place k correctly into sa.
 639  		// If k-1 is L-type, queue k for processing later in this loop.
 640  		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
 641  		k := j - 1
 642  		c0, c1 := text[k-1], text[k]
 643  		if c0 < c1 {
 644  			k = -k
 645  		}
 646  
 647  		if cB != c1 {
 648  			bucket[cB] = b
 649  			cB = c1
 650  			b = bucket[cB]
 651  		}
 652  		sa[b] = int32(k)
 653  		b++
 654  	}
 655  }
 656  
 657  func induceSubL_64(text []int64, sa, freq, bucket []int64) {
 658  	// Initialize positions for left side of character buckets.
 659  	bucketMin_64(text, freq, bucket)
 660  
 661  	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
 662  	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
 663  	// Because j-1 is type L, inserting it into sa now will sort it correctly.
 664  	// But we want to distinguish a j-1 with j-2 of type L from type S.
 665  	// We can process the former but want to leave the latter for the caller.
 666  	// We record the difference by negating j-1 if it is preceded by type S.
 667  	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
 668  	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
 669  	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
 670  	// and so on, in sorted but not necessarily adjacent order, until it finds
 671  	// one preceded by an index of type S, at which point it must stop.
 672  	//
 673  	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
 674  	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
 675  	// only the indexes of the leftmost L-type indexes for each LMS-substring.
 676  	//
 677  	// The suffix array sa therefore serves simultaneously as input, output,
 678  	// and a miraculously well-tailored work queue.
 679  
 680  	// placeLMS_64 left out the implicit entry sa[-1] == len(text),
 681  	// corresponding to the identified type-L index len(text)-1.
 682  	// Process it before the left-to-right scan of sa proper.
 683  	// See body in loop for commentary.
 684  	k := len(text) - 1
 685  	c0, c1 := text[k-1], text[k]
 686  	if c0 < c1 {
 687  		k = -k
 688  	}
 689  
 690  	// Cache recently used bucket index:
 691  	// we're processing suffixes in sorted order
 692  	// and accessing buckets indexed by the
 693  	// int64 before the sorted order, which still
 694  	// has very good locality.
 695  	// Invariant: b is cached, possibly dirty copy of bucket[cB].
 696  	cB := c1
 697  	b := bucket[cB]
 698  	sa[b] = int64(k)
 699  	b++
 700  
 701  	for i := 0; i < len(sa); i++ {
 702  		j := int(sa[i])
 703  		if j == 0 {
 704  			// Skip empty entry.
 705  			continue
 706  		}
 707  		if j < 0 {
 708  			// Leave discovered type-S index for caller.
 709  			sa[i] = int64(-j)
 710  			continue
 711  		}
 712  		sa[i] = 0
 713  
 714  		// Index j was on work queue, meaning k := j-1 is L-type,
 715  		// so we can now place k correctly into sa.
 716  		// If k-1 is L-type, queue k for processing later in this loop.
 717  		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
 718  		k := j - 1
 719  		c0, c1 := text[k-1], text[k]
 720  		if c0 < c1 {
 721  			k = -k
 722  		}
 723  
 724  		if cB != c1 {
 725  			bucket[cB] = b
 726  			cB = c1
 727  			b = bucket[cB]
 728  		}
 729  		sa[b] = int64(k)
 730  		b++
 731  	}
 732  }
 733  
 734  func induceSubS_8_64(text []byte, sa, freq, bucket []int64) {
 735  	// Initialize positions for right side of character buckets.
 736  	bucketMax_8_64(text, freq, bucket)
 737  	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
 738  
 739  	// Analogous to induceSubL_8_64 above,
 740  	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
 741  	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
 742  	// Because j-1 is type S, inserting it into sa now will sort it correctly.
 743  	// But we want to distinguish a j-1 with j-2 of type S from type L.
 744  	// We can process the former but want to leave the latter for the caller.
 745  	// We record the difference by negating j-1 if it is preceded by type L.
 746  	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
 747  	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
 748  	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
 749  	// and so on, in sorted but not necessarily adjacent order, until it finds
 750  	// one preceded by an index of type L, at which point it must stop.
 751  	// That index (preceded by one of type L) is an LMS-substring start.
 752  	//
 753  	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
 754  	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
 755  	// so that the loop finishes with the top of sa containing exactly
 756  	// the LMS-substring start indexes, sorted by LMS-substring.
 757  
 758  	// Cache recently used bucket index:
 759  	cB := byte(0)
 760  	b := bucket[cB]
 761  
 762  	top := len(sa)
 763  	for i := len(sa) - 1; i >= 0; i-- {
 764  		j := int(sa[i])
 765  		if j == 0 {
 766  			// Skip empty entry.
 767  			continue
 768  		}
 769  		sa[i] = 0
 770  		if j < 0 {
 771  			// Leave discovered LMS-substring start index for caller.
 772  			top--
 773  			sa[top] = int64(-j)
 774  			continue
 775  		}
 776  
 777  		// Index j was on work queue, meaning k := j-1 is S-type,
 778  		// so we can now place k correctly into sa.
 779  		// If k-1 is S-type, queue k for processing later in this loop.
 780  		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
 781  		k := j - 1
 782  		c1 := text[k]
 783  		c0 := text[k-1]
 784  		if c0 > c1 {
 785  			k = -k
 786  		}
 787  
 788  		if cB != c1 {
 789  			bucket[cB] = b
 790  			cB = c1
 791  			b = bucket[cB]
 792  		}
 793  		b--
 794  		sa[b] = int64(k)
 795  	}
 796  }
 797  
 798  func induceSubS_32(text []int32, sa, freq, bucket []int32) {
 799  	// Initialize positions for right side of character buckets.
 800  	bucketMax_32(text, freq, bucket)
 801  
 802  	// Analogous to induceSubL_32 above,
 803  	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
 804  	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
 805  	// Because j-1 is type S, inserting it into sa now will sort it correctly.
 806  	// But we want to distinguish a j-1 with j-2 of type S from type L.
 807  	// We can process the former but want to leave the latter for the caller.
 808  	// We record the difference by negating j-1 if it is preceded by type L.
 809  	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
 810  	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
 811  	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
 812  	// and so on, in sorted but not necessarily adjacent order, until it finds
 813  	// one preceded by an index of type L, at which point it must stop.
 814  	// That index (preceded by one of type L) is an LMS-substring start.
 815  	//
 816  	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
 817  	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
 818  	// so that the loop finishes with the top of sa containing exactly
 819  	// the LMS-substring start indexes, sorted by LMS-substring.
 820  
 821  	// Cache recently used bucket index:
 822  	cB := int32(0)
 823  	b := bucket[cB]
 824  
 825  	top := len(sa)
 826  	for i := len(sa) - 1; i >= 0; i-- {
 827  		j := int(sa[i])
 828  		if j == 0 {
 829  			// Skip empty entry.
 830  			continue
 831  		}
 832  		sa[i] = 0
 833  		if j < 0 {
 834  			// Leave discovered LMS-substring start index for caller.
 835  			top--
 836  			sa[top] = int32(-j)
 837  			continue
 838  		}
 839  
 840  		// Index j was on work queue, meaning k := j-1 is S-type,
 841  		// so we can now place k correctly into sa.
 842  		// If k-1 is S-type, queue k for processing later in this loop.
 843  		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
 844  		k := j - 1
 845  		c1 := text[k]
 846  		c0 := text[k-1]
 847  		if c0 > c1 {
 848  			k = -k
 849  		}
 850  
 851  		if cB != c1 {
 852  			bucket[cB] = b
 853  			cB = c1
 854  			b = bucket[cB]
 855  		}
 856  		b--
 857  		sa[b] = int32(k)
 858  	}
 859  }
 860  
 861  func induceSubS_64(text []int64, sa, freq, bucket []int64) {
 862  	// Initialize positions for right side of character buckets.
 863  	bucketMax_64(text, freq, bucket)
 864  
 865  	// Analogous to induceSubL_64 above,
 866  	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
 867  	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
 868  	// Because j-1 is type S, inserting it into sa now will sort it correctly.
 869  	// But we want to distinguish a j-1 with j-2 of type S from type L.
 870  	// We can process the former but want to leave the latter for the caller.
 871  	// We record the difference by negating j-1 if it is preceded by type L.
 872  	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
 873  	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
 874  	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
 875  	// and so on, in sorted but not necessarily adjacent order, until it finds
 876  	// one preceded by an index of type L, at which point it must stop.
 877  	// That index (preceded by one of type L) is an LMS-substring start.
 878  	//
 879  	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
 880  	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
 881  	// so that the loop finishes with the top of sa containing exactly
 882  	// the LMS-substring start indexes, sorted by LMS-substring.
 883  
 884  	// Cache recently used bucket index:
 885  	cB := int64(0)
 886  	b := bucket[cB]
 887  
 888  	top := len(sa)
 889  	for i := len(sa) - 1; i >= 0; i-- {
 890  		j := int(sa[i])
 891  		if j == 0 {
 892  			// Skip empty entry.
 893  			continue
 894  		}
 895  		sa[i] = 0
 896  		if j < 0 {
 897  			// Leave discovered LMS-substring start index for caller.
 898  			top--
 899  			sa[top] = int64(-j)
 900  			continue
 901  		}
 902  
 903  		// Index j was on work queue, meaning k := j-1 is S-type,
 904  		// so we can now place k correctly into sa.
 905  		// If k-1 is S-type, queue k for processing later in this loop.
 906  		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
 907  		k := j - 1
 908  		c1 := text[k]
 909  		c0 := text[k-1]
 910  		if c0 > c1 {
 911  			k = -k
 912  		}
 913  
 914  		if cB != c1 {
 915  			bucket[cB] = b
 916  			cB = c1
 917  			b = bucket[cB]
 918  		}
 919  		b--
 920  		sa[b] = int64(k)
 921  	}
 922  }
 923  
 924  func length_8_64(text []byte, sa []int64, numLMS int) {
 925  	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
 926  
 927  	// The encoding of N text bytes into a “length” word
 928  	// adds 1 to each byte, packs them into the bottom
 929  	// N*8 bits of a word, and then bitwise inverts the result.
 930  	// That is, the text sequence A B C (hex 41 42 43)
 931  	// encodes as ^uint64(0x42_43_44).
 932  	// LMS-substrings can never start or end with 0xFF.
 933  	// Adding 1 ensures the encoded byte sequence never
 934  	// starts or ends with 0x00, so that present bytes can be
 935  	// distinguished from zero-padding in the top bits,
 936  	// so the length need not be separately encoded.
 937  	// Inverting the bytes increases the chance that a
 938  	// 4-byte encoding will still be ≥ len(text).
 939  	// In particular, if the first byte is ASCII (<= 0x7E, so +1 <= 0x7F)
 940  	// then the high bit of the inversion will be set,
 941  	// making it clearly not a valid length (it would be a negative one).
 942  	//
 943  	// cx holds the pre-inverted encoding (the packed incremented bytes).
 944  	cx := uint64(0) // byte-only
 945  
 946  	// This stanza (until the blank line) is the "LMS-substring iterator",
 947  	// described in placeLMS_8_64 above, with one line added to maintain cx.
 948  	c0, c1, isTypeS := byte(0), byte(0), false
 949  	for i := len(text) - 1; i >= 0; i-- {
 950  		c0, c1 = text[i], c0
 951  		cx = cx<<8 | uint64(c1+1) // byte-only
 952  		if c0 < c1 {
 953  			isTypeS = true
 954  		} else if c0 > c1 && isTypeS {
 955  			isTypeS = false
 956  
 957  			// Index j = i+1 is the start of an LMS-substring.
 958  			// Compute length or encoded text to store in sa[j/2].
 959  			j := i + 1
 960  			var code int64
 961  			if end == 0 {
 962  				code = 0
 963  			} else {
 964  				code = int64(end - j)
 965  				if code <= 64/8 && ^cx >= uint64(len(text)) { // byte-only
 966  					code = int64(^cx) // byte-only
 967  				} // byte-only
 968  			}
 969  			sa[j>>1] = code
 970  			end = j + 1
 971  			cx = uint64(c1 + 1) // byte-only
 972  		}
 973  	}
 974  }
 975  
 976  func length_32(text []int32, sa []int32, numLMS int) {
 977  	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
 978  
 979  	// The encoding of N text int32s into a “length” word
 980  	// adds 1 to each int32, packs them into the bottom
 981  	// N*8 bits of a word, and then bitwise inverts the result.
 982  	// That is, the text sequence A B C (hex 41 42 43)
 983  	// encodes as ^uint32(0x42_43_44).
 984  	// LMS-substrings can never start or end with 0xFF.
 985  	// Adding 1 ensures the encoded int32 sequence never
 986  	// starts or ends with 0x00, so that present int32s can be
 987  	// distinguished from zero-padding in the top bits,
 988  	// so the length need not be separately encoded.
 989  	// Inverting the int32s increases the chance that a
 990  	// 4-int32 encoding will still be ≥ len(text).
 991  	// In particular, if the first int32 is ASCII (<= 0x7E, so +1 <= 0x7F)
 992  	// then the high bit of the inversion will be set,
 993  	// making it clearly not a valid length (it would be a negative one).
 994  	//
 995  	// cx holds the pre-inverted encoding (the packed incremented int32s).
 996  
 997  	// This stanza (until the blank line) is the "LMS-substring iterator",
 998  	// described in placeLMS_32 above, with one line added to maintain cx.
 999  	c0, c1, isTypeS := int32(0), int32(0), false
1000  	for i := len(text) - 1; i >= 0; i-- {
1001  		c0, c1 = text[i], c0
1002  		if c0 < c1 {
1003  			isTypeS = true
1004  		} else if c0 > c1 && isTypeS {
1005  			isTypeS = false
1006  
1007  			// Index j = i+1 is the start of an LMS-substring.
1008  			// Compute length or encoded text to store in sa[j/2].
1009  			j := i + 1
1010  			var code int32
1011  			if end == 0 {
1012  				code = 0
1013  			} else {
1014  				code = int32(end - j)
1015  			}
1016  			sa[j>>1] = code
1017  			end = j + 1
1018  		}
1019  	}
1020  }
1021  
1022  func length_64(text []int64, sa []int64, numLMS int) {
1023  	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
1024  
1025  	// The encoding of N text int64s into a “length” word
1026  	// adds 1 to each int64, packs them into the bottom
1027  	// N*8 bits of a word, and then bitwise inverts the result.
1028  	// That is, the text sequence A B C (hex 41 42 43)
1029  	// encodes as ^uint64(0x42_43_44).
1030  	// LMS-substrings can never start or end with 0xFF.
1031  	// Adding 1 ensures the encoded int64 sequence never
1032  	// starts or ends with 0x00, so that present int64s can be
1033  	// distinguished from zero-padding in the top bits,
1034  	// so the length need not be separately encoded.
1035  	// Inverting the int64s increases the chance that a
1036  	// 4-int64 encoding will still be ≥ len(text).
1037  	// In particular, if the first int64 is ASCII (<= 0x7E, so +1 <= 0x7F)
1038  	// then the high bit of the inversion will be set,
1039  	// making it clearly not a valid length (it would be a negative one).
1040  	//
1041  	// cx holds the pre-inverted encoding (the packed incremented int64s).
1042  
1043  	// This stanza (until the blank line) is the "LMS-substring iterator",
1044  	// described in placeLMS_64 above, with one line added to maintain cx.
1045  	c0, c1, isTypeS := int64(0), int64(0), false
1046  	for i := len(text) - 1; i >= 0; i-- {
1047  		c0, c1 = text[i], c0
1048  		if c0 < c1 {
1049  			isTypeS = true
1050  		} else if c0 > c1 && isTypeS {
1051  			isTypeS = false
1052  
1053  			// Index j = i+1 is the start of an LMS-substring.
1054  			// Compute length or encoded text to store in sa[j/2].
1055  			j := i + 1
1056  			var code int64
1057  			if end == 0 {
1058  				code = 0
1059  			} else {
1060  				code = int64(end - j)
1061  			}
1062  			sa[j>>1] = code
1063  			end = j + 1
1064  		}
1065  	}
1066  }
1067  
1068  func assignID_8_64(text []byte, sa []int64, numLMS int) int {
1069  	id := 0
1070  	lastLen := int64(-1) // impossible
1071  	lastPos := int64(0)
1072  	for _, j := range sa[len(sa)-numLMS:] {
1073  		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
1074  		n := sa[j/2]
1075  		if n != lastLen {
1076  			goto New
1077  		}
1078  		if uint64(n) >= uint64(len(text)) {
1079  			// “Length” is really encoded full text, and they match.
1080  			goto Same
1081  		}
1082  		{
1083  			// Compare actual texts.
1084  			n := int(n)
1085  			this := text[j:][:n]
1086  			last := text[lastPos:][:n]
1087  			for i := 0; i < n; i++ {
1088  				if this[i] != last[i] {
1089  					goto New
1090  				}
1091  			}
1092  			goto Same
1093  		}
1094  	New:
1095  		id++
1096  		lastPos = j
1097  		lastLen = n
1098  	Same:
1099  		sa[j/2] = int64(id)
1100  	}
1101  	return id
1102  }
1103  
1104  func assignID_32(text []int32, sa []int32, numLMS int) int {
1105  	id := 0
1106  	lastLen := int32(-1) // impossible
1107  	lastPos := int32(0)
1108  	for _, j := range sa[len(sa)-numLMS:] {
1109  		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
1110  		n := sa[j/2]
1111  		if n != lastLen {
1112  			goto New
1113  		}
1114  		if uint32(n) >= uint32(len(text)) {
1115  			// “Length” is really encoded full text, and they match.
1116  			goto Same
1117  		}
1118  		{
1119  			// Compare actual texts.
1120  			n := int(n)
1121  			this := text[j:][:n]
1122  			last := text[lastPos:][:n]
1123  			for i := 0; i < n; i++ {
1124  				if this[i] != last[i] {
1125  					goto New
1126  				}
1127  			}
1128  			goto Same
1129  		}
1130  	New:
1131  		id++
1132  		lastPos = j
1133  		lastLen = n
1134  	Same:
1135  		sa[j/2] = int32(id)
1136  	}
1137  	return id
1138  }
1139  
1140  func assignID_64(text []int64, sa []int64, numLMS int) int {
1141  	id := 0
1142  	lastLen := int64(-1) // impossible
1143  	lastPos := int64(0)
1144  	for _, j := range sa[len(sa)-numLMS:] {
1145  		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
1146  		n := sa[j/2]
1147  		if n != lastLen {
1148  			goto New
1149  		}
1150  		if uint64(n) >= uint64(len(text)) {
1151  			// “Length” is really encoded full text, and they match.
1152  			goto Same
1153  		}
1154  		{
1155  			// Compare actual texts.
1156  			n := int(n)
1157  			this := text[j:][:n]
1158  			last := text[lastPos:][:n]
1159  			for i := 0; i < n; i++ {
1160  				if this[i] != last[i] {
1161  					goto New
1162  				}
1163  			}
1164  			goto Same
1165  		}
1166  	New:
1167  		id++
1168  		lastPos = j
1169  		lastLen = n
1170  	Same:
1171  		sa[j/2] = int64(id)
1172  	}
1173  	return id
1174  }
1175  
1176  func map_64(sa []int64, numLMS int) {
1177  	w := len(sa)
1178  	for i := len(sa) / 2; i >= 0; i-- {
1179  		j := sa[i]
1180  		if j > 0 {
1181  			w--
1182  			sa[w] = j - 1
1183  		}
1184  	}
1185  }
1186  
1187  func recurse_64(sa, oldTmp []int64, numLMS, maxID int) {
1188  	dst, saTmp, text := sa[:numLMS], sa[numLMS:len(sa)-numLMS], sa[len(sa)-numLMS:]
1189  
1190  	// Set up temporary space for recursive call.
1191  	// We must pass sais_64 a tmp buffer with at least maxID entries.
1192  	//
1193  	// The subproblem is guaranteed to have length at most len(sa)/2,
1194  	// so that sa can hold both the subproblem and its suffix array.
1195  	// Nearly all the time, however, the subproblem has length < len(sa)/3,
1196  	// in which case there is a subproblem-sized middle of sa that
1197  	// we can reuse for temporary space (saTmp).
1198  	// When recurse_64 is called from sais_8_64, oldTmp is length 512
1199  	// (from text_64), and saTmp will typically be much larger, so we'll use saTmp.
1200  	// When deeper recursions come back to recurse_64, now oldTmp is
1201  	// the saTmp from the top-most recursion, it is typically larger than
1202  	// the current saTmp (because the current sa gets smaller and smaller
1203  	// as the recursion gets deeper), and we keep reusing that top-most
1204  	// large saTmp instead of the offered smaller ones.
1205  	//
1206  	// Why is the subproblem length so often just under len(sa)/3?
1207  	// See Nong, Zhang, and Chen, section 3.6 for a plausible explanation.
1208  	// In brief, the len(sa)/2 case would correspond to an SLSLSLSLSLSL pattern
1209  	// in the input, perfect alternation of larger and smaller input bytes.
1210  	// Real text doesn't do that. If each L-type index is randomly followed
1211  	// by either an L-type or S-type index, then half the substrings will
1212  	// be of the form SLS, but the other half will be longer. Of that half,
1213  	// half (a quarter overall) will be SLLS; an eighth will be SLLLS, and so on.
1214  	// Not counting the final S in each (which overlaps the first S in the next),
1215  	// This works out to an average length 2×½ + 3×¼ + 4×⅛ + ... = 3.
1216  	// The space we need is further reduced by the fact that many of the
1217  	// short patterns like SLS will often be the same character sequences
1218  	// repeated throughout the text, reducing maxID relative to numLMS.
1219  	//
1220  	// For short inputs, the averages may not run in our favor, but then we
1221  	// can often fall back to using the length-512 tmp available in the
1222  	// top-most call. (Also a short allocation would not be a big deal.)
1223  	//
1224  	// For pathological inputs, we fall back to allocating a new tmp of length
1225  	// max(maxID, numLMS/2). This level of the recursion needs maxID,
1226  	// and all deeper levels of the recursion will need no more than numLMS/2,
1227  	// so this one allocation is guaranteed to suffice for the entire stack
1228  	// of recursive calls.
1229  	tmp := oldTmp
1230  	if len(tmp) < len(saTmp) {
1231  		tmp = saTmp
1232  	}
1233  	if len(tmp) < numLMS {
1234  		// TestSAIS/forcealloc reaches this code.
1235  		n := maxID
1236  		if n < numLMS/2 {
1237  			n = numLMS / 2
1238  		}
1239  		tmp = []int64{:n}
1240  	}
1241  
1242  	// sais_64 requires that the caller arrange to clear dst,
1243  	// because in general the caller may know dst is
1244  	// freshly-allocated and already cleared. But this one is not.
1245  	clear(dst)
1246  	sais_64(text, maxID, dst, tmp)
1247  }
1248  
1249  func unmap_8_64(text []byte, sa []int64, numLMS int) {
1250  	unmap := sa[len(sa)-numLMS:]
1251  	j := len(unmap)
1252  
1253  	// "LMS-substring iterator" (see placeLMS_8_64 above).
1254  	c0, c1, isTypeS := byte(0), byte(0), false
1255  	for i := len(text) - 1; i >= 0; i-- {
1256  		c0, c1 = text[i], c0
1257  		if c0 < c1 {
1258  			isTypeS = true
1259  		} else if c0 > c1 && isTypeS {
1260  			isTypeS = false
1261  
1262  			// Populate inverse map.
1263  			j--
1264  			unmap[j] = int64(i + 1)
1265  		}
1266  	}
1267  
1268  	// Apply inverse map to subproblem suffix array.
1269  	sa = sa[:numLMS]
1270  	for i := 0; i < len(sa); i++ {
1271  		sa[i] = unmap[sa[i]]
1272  	}
1273  }
1274  
1275  func unmap_32(text []int32, sa []int32, numLMS int) {
1276  	unmap := sa[len(sa)-numLMS:]
1277  	j := len(unmap)
1278  
1279  	// "LMS-substring iterator" (see placeLMS_32 above).
1280  	c0, c1, isTypeS := int32(0), int32(0), false
1281  	for i := len(text) - 1; i >= 0; i-- {
1282  		c0, c1 = text[i], c0
1283  		if c0 < c1 {
1284  			isTypeS = true
1285  		} else if c0 > c1 && isTypeS {
1286  			isTypeS = false
1287  
1288  			// Populate inverse map.
1289  			j--
1290  			unmap[j] = int32(i + 1)
1291  		}
1292  	}
1293  
1294  	// Apply inverse map to subproblem suffix array.
1295  	sa = sa[:numLMS]
1296  	for i := 0; i < len(sa); i++ {
1297  		sa[i] = unmap[sa[i]]
1298  	}
1299  }
1300  
1301  func unmap_64(text []int64, sa []int64, numLMS int) {
1302  	unmap := sa[len(sa)-numLMS:]
1303  	j := len(unmap)
1304  
1305  	// "LMS-substring iterator" (see placeLMS_64 above).
1306  	c0, c1, isTypeS := int64(0), int64(0), false
1307  	for i := len(text) - 1; i >= 0; i-- {
1308  		c0, c1 = text[i], c0
1309  		if c0 < c1 {
1310  			isTypeS = true
1311  		} else if c0 > c1 && isTypeS {
1312  			isTypeS = false
1313  
1314  			// Populate inverse map.
1315  			j--
1316  			unmap[j] = int64(i + 1)
1317  		}
1318  	}
1319  
1320  	// Apply inverse map to subproblem suffix array.
1321  	sa = sa[:numLMS]
1322  	for i := 0; i < len(sa); i++ {
1323  		sa[i] = unmap[sa[i]]
1324  	}
1325  }
1326  
1327  func expand_8_64(text []byte, freq, bucket, sa []int64, numLMS int) {
1328  	bucketMax_8_64(text, freq, bucket)
1329  	bucket = bucket[:256] // eliminate bound check for bucket[c] below
1330  
1331  	// Loop backward through sa, always tracking
1332  	// the next index to populate from sa[:numLMS].
1333  	// When we get to one, populate it.
1334  	// Zero the rest of the slots; they have dead values in them.
1335  	x := numLMS - 1
1336  	saX := sa[x]
1337  	c := text[saX]
1338  	b := bucket[c] - 1
1339  	bucket[c] = b
1340  
1341  	for i := len(sa) - 1; i >= 0; i-- {
1342  		if i != int(b) {
1343  			sa[i] = 0
1344  			continue
1345  		}
1346  		sa[i] = saX
1347  
1348  		// Load next entry to put down (if any).
1349  		if x > 0 {
1350  			x--
1351  			saX = sa[x] // TODO bounds check
1352  			c = text[saX]
1353  			b = bucket[c] - 1
1354  			bucket[c] = b
1355  		}
1356  	}
1357  }
1358  
1359  func expand_32(text []int32, freq, bucket, sa []int32, numLMS int) {
1360  	bucketMax_32(text, freq, bucket)
1361  
1362  	// Loop backward through sa, always tracking
1363  	// the next index to populate from sa[:numLMS].
1364  	// When we get to one, populate it.
1365  	// Zero the rest of the slots; they have dead values in them.
1366  	x := numLMS - 1
1367  	saX := sa[x]
1368  	c := text[saX]
1369  	b := bucket[c] - 1
1370  	bucket[c] = b
1371  
1372  	for i := len(sa) - 1; i >= 0; i-- {
1373  		if i != int(b) {
1374  			sa[i] = 0
1375  			continue
1376  		}
1377  		sa[i] = saX
1378  
1379  		// Load next entry to put down (if any).
1380  		if x > 0 {
1381  			x--
1382  			saX = sa[x] // TODO bounds check
1383  			c = text[saX]
1384  			b = bucket[c] - 1
1385  			bucket[c] = b
1386  		}
1387  	}
1388  }
1389  
1390  func expand_64(text []int64, freq, bucket, sa []int64, numLMS int) {
1391  	bucketMax_64(text, freq, bucket)
1392  
1393  	// Loop backward through sa, always tracking
1394  	// the next index to populate from sa[:numLMS].
1395  	// When we get to one, populate it.
1396  	// Zero the rest of the slots; they have dead values in them.
1397  	x := numLMS - 1
1398  	saX := sa[x]
1399  	c := text[saX]
1400  	b := bucket[c] - 1
1401  	bucket[c] = b
1402  
1403  	for i := len(sa) - 1; i >= 0; i-- {
1404  		if i != int(b) {
1405  			sa[i] = 0
1406  			continue
1407  		}
1408  		sa[i] = saX
1409  
1410  		// Load next entry to put down (if any).
1411  		if x > 0 {
1412  			x--
1413  			saX = sa[x] // TODO bounds check
1414  			c = text[saX]
1415  			b = bucket[c] - 1
1416  			bucket[c] = b
1417  		}
1418  	}
1419  }
1420  
1421  func induceL_8_64(text []byte, sa, freq, bucket []int64) {
1422  	// Initialize positions for left side of character buckets.
1423  	bucketMin_8_64(text, freq, bucket)
1424  	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
1425  
1426  	// This scan is similar to the one in induceSubL_8_64 above.
1427  	// That one arranges to clear all but the leftmost L-type indexes.
1428  	// This scan leaves all the L-type indexes and the original S-type
1429  	// indexes, but it negates the positive leftmost L-type indexes
1430  	// (the ones that induceS_8_64 needs to process).
1431  
1432  	// expand_8_64 left out the implicit entry sa[-1] == len(text),
1433  	// corresponding to the identified type-L index len(text)-1.
1434  	// Process it before the left-to-right scan of sa proper.
1435  	// See body in loop for commentary.
1436  	k := len(text) - 1
1437  	c0, c1 := text[k-1], text[k]
1438  	if c0 < c1 {
1439  		k = -k
1440  	}
1441  
1442  	// Cache recently used bucket index.
1443  	cB := c1
1444  	b := bucket[cB]
1445  	sa[b] = int64(k)
1446  	b++
1447  
1448  	for i := 0; i < len(sa); i++ {
1449  		j := int(sa[i])
1450  		if j <= 0 {
1451  			// Skip empty or negated entry (including negated zero).
1452  			continue
1453  		}
1454  
1455  		// Index j was on work queue, meaning k := j-1 is L-type,
1456  		// so we can now place k correctly into sa.
1457  		// If k-1 is L-type, queue k for processing later in this loop.
1458  		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
1459  		// If k is zero, k-1 doesn't exist, so we only need to leave it
1460  		// for the caller. The caller can't tell the difference between
1461  		// an empty slot and a non-empty zero, but there's no need
1462  		// to distinguish them anyway: the final suffix array will end up
1463  		// with one zero somewhere, and that will be a real zero.
1464  		k := j - 1
1465  		c1 := text[k]
1466  		if k > 0 {
1467  			if c0 := text[k-1]; c0 < c1 {
1468  				k = -k
1469  			}
1470  		}
1471  
1472  		if cB != c1 {
1473  			bucket[cB] = b
1474  			cB = c1
1475  			b = bucket[cB]
1476  		}
1477  		sa[b] = int64(k)
1478  		b++
1479  	}
1480  }
1481  
1482  func induceL_32(text []int32, sa, freq, bucket []int32) {
1483  	// Initialize positions for left side of character buckets.
1484  	bucketMin_32(text, freq, bucket)
1485  
1486  	// This scan is similar to the one in induceSubL_32 above.
1487  	// That one arranges to clear all but the leftmost L-type indexes.
1488  	// This scan leaves all the L-type indexes and the original S-type
1489  	// indexes, but it negates the positive leftmost L-type indexes
1490  	// (the ones that induceS_32 needs to process).
1491  
1492  	// expand_32 left out the implicit entry sa[-1] == len(text),
1493  	// corresponding to the identified type-L index len(text)-1.
1494  	// Process it before the left-to-right scan of sa proper.
1495  	// See body in loop for commentary.
1496  	k := len(text) - 1
1497  	c0, c1 := text[k-1], text[k]
1498  	if c0 < c1 {
1499  		k = -k
1500  	}
1501  
1502  	// Cache recently used bucket index.
1503  	cB := c1
1504  	b := bucket[cB]
1505  	sa[b] = int32(k)
1506  	b++
1507  
1508  	for i := 0; i < len(sa); i++ {
1509  		j := int(sa[i])
1510  		if j <= 0 {
1511  			// Skip empty or negated entry (including negated zero).
1512  			continue
1513  		}
1514  
1515  		// Index j was on work queue, meaning k := j-1 is L-type,
1516  		// so we can now place k correctly into sa.
1517  		// If k-1 is L-type, queue k for processing later in this loop.
1518  		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
1519  		// If k is zero, k-1 doesn't exist, so we only need to leave it
1520  		// for the caller. The caller can't tell the difference between
1521  		// an empty slot and a non-empty zero, but there's no need
1522  		// to distinguish them anyway: the final suffix array will end up
1523  		// with one zero somewhere, and that will be a real zero.
1524  		k := j - 1
1525  		c1 := text[k]
1526  		if k > 0 {
1527  			if c0 := text[k-1]; c0 < c1 {
1528  				k = -k
1529  			}
1530  		}
1531  
1532  		if cB != c1 {
1533  			bucket[cB] = b
1534  			cB = c1
1535  			b = bucket[cB]
1536  		}
1537  		sa[b] = int32(k)
1538  		b++
1539  	}
1540  }
1541  
1542  func induceL_64(text []int64, sa, freq, bucket []int64) {
1543  	// Initialize positions for left side of character buckets.
1544  	bucketMin_64(text, freq, bucket)
1545  
1546  	// This scan is similar to the one in induceSubL_64 above.
1547  	// That one arranges to clear all but the leftmost L-type indexes.
1548  	// This scan leaves all the L-type indexes and the original S-type
1549  	// indexes, but it negates the positive leftmost L-type indexes
1550  	// (the ones that induceS_64 needs to process).
1551  
1552  	// expand_64 left out the implicit entry sa[-1] == len(text),
1553  	// corresponding to the identified type-L index len(text)-1.
1554  	// Process it before the left-to-right scan of sa proper.
1555  	// See body in loop for commentary.
1556  	k := len(text) - 1
1557  	c0, c1 := text[k-1], text[k]
1558  	if c0 < c1 {
1559  		k = -k
1560  	}
1561  
1562  	// Cache recently used bucket index.
1563  	cB := c1
1564  	b := bucket[cB]
1565  	sa[b] = int64(k)
1566  	b++
1567  
1568  	for i := 0; i < len(sa); i++ {
1569  		j := int(sa[i])
1570  		if j <= 0 {
1571  			// Skip empty or negated entry (including negated zero).
1572  			continue
1573  		}
1574  
1575  		// Index j was on work queue, meaning k := j-1 is L-type,
1576  		// so we can now place k correctly into sa.
1577  		// If k-1 is L-type, queue k for processing later in this loop.
1578  		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
1579  		// If k is zero, k-1 doesn't exist, so we only need to leave it
1580  		// for the caller. The caller can't tell the difference between
1581  		// an empty slot and a non-empty zero, but there's no need
1582  		// to distinguish them anyway: the final suffix array will end up
1583  		// with one zero somewhere, and that will be a real zero.
1584  		k := j - 1
1585  		c1 := text[k]
1586  		if k > 0 {
1587  			if c0 := text[k-1]; c0 < c1 {
1588  				k = -k
1589  			}
1590  		}
1591  
1592  		if cB != c1 {
1593  			bucket[cB] = b
1594  			cB = c1
1595  			b = bucket[cB]
1596  		}
1597  		sa[b] = int64(k)
1598  		b++
1599  	}
1600  }
1601  
1602  func induceS_8_64(text []byte, sa, freq, bucket []int64) {
1603  	// Initialize positions for right side of character buckets.
1604  	bucketMax_8_64(text, freq, bucket)
1605  	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
1606  
1607  	cB := byte(0)
1608  	b := bucket[cB]
1609  
1610  	for i := len(sa) - 1; i >= 0; i-- {
1611  		j := int(sa[i])
1612  		if j >= 0 {
1613  			// Skip non-flagged entry.
1614  			// (This loop can't see an empty entry; 0 means the real zero index.)
1615  			continue
1616  		}
1617  
1618  		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
1619  		j = -j
1620  		sa[i] = int64(j)
1621  
1622  		// Index j was on work queue (encoded as -j but now decoded),
1623  		// meaning k := j-1 is L-type,
1624  		// so we can now place k correctly into sa.
1625  		// If k-1 is S-type, queue -k for processing later in this loop.
1626  		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
1627  		// If k is zero, k-1 doesn't exist, so we only need to leave it
1628  		// for the caller.
1629  		k := j - 1
1630  		c1 := text[k]
1631  		if k > 0 {
1632  			if c0 := text[k-1]; c0 <= c1 {
1633  				k = -k
1634  			}
1635  		}
1636  
1637  		if cB != c1 {
1638  			bucket[cB] = b
1639  			cB = c1
1640  			b = bucket[cB]
1641  		}
1642  		b--
1643  		sa[b] = int64(k)
1644  	}
1645  }
1646  
1647  func induceS_32(text []int32, sa, freq, bucket []int32) {
1648  	// Initialize positions for right side of character buckets.
1649  	bucketMax_32(text, freq, bucket)
1650  
1651  	cB := int32(0)
1652  	b := bucket[cB]
1653  
1654  	for i := len(sa) - 1; i >= 0; i-- {
1655  		j := int(sa[i])
1656  		if j >= 0 {
1657  			// Skip non-flagged entry.
1658  			// (This loop can't see an empty entry; 0 means the real zero index.)
1659  			continue
1660  		}
1661  
1662  		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
1663  		j = -j
1664  		sa[i] = int32(j)
1665  
1666  		// Index j was on work queue (encoded as -j but now decoded),
1667  		// meaning k := j-1 is L-type,
1668  		// so we can now place k correctly into sa.
1669  		// If k-1 is S-type, queue -k for processing later in this loop.
1670  		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
1671  		// If k is zero, k-1 doesn't exist, so we only need to leave it
1672  		// for the caller.
1673  		k := j - 1
1674  		c1 := text[k]
1675  		if k > 0 {
1676  			if c0 := text[k-1]; c0 <= c1 {
1677  				k = -k
1678  			}
1679  		}
1680  
1681  		if cB != c1 {
1682  			bucket[cB] = b
1683  			cB = c1
1684  			b = bucket[cB]
1685  		}
1686  		b--
1687  		sa[b] = int32(k)
1688  	}
1689  }
1690  
1691  func induceS_64(text []int64, sa, freq, bucket []int64) {
1692  	// Initialize positions for right side of character buckets.
1693  	bucketMax_64(text, freq, bucket)
1694  
1695  	cB := int64(0)
1696  	b := bucket[cB]
1697  
1698  	for i := len(sa) - 1; i >= 0; i-- {
1699  		j := int(sa[i])
1700  		if j >= 0 {
1701  			// Skip non-flagged entry.
1702  			// (This loop can't see an empty entry; 0 means the real zero index.)
1703  			continue
1704  		}
1705  
1706  		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
1707  		j = -j
1708  		sa[i] = int64(j)
1709  
1710  		// Index j was on work queue (encoded as -j but now decoded),
1711  		// meaning k := j-1 is L-type,
1712  		// so we can now place k correctly into sa.
1713  		// If k-1 is S-type, queue -k for processing later in this loop.
1714  		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
1715  		// If k is zero, k-1 doesn't exist, so we only need to leave it
1716  		// for the caller.
1717  		k := j - 1
1718  		c1 := text[k]
1719  		if k > 0 {
1720  			if c0 := text[k-1]; c0 <= c1 {
1721  				k = -k
1722  			}
1723  		}
1724  
1725  		if cB != c1 {
1726  			bucket[cB] = b
1727  			cB = c1
1728  			b = bucket[cB]
1729  		}
1730  		b--
1731  		sa[b] = int64(k)
1732  	}
1733  }
1734