
   1  // Copyright 2017 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package trace
   6  
import (
	"bytes"
	"container/heap"
	"math"
	"sort"
	"strings"
	"time"
)
  14  
// MutatorUtil is a change in mutator utilization at a particular
// time. Mutator utilization functions are represented as a
// time-ordered []MutatorUtil.
type MutatorUtil struct {
	// Time is the timestamp at which the utilization changes to Util.
	Time int64
	// Util is the mean mutator utilization starting at Time. This
	// is in the range [0, 1].
	Util float64
}
  24  
// UtilFlags controls the behavior of MutatorUtilization.
type UtilFlags int

// The flags are bits and may be OR'd together.
const (
	// UtilSTW means utilization should account for STW events.
	// This includes non-GC STW events, which are typically user-requested.
	UtilSTW UtilFlags = 1 << iota
	// UtilBackground means utilization should account for
	// background mark workers.
	UtilBackground
	// UtilAssist means utilization should account for mark
	// assists.
	UtilAssist
	// UtilSweep means utilization should account for sweeping.
	UtilSweep

	// UtilPerProc means each P should be given a separate
	// utilization function. Otherwise, there is a single function
	// and each P is given a fraction of the utilization.
	UtilPerProc
)
  46  
  47  // MutatorUtilizationV2 returns a set of mutator utilization functions
  48  // for the given v2 trace, passed as an io.Reader. Each function will
  49  // always end with 0 utilization. The bounds of each function are implicit
  50  // in the first and last event; outside of these bounds each function is
  51  // undefined.
  52  //
  53  // If the UtilPerProc flag is not given, this always returns a single
  54  // utilization function. Otherwise, it returns one function per P.
  55  func MutatorUtilizationV2(events []Event, flags UtilFlags) [][]MutatorUtil {
  56  	// Set up a bunch of analysis state.
  57  	type perP struct {
  58  		// gc > 0 indicates that GC is active on this P.
  59  		gc int
  60  		// series the logical series number for this P. This
  61  		// is necessary because Ps may be removed and then
  62  		// re-added, and then the new P needs a new series.
  63  		series int
  64  	}
  65  	type procsCount struct {
  66  		// time at which procs changed.
  67  		time int64
  68  		// n is the number of procs at that point.
  69  		n int
  70  	}
  71  	out := [][]MutatorUtil{}
  72  	stw := 0
  73  	ps := []perP{}
  74  	inGC := map[GoID]bool{}
  75  	states := map[GoID]GoState{}
  76  	bgMark := map[GoID]bool{}
  77  	procs := []procsCount{}
  78  	nSync := 0
  79  
  80  	// Helpers.
  81  	handleSTW := func(r Range) bool {
  82  		return flags&UtilSTW != 0 && isGCSTW(r)
  83  	}
  84  	handleMarkAssist := func(r Range) bool {
  85  		return flags&UtilAssist != 0 && isGCMarkAssist(r)
  86  	}
  87  	handleSweep := func(r Range) bool {
  88  		return flags&UtilSweep != 0 && isGCSweep(r)
  89  	}
  90  
  91  	// Iterate through the trace, tracking mutator utilization.
  92  	var lastEv *Event
  93  	for i := range events {
  94  		ev := &events[i]
  95  		lastEv = ev
  96  
  97  		// Process the event.
  98  		switch ev.Kind() {
  99  		case EventSync:
 100  			nSync = ev.Sync().N
 101  		case EventMetric:
 102  			m := ev.Metric()
 103  			if m.Name != "/sched/gomaxprocs:threads" {
 104  				break
 105  			}
 106  			gomaxprocs := int(m.Value.Uint64())
 107  			if len(ps) > gomaxprocs {
 108  				if flags&UtilPerProc != 0 {
 109  					// End each P's series.
 110  					for _, p := range ps[gomaxprocs:] {
 111  						out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0})
 112  					}
 113  				}
 114  				ps = ps[:gomaxprocs]
 115  			}
 116  			for len(ps) < gomaxprocs {
 117  				// Start new P's series.
 118  				series := 0
 119  				if flags&UtilPerProc != 0 || len(out) == 0 {
 120  					series = len(out)
 121  					out = append(out, []MutatorUtil{{int64(ev.Time()), 1}})
 122  				}
 123  				ps = append(ps, perP{series: series})
 124  			}
 125  			if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n {
 126  				procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs})
 127  			}
 128  		}
 129  		if len(ps) == 0 {
 130  			// We can't start doing any analysis until we see what GOMAXPROCS is.
 131  			// It will show up very early in the trace, but we need to be robust to
 132  			// something else being emitted beforehand.
 133  			continue
 134  		}
 135  
 136  		switch ev.Kind() {
 137  		case EventRangeActive:
 138  			if nSync > 1 {
 139  				// If we've seen a full generation, then we can be sure we're not finding out
 140  				// about something late; we have complete information after that point, and these
 141  				// active events will just be redundant.
 142  				break
 143  			}
 144  			// This range is active back to the start of the trace. We're failing to account
 145  			// for this since we just found out about it now. Fix up the mutator utilization.
 146  			//
 147  			// N.B. A trace can't start during a STW, so we don't handle it here.
 148  			r := ev.Range()
 149  			switch {
 150  			case handleMarkAssist(r):
 151  				if states[ev.Goroutine()].Executing() {
 152  					// This G has been in a mark assist *and running on its P* since the start
 153  					// of the trace. Handle same as sweep below.
 154  					if flags&UtilPerProc == 0 {
 155  						// Subtract out 1/gomaxprocs mutator utilization for all time periods
 156  						// from the beginning of the trace until now.
 157  						mi, pi := 0, 0
 158  						for mi < len(out[0]) {
 159  							if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
 160  								pi++
 161  								continue
 162  							}
 163  							out[0][mi].Util -= float64(1) / float64(procs[pi].n)
 164  							if out[0][mi].Util < 0 {
 165  								out[0][mi].Util = 0
 166  							}
 167  							mi++
 168  						}
 169  					}
 170  				}
 171  			case handleSweep(r):
 172  				// This P has been in sweep in the start of the trace.
 173  				//
 174  				// We don't need to do anything if UtilPerProc is set. If we get an event like
 175  				// this for a running P, it must show up the first time a P is mentioned. Therefore,
 176  				// this P won't actually have any MutatorUtils on its list yet.
 177  				//
 178  				// However, if UtilPerProc isn't set, then we probably have data from other procs
 179  				// and from previous events. We need to fix that up.
 180  				if flags&UtilPerProc != 0 {
 181  					break
 182  				}
 183  				// Subtract out 1/gomaxprocs mutator utilization for all time periods
 184  				// from the beginning of the trace until now.
 185  				mi, pi := 0, 0
 186  				for mi < len(out[0]) {
 187  					if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
 188  						pi++
 189  						continue
 190  					}
 191  					out[0][mi].Util -= float64(1) / float64(procs[pi].n)
 192  					if out[0][mi].Util < 0 {
 193  						out[0][mi].Util = 0
 194  					}
 195  					mi++
 196  				}
 197  			}
 198  			// After accounting for the portion we missed, this just acts like the
 199  			// beginning of a new range.
 200  			if handleSTW(r) {
 201  				stw++
 202  			} else if handleSweep(r) {
 203  				ps[ev.Proc()].gc++
 204  			} else if handleMarkAssist(r) {
 205  				ps[ev.Proc()].gc++
 206  				if g := r.Scope.Goroutine(); g != NoGoroutine {
 207  					inGC[g] = true
 208  				}
 209  			}
 210  		case EventRangeBegin:
 211  			r := ev.Range()
 212  			if handleSTW(r) {
 213  				stw++
 214  			} else if handleSweep(r) {
 215  				ps[ev.Proc()].gc++
 216  			} else if handleMarkAssist(r) {
 217  				ps[ev.Proc()].gc++
 218  				if g := r.Scope.Goroutine(); g != NoGoroutine {
 219  					inGC[g] = true
 220  				}
 221  			}
 222  		case EventRangeEnd:
 223  			r := ev.Range()
 224  			if handleSTW(r) {
 225  				stw--
 226  			} else if handleSweep(r) {
 227  				ps[ev.Proc()].gc--
 228  			} else if handleMarkAssist(r) {
 229  				ps[ev.Proc()].gc--
 230  				if g := r.Scope.Goroutine(); g != NoGoroutine {
 231  					delete(inGC, g)
 232  				}
 233  			}
 234  		case EventStateTransition:
 235  			st := ev.StateTransition()
 236  			if st.Resource.Kind != ResourceGoroutine {
 237  				break
 238  			}
 239  			old, new := st.Goroutine()
 240  			g := st.Resource.Goroutine()
 241  			if inGC[g] || bgMark[g] {
 242  				if !old.Executing() && new.Executing() {
 243  					// Started running while doing GC things.
 244  					ps[ev.Proc()].gc++
 245  				} else if old.Executing() && !new.Executing() {
 246  					// Stopped running while doing GC things.
 247  					ps[ev.Proc()].gc--
 248  				}
 249  			}
 250  			states[g] = new
 251  		case EventLabel:
 252  			l := ev.Label()
 253  			if flags&UtilBackground != 0 && bytes.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" {
 254  				// Background mark worker.
 255  				//
 256  				// If we're in per-proc mode, we don't
 257  				// count dedicated workers because
 258  				// they kick all of the goroutines off
 259  				// that P, so don't directly
 260  				// contribute to goroutine latency.
 261  				if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") {
 262  					bgMark[ev.Goroutine()] = true
 263  					ps[ev.Proc()].gc++
 264  				}
 265  			}
 266  		}
 267  
 268  		if flags&UtilPerProc == 0 {
 269  			// Compute the current average utilization.
 270  			if len(ps) == 0 {
 271  				continue
 272  			}
 273  			gcPs := 0
 274  			if stw > 0 {
 275  				gcPs = len(ps)
 276  			} else {
 277  				for i := range ps {
 278  					if ps[i].gc > 0 {
 279  						gcPs++
 280  					}
 281  				}
 282  			}
 283  			mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))}
 284  
 285  			// Record the utilization change. (Since
 286  			// len(ps) == len(out), we know len(out) > 0.)
 287  			out[0] = addUtil(out[0], mu)
 288  		} else {
 289  			// Check for per-P utilization changes.
 290  			for i := range ps {
 291  				p := &ps[i]
 292  				util := 1.0
 293  				if stw > 0 || p.gc > 0 {
 294  					util = 0.0
 295  				}
 296  				out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util})
 297  			}
 298  		}
 299  	}
 300  
 301  	// No events in the stream.
 302  	if lastEv == nil {
 303  		return nil
 304  	}
 305  
 306  	// Add final 0 utilization event to any remaining series. This
 307  	// is important to mark the end of the trace. The exact value
 308  	// shouldn't matter since no window should extend beyond this,
 309  	// but using 0 is symmetric with the start of the trace.
 310  	mu := MutatorUtil{int64(lastEv.Time()), 0}
 311  	for i := range ps {
 312  		out[ps[i].series] = addUtil(out[ps[i].series], mu)
 313  	}
 314  	return out
 315  }
 316  
 317  func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
 318  	if len(util) > 0 {
 319  		if mu.Util == util[len(util)-1].Util {
 320  			// No change.
 321  			return util
 322  		}
 323  		if mu.Time == util[len(util)-1].Time {
 324  			// Take the lowest utilization at a time stamp.
 325  			if mu.Util < util[len(util)-1].Util {
 326  				util[len(util)-1] = mu
 327  			}
 328  			return util
 329  		}
 330  	}
 331  	return append(util, mu)
 332  }
 333  
 334  // totalUtil is total utilization, measured in nanoseconds. This is a
 335  // separate type primarily to distinguish it from mean utilization,
 336  // which is also a float64.
 337  type totalUtil float64
 338  
 339  func totalUtilOf(meanUtil float64, dur int64) totalUtil {
 340  	return totalUtil(meanUtil * float64(dur))
 341  }
 342  
 343  // mean returns the mean utilization over dur.
 344  func (u totalUtil) mean(dur time.Duration) float64 {
 345  	return float64(u) / float64(dur)
 346  }
 347  
// An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes.
type MMUCurve struct {
	// series holds the per-utilization-function analysis state.
	series []mmuSeries
}
 353  
// mmuSeries is the MMU analysis state for a single mutator
// utilization function.
type mmuSeries struct {
	// util is the time-ordered mutator utilization function.
	util []MutatorUtil
	// sums[j] is the cumulative sum of util[:j].
	sums []totalUtil
	// bands summarizes util in non-overlapping bands of duration
	// bandDur.
	bands []mmuBand
	// bandDur is the duration of each band.
	bandDur int64
}
 364  
// mmuBand is a summary of one band of a utilization function.
type mmuBand struct {
	// minUtil is the minimum instantaneous mutator utilization in
	// this band.
	minUtil float64
	// cumUtil is the cumulative total mutator utilization between
	// time 0 and the left edge of this band.
	cumUtil totalUtil

	// integrator is the integrator for the left edge of this
	// band.
	integrator integrator
}
 377  
 378  // NewMMUCurve returns an MMU curve for the given mutator utilization
 379  // function.
 380  func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
 381  	series := []mmuSeries{:len(utils)}
 382  	for i, util := range utils {
 383  		series[i] = newMMUSeries(util)
 384  	}
 385  	return &MMUCurve{series}
 386  }
 387  
// bandsPerSeries is the number of bands to divide each series into.
// This is only changed by tests. (newMMUSeries uses it as an upper
// bound; a series with fewer events gets fewer bands.)
var bandsPerSeries = 1000
 391  
 392  func newMMUSeries(util []MutatorUtil) mmuSeries {
 393  	// Compute cumulative sum.
 394  	sums := []totalUtil{:len(util)}
 395  	var prev MutatorUtil
 396  	var sum totalUtil
 397  	for j, u := range util {
 398  		sum += totalUtilOf(prev.Util, u.Time-prev.Time)
 399  		sums[j] = sum
 400  		prev = u
 401  	}
 402  
 403  	// Divide the utilization curve up into equal size
 404  	// non-overlapping "bands" and compute a summary for each of
 405  	// these bands.
 406  	//
 407  	// Compute the duration of each band.
 408  	numBands := bandsPerSeries
 409  	if numBands > len(util) {
 410  		// There's no point in having lots of bands if there
 411  		// aren't many events.
 412  		numBands = len(util)
 413  	}
 414  	dur := util[len(util)-1].Time - util[0].Time
 415  	bandDur := (dur + int64(numBands) - 1) / int64(numBands)
 416  	if bandDur < 1 {
 417  		bandDur = 1
 418  	}
 419  	// Compute the bands. There are numBands+1 bands in order to
 420  	// record the final cumulative sum.
 421  	bands := []mmuBand{:numBands+1}
 422  	s := mmuSeries{util, sums, bands, bandDur}
 423  	leftSum := integrator{&s, 0}
 424  	for i := range bands {
 425  		startTime, endTime := s.bandTime(i)
 426  		cumUtil := leftSum.advance(startTime)
 427  		predIdx := leftSum.pos
 428  		minUtil := 1.0
 429  		for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
 430  			minUtil = math.Min(minUtil, util[i].Util)
 431  		}
 432  		bands[i] = mmuBand{minUtil, cumUtil, leftSum}
 433  	}
 434  
 435  	return s
 436  }
 437  
 438  func (s *mmuSeries) bandTime(i int) (start, end int64) {
 439  	start = int64(i)*s.bandDur + s.util[0].Time
 440  	end = start + s.bandDur
 441  	return
 442  }
 443  
 444  type bandUtil struct {
 445  	// Utilization series index
 446  	series int
 447  	// Band index
 448  	i int
 449  	// Lower bound of mutator utilization for all windows
 450  	// with a left edge in this band.
 451  	utilBound float64
 452  }
 453  
 454  type bandUtilHeap []bandUtil
 455  
 456  func (h bandUtilHeap) Len() int {
 457  	return len(h)
 458  }
 459  
 460  func (h bandUtilHeap) Less(i, j int) bool {
 461  	return h[i].utilBound < h[j].utilBound
 462  }
 463  
 464  func (h bandUtilHeap) Swap(i, j int) {
 465  	h[i], h[j] = h[j], h[i]
 466  }
 467  
 468  func (h *bandUtilHeap) Push(x any) {
 469  	*h = append(*h, x.(bandUtil))
 470  }
 471  
 472  func (h *bandUtilHeap) Pop() any {
 473  	x := (*h)[len(*h)-1]
 474  	*h = (*h)[:len(*h)-1]
 475  	return x
 476  }
 477  
 478  // UtilWindow is a specific window at Time.
 479  type UtilWindow struct {
 480  	Time int64
 481  	// MutatorUtil is the mean mutator utilization in this window.
 482  	MutatorUtil float64
 483  }
 484  
 485  type utilHeap []UtilWindow
 486  
 487  func (h utilHeap) Len() int {
 488  	return len(h)
 489  }
 490  
 491  func (h utilHeap) Less(i, j int) bool {
 492  	if h[i].MutatorUtil != h[j].MutatorUtil {
 493  		return h[i].MutatorUtil > h[j].MutatorUtil
 494  	}
 495  	return h[i].Time > h[j].Time
 496  }
 497  
 498  func (h utilHeap) Swap(i, j int) {
 499  	h[i], h[j] = h[j], h[i]
 500  }
 501  
 502  func (h *utilHeap) Push(x any) {
 503  	*h = append(*h, x.(UtilWindow))
 504  }
 505  
 506  func (h *utilHeap) Pop() any {
 507  	x := (*h)[len(*h)-1]
 508  	*h = (*h)[:len(*h)-1]
 509  	return x
 510  }
 511  
// An accumulator takes a windowed mutator utilization function and
// tracks various statistics for that function.
type accumulator struct {
	// mmu is the minimum mutator utilization observed so far.
	mmu float64

	// bound is the mutator utilization bound where adding any
	// mutator utilization above this bound cannot affect the
	// accumulated statistics.
	bound float64

	// Worst N window tracking.
	// nWorst is how many of the worst windows to keep; wHeap holds
	// them, rooted at the highest-utilization (least worst) one.
	nWorst int
	wHeap  utilHeap

	// Mutator utilization distribution tracking
	mud *mud
	// preciseMass is the distribution mass that must be precise
	// before accumulation is stopped.
	preciseMass float64
	// lastTime and lastMU are the previous point added to the
	// windowed mutator utilization function.
	lastTime int64
	lastMU   float64
}
 536  
// resetTime declares a discontinuity in the windowed mutator
// utilization function by resetting the current time.
func (acc *accumulator) resetTime() {
	// This only matters for distribution collection, since that's
	// the only thing that depends on the progression of the
	// windowed mutator utilization function.
	//
	// math.MaxInt64 is a sentinel meaning "no previous point";
	// addMU checks for it before updating the distribution.
	acc.lastTime = math.MaxInt64
}
 545  
// addMU adds a point to the windowed mutator utilization function at
// (time, mu). This must be called for monotonically increasing values
// of time.
//
// It returns true if further calls to addMU would be pointless.
func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
	// Track the running minimum (the MMU itself).
	if mu < acc.mmu {
		acc.mmu = mu
	}
	acc.bound = acc.mmu

	if acc.nWorst == 0 {
		// If the minimum has reached zero, it can't go any
		// lower, so we can stop early.
		return mu == 0
	}

	// Consider adding this window to the n worst.
	if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
		// This window is lower than the K'th worst window.
		//
		// Check if there's any overlapping window
		// already in the heap and keep whichever is
		// worse.
		for i, ui := range acc.wHeap {
			// Two windows of length `window` overlap iff each
			// starts before the other ends.
			if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
				if ui.MutatorUtil <= mu {
					// Keep the first window.
					goto keep
				} else {
					// Replace it with this window.
					heap.Remove(&acc.wHeap, i)
					break
				}
			}
		}

		heap.Push(&acc.wHeap, UtilWindow{time, mu})
		if len(acc.wHeap) > acc.nWorst {
			// Evict the least worst window to stay at nWorst.
			heap.Pop(&acc.wHeap)
		}
	keep:
	}

	if len(acc.wHeap) < acc.nWorst {
		// We don't have N windows yet, so keep accumulating.
		acc.bound = 1.0
	} else {
		// Anything above the least worst window has no effect.
		acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
	}

	if acc.mud != nil {
		// lastTime == math.MaxInt64 marks a discontinuity set by
		// resetTime; skip the distribution update across it.
		if acc.lastTime != math.MaxInt64 {
			// Update distribution.
			acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
		}
		acc.lastTime, acc.lastMU = time, mu
		if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
			acc.bound = math.Max(acc.bound, mudBound)
		} else {
			// We haven't accumulated enough total precise
			// mass yet to even reach our goal, so keep
			// accumulating.
			acc.bound = 1
		}
		// It's not worth checking percentiles every time, so
		// just keep accumulating this band.
		return false
	}

	// If we've found enough 0 utilizations, we can stop immediately.
	return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
}
 620  
 621  // MMU returns the minimum mutator utilization for the given time
 622  // window. This is the minimum utilization for all windows of this
 623  // duration across the execution. The returned value is in the range
 624  // [0, 1].
 625  func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
 626  	acc := accumulator{mmu: 1.0, bound: 1.0}
 627  	c.mmu(window, &acc)
 628  	return acc.mmu
 629  }
 630  
 631  // Examples returns n specific examples of the lowest mutator
 632  // utilization for the given window size. The returned windows will be
 633  // disjoint (otherwise there would be a huge number of
 634  // mostly-overlapping windows at the single lowest point). There are
 635  // no guarantees on which set of disjoint windows this returns.
 636  func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
 637  	acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
 638  	c.mmu(window, &acc)
 639  	sort.Sort(sort.Reverse(acc.wHeap))
 640  	return ([]UtilWindow)(acc.wHeap)
 641  }
 642  
 643  // MUD returns mutator utilization distribution quantiles for the
 644  // given window size.
 645  //
 646  // The mutator utilization distribution is the distribution of mean
 647  // mutator utilization across all windows of the given window size in
 648  // the trace.
 649  //
 650  // The minimum mutator utilization is the minimum (0th percentile) of
 651  // this distribution. (However, if only the minimum is desired, it's
 652  // more efficient to use the MMU method.)
 653  func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
 654  	if len(quantiles) == 0 {
 655  		return []float64{}
 656  	}
 657  
 658  	// Each unrefined band contributes a known total mass to the
 659  	// distribution (bandDur except at the end), but in an unknown
 660  	// way. However, we know that all the mass it contributes must
 661  	// be at or above its worst-case mean mutator utilization.
 662  	//
 663  	// Hence, we refine bands until the highest desired
 664  	// distribution quantile is less than the next worst-case mean
 665  	// mutator utilization. At this point, all further
 666  	// contributions to the distribution must be beyond the
 667  	// desired quantile and hence cannot affect it.
 668  	//
 669  	// First, find the highest desired distribution quantile.
 670  	maxQ := quantiles[0]
 671  	for _, q := range quantiles {
 672  		if q > maxQ {
 673  			maxQ = q
 674  		}
 675  	}
 676  	// The distribution's mass is in units of time (it's not
 677  	// normalized because this would make it more annoying to
 678  	// account for future contributions of unrefined bands). The
 679  	// total final mass will be the duration of the trace itself
 680  	// minus the window size. Using this, we can compute the mass
 681  	// corresponding to quantile maxQ.
 682  	var duration int64
 683  	for _, s := range c.series {
 684  		duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
 685  		if duration1 >= int64(window) {
 686  			duration += duration1 - int64(window)
 687  		}
 688  	}
 689  	qMass := float64(duration) * maxQ
 690  
 691  	// Accumulate the MUD until we have precise information for
 692  	// everything to the left of qMass.
 693  	acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: &mud{}}
 694  	acc.mud.setTrackMass(qMass)
 695  	c.mmu(window, &acc)
 696  
 697  	// Evaluate the quantiles on the accumulated MUD.
 698  	out := []float64{:len(quantiles)}
 699  	for i := range out {
 700  		mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
 701  		if math.IsNaN(mu) {
 702  			// There are a few legitimate ways this can
 703  			// happen:
 704  			//
 705  			// 1. If the window is the full trace
 706  			// duration, then the windowed MU function is
 707  			// only defined at a single point, so the MU
 708  			// distribution is not well-defined.
 709  			//
 710  			// 2. If there are no events, then the MU
 711  			// distribution has no mass.
 712  			//
 713  			// Either way, all of the quantiles will have
 714  			// converged toward the MMU at this point.
 715  			mu = acc.mmu
 716  		}
 717  		out[i] = mu
 718  	}
 719  	return out
 720  }
 721  
 722  func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
 723  	if window <= 0 {
 724  		acc.mmu = 0
 725  		return
 726  	}
 727  
 728  	var bandU bandUtilHeap
 729  	windows := []time.Duration{:len(c.series)}
 730  	for i, s := range c.series {
 731  		windows[i] = window
 732  		if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
 733  			windows[i] = max
 734  		}
 735  
 736  		bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
 737  		if bandU == nil {
 738  			bandU = bandU1
 739  		} else {
 740  			bandU = append(bandU, bandU1...)
 741  		}
 742  	}
 743  
 744  	// Process bands from lowest utilization bound to highest.
 745  	heap.Init(&bandU)
 746  
 747  	// Refine each band into a precise window and MMU until
 748  	// refining the next lowest band can no longer affect the MMU
 749  	// or windows.
 750  	for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
 751  		i := bandU[0].series
 752  		c.series[i].bandMMU(bandU[0].i, windows[i], acc)
 753  		heap.Pop(&bandU)
 754  	}
 755  }
 756  
 757  func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
 758  	// For each band, compute the worst-possible total mutator
 759  	// utilization for all windows that start in that band.
 760  
 761  	// minBands is the minimum number of bands a window can span
 762  	// and maxBands is the maximum number of bands a window can
 763  	// span in any alignment.
 764  	minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
 765  	maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
 766  	if window > 1 && maxBands < 2 {
 767  		panic("maxBands < 2")
 768  	}
 769  	tailDur := int64(window) % c.bandDur
 770  	nUtil := len(c.bands) - maxBands + 1
 771  	if nUtil < 0 {
 772  		nUtil = 0
 773  	}
 774  	bandU := []bandUtil{:nUtil}
 775  	for i := range bandU {
 776  		// To compute the worst-case MU, we assume the minimum
 777  		// for any bands that are only partially overlapped by
 778  		// some window and the mean for any bands that are
 779  		// completely covered by all windows.
 780  		var util totalUtil
 781  
 782  		// Find the lowest and second lowest of the partial
 783  		// bands.
 784  		l := c.bands[i].minUtil
 785  		r1 := c.bands[i+minBands-1].minUtil
 786  		r2 := c.bands[i+maxBands-1].minUtil
 787  		minBand := math.Min(l, math.Min(r1, r2))
 788  		// Assume the worst window maximally overlaps the
 789  		// worst minimum and then the rest overlaps the second
 790  		// worst minimum.
 791  		if minBands == 1 {
 792  			util += totalUtilOf(minBand, int64(window))
 793  		} else {
 794  			util += totalUtilOf(minBand, c.bandDur)
 795  			midBand := 0.0
 796  			switch {
 797  			case minBand == l:
 798  				midBand = math.Min(r1, r2)
 799  			case minBand == r1:
 800  				midBand = math.Min(l, r2)
 801  			case minBand == r2:
 802  				midBand = math.Min(l, r1)
 803  			}
 804  			util += totalUtilOf(midBand, tailDur)
 805  		}
 806  
 807  		// Add the total mean MU of bands that are completely
 808  		// overlapped by all windows.
 809  		if minBands > 2 {
 810  			util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
 811  		}
 812  
 813  		bandU[i] = bandUtil{series, i, util.mean(window)}
 814  	}
 815  
 816  	return bandU
 817  }
 818  
// bandMMU computes the precise minimum mutator utilization for
// windows with a left edge in band bandIdx.
func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
	util := c.util

	// We think of the mutator utilization over time as the
	// box-filtered utilization function, which we call the
	// "windowed mutator utilization function". The resulting
	// function is continuous and piecewise linear (unless
	// window==0, which we handle elsewhere), where the boundaries
	// between segments occur when either edge of the window
	// encounters a change in the instantaneous mutator
	// utilization function. Hence, the minimum of this function
	// will always occur when one of the edges of the window
	// aligns with a utilization change, so these are the only
	// points we need to consider.
	//
	// We compute the mutator utilization function incrementally
	// by tracking the integral from t=0 to the left edge of the
	// window and to the right edge of the window.
	left := c.bands[bandIdx].integrator
	right := left
	time, endTime := c.bandTime(bandIdx)
	// Don't let the window's left edge start so late that the
	// window would extend past the end of the trace.
	if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
		endTime = utilEnd
	}
	acc.resetTime()
	for {
		// Advance edges to time and time+window.
		mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
		if acc.addMU(time, mu, window) {
			break
		}
		if time == endTime {
			break
		}

		// The maximum slope of the windowed mutator
		// utilization function is 1/window, so we can always
		// advance the time by at least (mu - mmu) * window
		// without dropping below mmu.
		minTime := time + int64((mu-acc.bound)*float64(window))

		// Advance the window to the next time where either
		// the left or right edge of the window encounters a
		// change in the utilization curve.
		if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
			time = t1
		} else {
			time = t2
		}
		if time < minTime {
			time = minTime
		}
		if time >= endTime {
			// For MMUs we could stop here, but for MUDs
			// it's important that we span the entire
			// band.
			time = endTime
		}
	}
}
 881  
// An integrator tracks a position in a utilization function and
// integrates it.
type integrator struct {
	// u is the series whose utilization function is integrated.
	u *mmuSeries
	// pos is the index in u.util of the current time's non-strict
	// predecessor.
	pos int
}
 890  
// advance returns the integral of the utilization function from 0 to
// time. advance must be called on monotonically increasing values of
// times.
func (in *integrator) advance(time int64) totalUtil {
	util, pos := in.u.util, in.pos
	// Advance pos until pos+1 is time's strict successor (making
	// pos time's non-strict predecessor).
	//
	// Very often, this will be nearby, so we optimize that case,
	// but it may be arbitrarily far away, so we handled that
	// efficiently, too.
	const maxSeq = 8
	if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
		// Nearby. Use a linear scan.
		for pos+1 < len(util) && util[pos+1].Time <= time {
			pos++
		}
	} else {
		// Far. Binary search for time's strict successor.
		l, r := pos, len(util)
		for l < r {
			h := int(uint(l+r) >> 1)
			if util[h].Time <= time {
				l = h + 1
			} else {
				r = h
			}
		}
		pos = l - 1 // Non-strict predecessor.
	}
	in.pos = pos
	// Add the partial contribution of the current segment, from
	// util[pos].Time up to time, on top of the precomputed prefix sum.
	var partial totalUtil
	if time != util[pos].Time {
		partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
	}
	return in.u.sums[pos] + partial
}
 928  
 929  // next returns the smallest time t' > time of a change in the
 930  // utilization function.
 931  func (in *integrator) next(time int64) int64 {
 932  	for _, u := range in.u.util[in.pos:] {
 933  		if u.Time > time {
 934  			return u.Time
 935  		}
 936  	}
 937  	return 1<<63 - 1
 938  }
 939  
 940  func isGCSTW(r Range) bool {
 941  	return bytes.HasPrefix(r.Name, "stop-the-world") && bytes.Contains(r.Name, "GC")
 942  }
 943  
 944  func isGCMarkAssist(r Range) bool {
 945  	return r.Name == "GC mark assist"
 946  }
 947  
 948  func isGCSweep(r Range) bool {
 949  	return r.Name == "GC incremental sweep"
 950  }
 951