1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package trace
import (
	"container/heap"
	"math"
	"sort"
	"strings"
	"time"
)
// MutatorUtil is a change in mutator utilization at a particular
// time. Mutator utilization functions are represented as a
// time-ordered []MutatorUtil.
type MutatorUtil struct {
	// Time is the trace timestamp, in nanoseconds, at which this
	// utilization value takes effect.
	Time int64
	// Util is the mean mutator utilization starting at Time. This
	// is in the range [0, 1].
	Util float64
}
// UtilFlags controls the behavior of MutatorUtilization.
type UtilFlags int

const (
	// UtilSTW means utilization should account for STW events.
	// This includes non-GC STW events, which are typically user-requested.
	UtilSTW UtilFlags = 1 << iota
	// UtilBackground means utilization should account for
	// background mark workers.
	UtilBackground
	// UtilAssist means utilization should account for mark
	// assists.
	UtilAssist
	// UtilSweep means utilization should account for sweeping.
	UtilSweep

	// UtilPerProc means each P should be given a separate
	// utilization function. Otherwise, there is a single function
	// and each P is given a fraction of the utilization.
	UtilPerProc
)
46 47 // MutatorUtilizationV2 returns a set of mutator utilization functions
48 // for the given v2 trace, passed as an io.Reader. Each function will
49 // always end with 0 utilization. The bounds of each function are implicit
50 // in the first and last event; outside of these bounds each function is
51 // undefined.
52 //
53 // If the UtilPerProc flag is not given, this always returns a single
54 // utilization function. Otherwise, it returns one function per P.
55 func MutatorUtilizationV2(events []Event, flags UtilFlags) [][]MutatorUtil {
56 // Set up a bunch of analysis state.
57 type perP struct {
58 // gc > 0 indicates that GC is active on this P.
59 gc int
60 // series the logical series number for this P. This
61 // is necessary because Ps may be removed and then
62 // re-added, and then the new P needs a new series.
63 series int
64 }
65 type procsCount struct {
66 // time at which procs changed.
67 time int64
68 // n is the number of procs at that point.
69 n int
70 }
71 out := [][]MutatorUtil{}
72 stw := 0
73 ps := []perP{}
74 inGC := map[GoID]bool{}
75 states := map[GoID]GoState{}
76 bgMark := map[GoID]bool{}
77 procs := []procsCount{}
78 nSync := 0
79 80 // Helpers.
81 handleSTW := func(r Range) bool {
82 return flags&UtilSTW != 0 && isGCSTW(r)
83 }
84 handleMarkAssist := func(r Range) bool {
85 return flags&UtilAssist != 0 && isGCMarkAssist(r)
86 }
87 handleSweep := func(r Range) bool {
88 return flags&UtilSweep != 0 && isGCSweep(r)
89 }
90 91 // Iterate through the trace, tracking mutator utilization.
92 var lastEv *Event
93 for i := range events {
94 ev := &events[i]
95 lastEv = ev
96 97 // Process the event.
98 switch ev.Kind() {
99 case EventSync:
100 nSync = ev.Sync().N
101 case EventMetric:
102 m := ev.Metric()
103 if m.Name != "/sched/gomaxprocs:threads" {
104 break
105 }
106 gomaxprocs := int(m.Value.Uint64())
107 if len(ps) > gomaxprocs {
108 if flags&UtilPerProc != 0 {
109 // End each P's series.
110 for _, p := range ps[gomaxprocs:] {
111 out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0})
112 }
113 }
114 ps = ps[:gomaxprocs]
115 }
116 for len(ps) < gomaxprocs {
117 // Start new P's series.
118 series := 0
119 if flags&UtilPerProc != 0 || len(out) == 0 {
120 series = len(out)
121 out = append(out, []MutatorUtil{{int64(ev.Time()), 1}})
122 }
123 ps = append(ps, perP{series: series})
124 }
125 if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n {
126 procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs})
127 }
128 }
129 if len(ps) == 0 {
130 // We can't start doing any analysis until we see what GOMAXPROCS is.
131 // It will show up very early in the trace, but we need to be robust to
132 // something else being emitted beforehand.
133 continue
134 }
135 136 switch ev.Kind() {
137 case EventRangeActive:
138 if nSync > 1 {
139 // If we've seen a full generation, then we can be sure we're not finding out
140 // about something late; we have complete information after that point, and these
141 // active events will just be redundant.
142 break
143 }
144 // This range is active back to the start of the trace. We're failing to account
145 // for this since we just found out about it now. Fix up the mutator utilization.
146 //
147 // N.B. A trace can't start during a STW, so we don't handle it here.
148 r := ev.Range()
149 switch {
150 case handleMarkAssist(r):
151 if states[ev.Goroutine()].Executing() {
152 // This G has been in a mark assist *and running on its P* since the start
153 // of the trace. Handle same as sweep below.
154 if flags&UtilPerProc == 0 {
155 // Subtract out 1/gomaxprocs mutator utilization for all time periods
156 // from the beginning of the trace until now.
157 mi, pi := 0, 0
158 for mi < len(out[0]) {
159 if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
160 pi++
161 continue
162 }
163 out[0][mi].Util -= float64(1) / float64(procs[pi].n)
164 if out[0][mi].Util < 0 {
165 out[0][mi].Util = 0
166 }
167 mi++
168 }
169 }
170 }
171 case handleSweep(r):
172 // This P has been in sweep in the start of the trace.
173 //
174 // We don't need to do anything if UtilPerProc is set. If we get an event like
175 // this for a running P, it must show up the first time a P is mentioned. Therefore,
176 // this P won't actually have any MutatorUtils on its list yet.
177 //
178 // However, if UtilPerProc isn't set, then we probably have data from other procs
179 // and from previous events. We need to fix that up.
180 if flags&UtilPerProc != 0 {
181 break
182 }
183 // Subtract out 1/gomaxprocs mutator utilization for all time periods
184 // from the beginning of the trace until now.
185 mi, pi := 0, 0
186 for mi < len(out[0]) {
187 if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
188 pi++
189 continue
190 }
191 out[0][mi].Util -= float64(1) / float64(procs[pi].n)
192 if out[0][mi].Util < 0 {
193 out[0][mi].Util = 0
194 }
195 mi++
196 }
197 }
198 // After accounting for the portion we missed, this just acts like the
199 // beginning of a new range.
200 if handleSTW(r) {
201 stw++
202 } else if handleSweep(r) {
203 ps[ev.Proc()].gc++
204 } else if handleMarkAssist(r) {
205 ps[ev.Proc()].gc++
206 if g := r.Scope.Goroutine(); g != NoGoroutine {
207 inGC[g] = true
208 }
209 }
210 case EventRangeBegin:
211 r := ev.Range()
212 if handleSTW(r) {
213 stw++
214 } else if handleSweep(r) {
215 ps[ev.Proc()].gc++
216 } else if handleMarkAssist(r) {
217 ps[ev.Proc()].gc++
218 if g := r.Scope.Goroutine(); g != NoGoroutine {
219 inGC[g] = true
220 }
221 }
222 case EventRangeEnd:
223 r := ev.Range()
224 if handleSTW(r) {
225 stw--
226 } else if handleSweep(r) {
227 ps[ev.Proc()].gc--
228 } else if handleMarkAssist(r) {
229 ps[ev.Proc()].gc--
230 if g := r.Scope.Goroutine(); g != NoGoroutine {
231 delete(inGC, g)
232 }
233 }
234 case EventStateTransition:
235 st := ev.StateTransition()
236 if st.Resource.Kind != ResourceGoroutine {
237 break
238 }
239 old, new := st.Goroutine()
240 g := st.Resource.Goroutine()
241 if inGC[g] || bgMark[g] {
242 if !old.Executing() && new.Executing() {
243 // Started running while doing GC things.
244 ps[ev.Proc()].gc++
245 } else if old.Executing() && !new.Executing() {
246 // Stopped running while doing GC things.
247 ps[ev.Proc()].gc--
248 }
249 }
250 states[g] = new
251 case EventLabel:
252 l := ev.Label()
253 if flags&UtilBackground != 0 && bytes.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" {
254 // Background mark worker.
255 //
256 // If we're in per-proc mode, we don't
257 // count dedicated workers because
258 // they kick all of the goroutines off
259 // that P, so don't directly
260 // contribute to goroutine latency.
261 if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") {
262 bgMark[ev.Goroutine()] = true
263 ps[ev.Proc()].gc++
264 }
265 }
266 }
267 268 if flags&UtilPerProc == 0 {
269 // Compute the current average utilization.
270 if len(ps) == 0 {
271 continue
272 }
273 gcPs := 0
274 if stw > 0 {
275 gcPs = len(ps)
276 } else {
277 for i := range ps {
278 if ps[i].gc > 0 {
279 gcPs++
280 }
281 }
282 }
283 mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))}
284 285 // Record the utilization change. (Since
286 // len(ps) == len(out), we know len(out) > 0.)
287 out[0] = addUtil(out[0], mu)
288 } else {
289 // Check for per-P utilization changes.
290 for i := range ps {
291 p := &ps[i]
292 util := 1.0
293 if stw > 0 || p.gc > 0 {
294 util = 0.0
295 }
296 out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util})
297 }
298 }
299 }
300 301 // No events in the stream.
302 if lastEv == nil {
303 return nil
304 }
305 306 // Add final 0 utilization event to any remaining series. This
307 // is important to mark the end of the trace. The exact value
308 // shouldn't matter since no window should extend beyond this,
309 // but using 0 is symmetric with the start of the trace.
310 mu := MutatorUtil{int64(lastEv.Time()), 0}
311 for i := range ps {
312 out[ps[i].series] = addUtil(out[ps[i].series], mu)
313 }
314 return out
315 }
316 317 func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
318 if len(util) > 0 {
319 if mu.Util == util[len(util)-1].Util {
320 // No change.
321 return util
322 }
323 if mu.Time == util[len(util)-1].Time {
324 // Take the lowest utilization at a time stamp.
325 if mu.Util < util[len(util)-1].Util {
326 util[len(util)-1] = mu
327 }
328 return util
329 }
330 }
331 return append(util, mu)
332 }
// totalUtil is total utilization, measured in nanoseconds. This is a
// separate type primarily to distinguish it from mean utilization,
// which is also a float64.
type totalUtil float64

// totalUtilOf converts a mean utilization held for dur nanoseconds
// into a total utilization.
func totalUtilOf(meanUtil float64, dur int64) totalUtil {
	// Total utilization is the mean scaled by the duration.
	return totalUtil(meanUtil) * totalUtil(dur)
}

// mean returns the mean utilization over dur.
func (u totalUtil) mean(dur time.Duration) float64 {
	return float64(u) / float64(dur.Nanoseconds())
}
// An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes.
type MMUCurve struct {
	// series holds one precomputed summary per mutator
	// utilization function.
	series []mmuSeries
}
// mmuSeries is a single mutator utilization function together with
// precomputed summaries that accelerate MMU/MUD queries over it.
type mmuSeries struct {
	// util is the time-ordered mutator utilization function.
	util []MutatorUtil
	// sums[j] is the cumulative sum of util[:j].
	sums []totalUtil
	// bands summarizes util in non-overlapping bands of duration
	// bandDur.
	bands []mmuBand
	// bandDur is the duration of each band.
	bandDur int64
}
// mmuBand is a summary of a fixed-duration band of a mutator
// utilization function.
type mmuBand struct {
	// minUtil is the minimum instantaneous mutator utilization in
	// this band.
	minUtil float64
	// cumUtil is the cumulative total mutator utilization between
	// time 0 and the left edge of this band.
	cumUtil totalUtil

	// integrator is the integrator for the left edge of this
	// band.
	integrator integrator
}
377 378 // NewMMUCurve returns an MMU curve for the given mutator utilization
379 // function.
380 func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
381 series := []mmuSeries{:len(utils)}
382 for i, util := range utils {
383 series[i] = newMMUSeries(util)
384 }
385 return &MMUCurve{series}
386 }
// bandsPerSeries is the number of bands to divide each series into.
// This is only changed by tests. (A series with fewer utilization
// points than this uses one band per point; see newMMUSeries.)
var bandsPerSeries = 1000
391 392 func newMMUSeries(util []MutatorUtil) mmuSeries {
393 // Compute cumulative sum.
394 sums := []totalUtil{:len(util)}
395 var prev MutatorUtil
396 var sum totalUtil
397 for j, u := range util {
398 sum += totalUtilOf(prev.Util, u.Time-prev.Time)
399 sums[j] = sum
400 prev = u
401 }
402 403 // Divide the utilization curve up into equal size
404 // non-overlapping "bands" and compute a summary for each of
405 // these bands.
406 //
407 // Compute the duration of each band.
408 numBands := bandsPerSeries
409 if numBands > len(util) {
410 // There's no point in having lots of bands if there
411 // aren't many events.
412 numBands = len(util)
413 }
414 dur := util[len(util)-1].Time - util[0].Time
415 bandDur := (dur + int64(numBands) - 1) / int64(numBands)
416 if bandDur < 1 {
417 bandDur = 1
418 }
419 // Compute the bands. There are numBands+1 bands in order to
420 // record the final cumulative sum.
421 bands := []mmuBand{:numBands+1}
422 s := mmuSeries{util, sums, bands, bandDur}
423 leftSum := integrator{&s, 0}
424 for i := range bands {
425 startTime, endTime := s.bandTime(i)
426 cumUtil := leftSum.advance(startTime)
427 predIdx := leftSum.pos
428 minUtil := 1.0
429 for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
430 minUtil = math.Min(minUtil, util[i].Util)
431 }
432 bands[i] = mmuBand{minUtil, cumUtil, leftSum}
433 }
434 435 return s
436 }
437 438 func (s *mmuSeries) bandTime(i int) (start, end int64) {
439 start = int64(i)*s.bandDur + s.util[0].Time
440 end = start + s.bandDur
441 return
442 }
// bandUtil associates a band in a particular series with a lower
// bound on the mean mutator utilization of any window whose left
// edge falls in that band.
type bandUtil struct {
	// series is the utilization series index.
	series int
	// i is the band index within the series.
	i int
	// utilBound is a lower bound of mutator utilization for all
	// windows with a left edge in this band.
	utilBound float64
}

// bandUtilHeap is a min-heap of bandUtil ordered by utilBound, for
// use with container/heap.
type bandUtilHeap []bandUtil

// Len implements heap.Interface.
func (h bandUtilHeap) Len() int { return len(h) }

// Less implements heap.Interface; bands with lower utilization
// bounds sort first.
func (h bandUtilHeap) Less(i, j int) bool { return h[i].utilBound < h[j].utilBound }

// Swap implements heap.Interface.
func (h bandUtilHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push implements heap.Interface.
func (h *bandUtilHeap) Push(x any) { *h = append(*h, x.(bandUtil)) }

// Pop implements heap.Interface.
func (h *bandUtilHeap) Pop() any {
	old := *h
	n := len(old) - 1
	last := old[n]
	*h = old[:n]
	return last
}
// UtilWindow is a specific window at Time.
type UtilWindow struct {
	Time int64
	// MutatorUtil is the mean mutator utilization in this window.
	MutatorUtil float64
}

// utilHeap is a heap of UtilWindow ordered so the root is the
// highest-utilization (least interesting) tracked window, breaking
// ties by later time. This lets callers evict the least-worst window
// while tracking the N worst.
type utilHeap []UtilWindow

// Len implements heap.Interface.
func (h utilHeap) Len() int { return len(h) }

// Less implements heap.Interface. Higher utilization sorts first;
// ties are broken by later time.
func (h utilHeap) Less(i, j int) bool {
	if h[i].MutatorUtil != h[j].MutatorUtil {
		return h[i].MutatorUtil > h[j].MutatorUtil
	}
	return h[i].Time > h[j].Time
}

// Swap implements heap.Interface.
func (h utilHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push implements heap.Interface.
func (h *utilHeap) Push(x any) { *h = append(*h, x.(UtilWindow)) }

// Pop implements heap.Interface.
func (h *utilHeap) Pop() any {
	old := *h
	n := len(old) - 1
	last := old[n]
	*h = old[:n]
	return last
}
// An accumulator takes a windowed mutator utilization function and
// tracks various statistics for that function.
type accumulator struct {
	// mmu is the minimum mutator utilization seen so far.
	mmu float64

	// bound is the mutator utilization bound where adding any
	// mutator utilization above this bound cannot affect the
	// accumulated statistics.
	bound float64

	// Worst N window tracking
	nWorst int
	wHeap  utilHeap

	// Mutator utilization distribution tracking
	mud *mud
	// preciseMass is the distribution mass that must be precise
	// before accumulation is stopped.
	preciseMass float64
	// lastTime and lastMU are the previous point added to the
	// windowed mutator utilization function. lastTime ==
	// math.MaxInt64 marks a discontinuity (see resetTime).
	lastTime int64
	lastMU   float64
}

// resetTime declares a discontinuity in the windowed mutator
// utilization function by resetting the current time.
func (acc *accumulator) resetTime() {
	// This only matters for distribution collection, since that's
	// the only thing that depends on the progression of the
	// windowed mutator utilization function.
	acc.lastTime = math.MaxInt64
}
// addMU adds a point to the windowed mutator utilization function at
// (time, mu). This must be called for monotonically increasing values
// of time.
//
// It returns true if further calls to addMU would be pointless.
func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
	if mu < acc.mmu {
		acc.mmu = mu
	}
	acc.bound = acc.mmu

	if acc.nWorst == 0 {
		// Not tracking worst windows or a distribution, so only
		// the MMU matters. If the minimum has reached zero, it
		// can't go any lower, so we can stop early.
		return mu == 0
	}

	// Consider adding this window to the n worst.
	if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
		// This window is lower than the K'th worst window.
		//
		// Check if there's any overlapping window
		// already in the heap and keep whichever is
		// worse.
		for i, ui := range acc.wHeap {
			if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
				if ui.MutatorUtil <= mu {
					// Keep the first window.
					goto keep
				} else {
					// Replace it with this window.
					heap.Remove(&acc.wHeap, i)
					break
				}
			}
		}

		heap.Push(&acc.wHeap, UtilWindow{time, mu})
		if len(acc.wHeap) > acc.nWorst {
			// Evict the least-worst window (the heap root).
			heap.Pop(&acc.wHeap)
		}
	keep:
	}

	if len(acc.wHeap) < acc.nWorst {
		// We don't have N windows yet, so keep accumulating.
		acc.bound = 1.0
	} else {
		// Anything above the least worst window has no effect.
		acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
	}

	if acc.mud != nil {
		if acc.lastTime != math.MaxInt64 {
			// Update distribution. (lastTime == MaxInt64 marks
			// a discontinuity set by resetTime.)
			acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
		}
		acc.lastTime, acc.lastMU = time, mu
		if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
			acc.bound = math.Max(acc.bound, mudBound)
		} else {
			// We haven't accumulated enough total precise
			// mass yet to even reach our goal, so keep
			// accumulating.
			acc.bound = 1
		}
		// It's not worth checking percentiles every time, so
		// just keep accumulating this band.
		return false
	}

	// If we've found enough 0 utilizations, we can stop immediately.
	return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
}
620 621 // MMU returns the minimum mutator utilization for the given time
622 // window. This is the minimum utilization for all windows of this
623 // duration across the execution. The returned value is in the range
624 // [0, 1].
625 func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
626 acc := accumulator{mmu: 1.0, bound: 1.0}
627 c.mmu(window, &acc)
628 return acc.mmu
629 }
630 631 // Examples returns n specific examples of the lowest mutator
632 // utilization for the given window size. The returned windows will be
633 // disjoint (otherwise there would be a huge number of
634 // mostly-overlapping windows at the single lowest point). There are
635 // no guarantees on which set of disjoint windows this returns.
636 func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
637 acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
638 c.mmu(window, &acc)
639 sort.Sort(sort.Reverse(acc.wHeap))
640 return ([]UtilWindow)(acc.wHeap)
641 }
642 643 // MUD returns mutator utilization distribution quantiles for the
644 // given window size.
645 //
646 // The mutator utilization distribution is the distribution of mean
647 // mutator utilization across all windows of the given window size in
648 // the trace.
649 //
650 // The minimum mutator utilization is the minimum (0th percentile) of
651 // this distribution. (However, if only the minimum is desired, it's
652 // more efficient to use the MMU method.)
653 func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
654 if len(quantiles) == 0 {
655 return []float64{}
656 }
657 658 // Each unrefined band contributes a known total mass to the
659 // distribution (bandDur except at the end), but in an unknown
660 // way. However, we know that all the mass it contributes must
661 // be at or above its worst-case mean mutator utilization.
662 //
663 // Hence, we refine bands until the highest desired
664 // distribution quantile is less than the next worst-case mean
665 // mutator utilization. At this point, all further
666 // contributions to the distribution must be beyond the
667 // desired quantile and hence cannot affect it.
668 //
669 // First, find the highest desired distribution quantile.
670 maxQ := quantiles[0]
671 for _, q := range quantiles {
672 if q > maxQ {
673 maxQ = q
674 }
675 }
676 // The distribution's mass is in units of time (it's not
677 // normalized because this would make it more annoying to
678 // account for future contributions of unrefined bands). The
679 // total final mass will be the duration of the trace itself
680 // minus the window size. Using this, we can compute the mass
681 // corresponding to quantile maxQ.
682 var duration int64
683 for _, s := range c.series {
684 duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
685 if duration1 >= int64(window) {
686 duration += duration1 - int64(window)
687 }
688 }
689 qMass := float64(duration) * maxQ
690 691 // Accumulate the MUD until we have precise information for
692 // everything to the left of qMass.
693 acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: &mud{}}
694 acc.mud.setTrackMass(qMass)
695 c.mmu(window, &acc)
696 697 // Evaluate the quantiles on the accumulated MUD.
698 out := []float64{:len(quantiles)}
699 for i := range out {
700 mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
701 if math.IsNaN(mu) {
702 // There are a few legitimate ways this can
703 // happen:
704 //
705 // 1. If the window is the full trace
706 // duration, then the windowed MU function is
707 // only defined at a single point, so the MU
708 // distribution is not well-defined.
709 //
710 // 2. If there are no events, then the MU
711 // distribution has no mass.
712 //
713 // Either way, all of the quantiles will have
714 // converged toward the MMU at this point.
715 mu = acc.mmu
716 }
717 out[i] = mu
718 }
719 return out
720 }
721 722 func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
723 if window <= 0 {
724 acc.mmu = 0
725 return
726 }
727 728 var bandU bandUtilHeap
729 windows := []time.Duration{:len(c.series)}
730 for i, s := range c.series {
731 windows[i] = window
732 if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
733 windows[i] = max
734 }
735 736 bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
737 if bandU == nil {
738 bandU = bandU1
739 } else {
740 bandU = append(bandU, bandU1...)
741 }
742 }
743 744 // Process bands from lowest utilization bound to highest.
745 heap.Init(&bandU)
746 747 // Refine each band into a precise window and MMU until
748 // refining the next lowest band can no longer affect the MMU
749 // or windows.
750 for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
751 i := bandU[0].series
752 c.series[i].bandMMU(bandU[0].i, windows[i], acc)
753 heap.Pop(&bandU)
754 }
755 }
756 757 func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
758 // For each band, compute the worst-possible total mutator
759 // utilization for all windows that start in that band.
760 761 // minBands is the minimum number of bands a window can span
762 // and maxBands is the maximum number of bands a window can
763 // span in any alignment.
764 minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
765 maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
766 if window > 1 && maxBands < 2 {
767 panic("maxBands < 2")
768 }
769 tailDur := int64(window) % c.bandDur
770 nUtil := len(c.bands) - maxBands + 1
771 if nUtil < 0 {
772 nUtil = 0
773 }
774 bandU := []bandUtil{:nUtil}
775 for i := range bandU {
776 // To compute the worst-case MU, we assume the minimum
777 // for any bands that are only partially overlapped by
778 // some window and the mean for any bands that are
779 // completely covered by all windows.
780 var util totalUtil
781 782 // Find the lowest and second lowest of the partial
783 // bands.
784 l := c.bands[i].minUtil
785 r1 := c.bands[i+minBands-1].minUtil
786 r2 := c.bands[i+maxBands-1].minUtil
787 minBand := math.Min(l, math.Min(r1, r2))
788 // Assume the worst window maximally overlaps the
789 // worst minimum and then the rest overlaps the second
790 // worst minimum.
791 if minBands == 1 {
792 util += totalUtilOf(minBand, int64(window))
793 } else {
794 util += totalUtilOf(minBand, c.bandDur)
795 midBand := 0.0
796 switch {
797 case minBand == l:
798 midBand = math.Min(r1, r2)
799 case minBand == r1:
800 midBand = math.Min(l, r2)
801 case minBand == r2:
802 midBand = math.Min(l, r1)
803 }
804 util += totalUtilOf(midBand, tailDur)
805 }
806 807 // Add the total mean MU of bands that are completely
808 // overlapped by all windows.
809 if minBands > 2 {
810 util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
811 }
812 813 bandU[i] = bandUtil{series, i, util.mean(window)}
814 }
815 816 return bandU
817 }
// bandMMU computes the precise minimum mutator utilization for
// windows with a left edge in band bandIdx, feeding each candidate
// window into acc.
func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
	util := c.util

	// We think of the mutator utilization over time as the
	// box-filtered utilization function, which we call the
	// "windowed mutator utilization function". The resulting
	// function is continuous and piecewise linear (unless
	// window==0, which we handle elsewhere), where the boundaries
	// between segments occur when either edge of the window
	// encounters a change in the instantaneous mutator
	// utilization function. Hence, the minimum of this function
	// will always occur when one of the edges of the window
	// aligns with a utilization change, so these are the only
	// points we need to consider.
	//
	// We compute the mutator utilization function incrementally
	// by tracking the integral from t=0 to the left edge of the
	// window and to the right edge of the window.
	left := c.bands[bandIdx].integrator
	right := left
	// N.B. "time" here is a local nanosecond timestamp that
	// shadows the time package within this function.
	time, endTime := c.bandTime(bandIdx)
	if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
		endTime = utilEnd
	}
	acc.resetTime()
	for {
		// Advance edges to time and time+window.
		mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
		if acc.addMU(time, mu, window) {
			break
		}
		if time == endTime {
			break
		}

		// The maximum slope of the windowed mutator
		// utilization function is 1/window, so we can always
		// advance the time by at least (mu - mmu) * window
		// without dropping below mmu.
		minTime := time + int64((mu-acc.bound)*float64(window))

		// Advance the window to the next time where either
		// the left or right edge of the window encounters a
		// change in the utilization curve.
		if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
			time = t1
		} else {
			time = t2
		}
		if time < minTime {
			time = minTime
		}
		if time >= endTime {
			// For MMUs we could stop here, but for MUDs
			// it's important that we span the entire
			// band.
			time = endTime
		}
	}
}
// An integrator tracks a position in a utilization function and
// integrates it.
type integrator struct {
	// u is the series whose utilization function is being
	// integrated.
	u *mmuSeries
	// pos is the index in u.util of the current time's non-strict
	// predecessor.
	pos int
}
// advance returns the integral of the utilization function from 0 to
// time. advance must be called on monotonically increasing values of
// times.
func (in *integrator) advance(time int64) totalUtil {
	util, pos := in.u.util, in.pos
	// Advance pos until pos+1 is time's strict successor (making
	// pos time's non-strict predecessor).
	//
	// Very often, this will be nearby, so we optimize that case,
	// but it may be arbitrarily far away, so we handle that
	// efficiently, too.
	const maxSeq = 8
	if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
		// Nearby. Use a linear scan.
		for pos+1 < len(util) && util[pos+1].Time <= time {
			pos++
		}
	} else {
		// Far. Binary search for time's strict successor.
		l, r := pos, len(util)
		for l < r {
			h := int(uint(l+r) >> 1)
			if util[h].Time <= time {
				l = h + 1
			} else {
				r = h
			}
		}
		pos = l - 1 // Non-strict predecessor.
	}
	in.pos = pos
	// sums[pos] covers util[:pos]; add the partial contribution
	// of the current segment from util[pos].Time up to time.
	var partial totalUtil
	if time != util[pos].Time {
		partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
	}
	return in.u.sums[pos] + partial
}
928 929 // next returns the smallest time t' > time of a change in the
930 // utilization function.
931 func (in *integrator) next(time int64) int64 {
932 for _, u := range in.u.util[in.pos:] {
933 if u.Time > time {
934 return u.Time
935 }
936 }
937 return 1<<63 - 1
938 }
939 940 func isGCSTW(r Range) bool {
941 return bytes.HasPrefix(r.Name, "stop-the-world") && bytes.Contains(r.Name, "GC")
942 }
943 944 func isGCMarkAssist(r Range) bool {
945 return r.Name == "GC mark assist"
946 }
947 948 func isGCSweep(r Range) bool {
949 return r.Name == "GC incremental sweep"
950 }
951