mud.mx raw

   1  // Copyright 2017 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package trace
   6  
   7  import (
   8  	"cmp"
   9  	"math"
  10  	"slices"
  11  )
  12  
// mud is an updatable mutator utilization distribution.
//
// This is a continuous distribution of duration over mutator
// utilization. For example, the integral from mutator utilization a
// to b is the total duration during which the mutator utilization was
// in the range [a, b].
//
// This distribution is *not* normalized (it is not a probability
// distribution). This makes it easier to work with as it's being
// updated.
//
// It is represented as the sum of scaled uniform distribution
// functions and Dirac delta functions (which are treated as
// degenerate uniform distributions).
type mud struct {
	// sorted and unsorted together hold the edges of the
	// distribution's piecewise components. New edges are appended
	// to unsorted by add and merged into sorted lazily by
	// invCumulativeSum.
	sorted, unsorted []edge

	// trackMass is the inverse cumulative sum to track as the
	// distribution is updated.
	trackMass float64
	// trackBucket is the bucket in which trackMass falls. If the
	// total mass of the distribution is < trackMass, this is
	// len(hist).
	trackBucket int
	// trackSum is the cumulative sum of hist[:trackBucket]. Once
	// trackSum >= trackMass, trackBucket must be recomputed.
	trackSum float64

	// hist is a hierarchical histogram of distribution mass.
	hist [mudDegree]float64
}
  44  
const (
	// mudDegree is the number of buckets in the MUD summary
	// histogram. Utilization in [0, 1] maps linearly onto these
	// buckets, so each bucket covers a 1/mudDegree-wide band.
	mudDegree = 1024
)
  50  
// edge is one breakpoint in the piecewise representation of the
// distribution's density function.
type edge struct {
	// At x, the function increases by y.
	x, delta float64
	// Additionally at x is a Dirac delta function with area dirac.
	dirac float64
}
  57  
  58  // add adds a uniform function over [l, r] scaled so the total weight
  59  // of the uniform is area. If l==r, this adds a Dirac delta function.
  60  func (d *mud) add(l, r, area float64) {
  61  	if area == 0 {
  62  		return
  63  	}
  64  
  65  	if r < l {
  66  		l, r = r, l
  67  	}
  68  
  69  	// Add the edges.
  70  	if l == r {
  71  		d.unsorted = append(d.unsorted, edge{l, 0, area})
  72  	} else {
  73  		delta := area / (r - l)
  74  		d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0})
  75  	}
  76  
  77  	// Update the histogram.
  78  	h := &d.hist
  79  	lbFloat, lf := math.Modf(l * mudDegree)
  80  	lb := int(lbFloat)
  81  	if lb >= mudDegree {
  82  		lb, lf = mudDegree-1, 1
  83  	}
  84  	if l == r {
  85  		h[lb] += area
  86  	} else {
  87  		rbFloat, rf := math.Modf(r * mudDegree)
  88  		rb := int(rbFloat)
  89  		if rb >= mudDegree {
  90  			rb, rf = mudDegree-1, 1
  91  		}
  92  		if lb == rb {
  93  			h[lb] += area
  94  		} else {
  95  			perBucket := area / (r - l) / mudDegree
  96  			h[lb] += perBucket * (1 - lf)
  97  			h[rb] += perBucket * rf
  98  			for i := lb + 1; i < rb; i++ {
  99  				h[i] += perBucket
 100  			}
 101  		}
 102  	}
 103  
 104  	// Update mass tracking.
 105  	if thresh := float64(d.trackBucket) / mudDegree; l < thresh {
 106  		if r < thresh {
 107  			d.trackSum += area
 108  		} else {
 109  			d.trackSum += area * (thresh - l) / (r - l)
 110  		}
 111  		if d.trackSum >= d.trackMass {
 112  			// The tracked mass now falls in a different
 113  			// bucket. Recompute the inverse cumulative sum.
 114  			d.setTrackMass(d.trackMass)
 115  		}
 116  	}
 117  }
 118  
 119  // setTrackMass sets the mass to track the inverse cumulative sum for.
 120  //
 121  // Specifically, mass is a cumulative duration, and the mutator
 122  // utilization bounds for this duration can be queried using
 123  // approxInvCumulativeSum.
 124  func (d *mud) setTrackMass(mass float64) {
 125  	d.trackMass = mass
 126  
 127  	// Find the bucket currently containing trackMass by computing
 128  	// the cumulative sum.
 129  	sum := 0.0
 130  	for i, val := range d.hist[:] {
 131  		newSum := sum + val
 132  		if newSum > mass {
 133  			// mass falls in bucket i.
 134  			d.trackBucket = i
 135  			d.trackSum = sum
 136  			return
 137  		}
 138  		sum = newSum
 139  	}
 140  	d.trackBucket = len(d.hist)
 141  	d.trackSum = sum
 142  }
 143  
 144  // approxInvCumulativeSum is like invCumulativeSum, but specifically
 145  // operates on the tracked mass and returns an upper and lower bound
 146  // approximation of the inverse cumulative sum.
 147  //
 148  // The true inverse cumulative sum will be in the range [lower, upper).
 149  func (d *mud) approxInvCumulativeSum() (float64, float64, bool) {
 150  	if d.trackBucket == len(d.hist) {
 151  		return math.NaN(), math.NaN(), false
 152  	}
 153  	return float64(d.trackBucket) / mudDegree, float64(d.trackBucket+1) / mudDegree, true
 154  }
 155  
 156  // invCumulativeSum returns x such that the integral of d from -∞ to x
 157  // is y. If the total weight of d is less than y, it returns the
 158  // maximum of the distribution and false.
 159  //
 160  // Specifically, y is a cumulative duration, and invCumulativeSum
 161  // returns the mutator utilization x such that at least y time has
 162  // been spent with mutator utilization <= x.
 163  func (d *mud) invCumulativeSum(y float64) (float64, bool) {
 164  	if len(d.sorted) == 0 && len(d.unsorted) == 0 {
 165  		return math.NaN(), false
 166  	}
 167  
 168  	// Sort edges.
 169  	edges := d.unsorted
 170  	slices.SortFunc(edges, func(a, b edge) int {
 171  		return cmp.Compare(a.x, b.x)
 172  	})
 173  	// Merge with sorted edges.
 174  	d.unsorted = nil
 175  	if d.sorted == nil {
 176  		d.sorted = edges
 177  	} else {
 178  		oldSorted := d.sorted
 179  		newSorted := []edge{:len(oldSorted)+len(edges)}
 180  		i, j := 0, 0
 181  		for o := range newSorted {
 182  			if i >= len(oldSorted) {
 183  				copy(newSorted[o:], edges[j:])
 184  				break
 185  			} else if j >= len(edges) {
 186  				copy(newSorted[o:], oldSorted[i:])
 187  				break
 188  			} else if oldSorted[i].x < edges[j].x {
 189  				newSorted[o] = oldSorted[i]
 190  				i++
 191  			} else {
 192  				newSorted[o] = edges[j]
 193  				j++
 194  			}
 195  		}
 196  		d.sorted = newSorted
 197  	}
 198  
 199  	// Traverse edges in order computing a cumulative sum.
 200  	csum, rate, prevX := 0.0, 0.0, 0.0
 201  	for _, e := range d.sorted {
 202  		newCsum := csum + (e.x-prevX)*rate
 203  		if newCsum >= y {
 204  			// y was exceeded between the previous edge
 205  			// and this one.
 206  			if rate == 0 {
 207  				// Anywhere between prevX and
 208  				// e.x will do. We return e.x
 209  				// because that takes care of
 210  				// the y==0 case naturally.
 211  				return e.x, true
 212  			}
 213  			return (y-csum)/rate + prevX, true
 214  		}
 215  		newCsum += e.dirac
 216  		if newCsum >= y {
 217  			// y was exceeded by the Dirac delta at e.x.
 218  			return e.x, true
 219  		}
 220  		csum, prevX = newCsum, e.x
 221  		rate += e.delta
 222  	}
 223  	return prevX, false
 224  }
 225