histogram.go raw

   1  // Copyright 2015 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package trace
   6  
   7  // This file implements histogramming for RPC statistics collection.
   8  
   9  import (
  10  	"bytes"
  11  	"fmt"
  12  	"html/template"
  13  	"log"
  14  	"math"
  15  	"sync"
  16  
  17  	"golang.org/x/net/internal/timeseries"
  18  )
  19  
  20  const (
  21  	bucketCount = 38
  22  )
  23  
  24  // histogram keeps counts of values in buckets that are spaced
  25  // out in powers of 2: 0-1, 2-3, 4-7...
  26  // histogram implements timeseries.Observable
  27  type histogram struct {
  28  	sum          int64   // running total of measurements
  29  	sumOfSquares float64 // square of running total
  30  	buckets      []int64 // bucketed values for histogram
  31  	value        int     // holds a single value as an optimization
  32  	valueCount   int64   // number of values recorded for single value
  33  }
  34  
  35  // addMeasurement records a value measurement observation to the histogram.
  36  func (h *histogram) addMeasurement(value int64) {
  37  	// TODO: assert invariant
  38  	h.sum += value
  39  	h.sumOfSquares += float64(value) * float64(value)
  40  
  41  	bucketIndex := getBucket(value)
  42  
  43  	if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) {
  44  		h.value = bucketIndex
  45  		h.valueCount++
  46  	} else {
  47  		h.allocateBuckets()
  48  		h.buckets[bucketIndex]++
  49  	}
  50  }
  51  
  52  func (h *histogram) allocateBuckets() {
  53  	if h.buckets == nil {
  54  		h.buckets = make([]int64, bucketCount)
  55  		h.buckets[h.value] = h.valueCount
  56  		h.value = 0
  57  		h.valueCount = -1
  58  	}
  59  }
  60  
  61  func log2(i int64) int {
  62  	n := 0
  63  	for ; i >= 0x100; i >>= 8 {
  64  		n += 8
  65  	}
  66  	for ; i > 0; i >>= 1 {
  67  		n += 1
  68  	}
  69  	return n
  70  }
  71  
  72  func getBucket(i int64) (index int) {
  73  	index = log2(i) - 1
  74  	if index < 0 {
  75  		index = 0
  76  	}
  77  	if index >= bucketCount {
  78  		index = bucketCount - 1
  79  	}
  80  	return
  81  }
  82  
  83  // Total returns the number of recorded observations.
  84  func (h *histogram) total() (total int64) {
  85  	if h.valueCount >= 0 {
  86  		total = h.valueCount
  87  	}
  88  	for _, val := range h.buckets {
  89  		total += int64(val)
  90  	}
  91  	return
  92  }
  93  
  94  // Average returns the average value of recorded observations.
  95  func (h *histogram) average() float64 {
  96  	t := h.total()
  97  	if t == 0 {
  98  		return 0
  99  	}
 100  	return float64(h.sum) / float64(t)
 101  }
 102  
 103  // Variance returns the variance of recorded observations.
 104  func (h *histogram) variance() float64 {
 105  	t := float64(h.total())
 106  	if t == 0 {
 107  		return 0
 108  	}
 109  	s := float64(h.sum) / t
 110  	return h.sumOfSquares/t - s*s
 111  }
 112  
 113  // StandardDeviation returns the standard deviation of recorded observations.
 114  func (h *histogram) standardDeviation() float64 {
 115  	return math.Sqrt(h.variance())
 116  }
 117  
 118  // PercentileBoundary estimates the value that the given fraction of recorded
 119  // observations are less than.
 120  func (h *histogram) percentileBoundary(percentile float64) int64 {
 121  	total := h.total()
 122  
 123  	// Corner cases (make sure result is strictly less than Total())
 124  	if total == 0 {
 125  		return 0
 126  	} else if total == 1 {
 127  		return int64(h.average())
 128  	}
 129  
 130  	percentOfTotal := round(float64(total) * percentile)
 131  	var runningTotal int64
 132  
 133  	for i := range h.buckets {
 134  		value := h.buckets[i]
 135  		runningTotal += value
 136  		if runningTotal == percentOfTotal {
 137  			// We hit an exact bucket boundary. If the next bucket has data, it is a
 138  			// good estimate of the value. If the bucket is empty, we interpolate the
 139  			// midpoint between the next bucket's boundary and the next non-zero
 140  			// bucket. If the remaining buckets are all empty, then we use the
 141  			// boundary for the next bucket as the estimate.
 142  			j := uint8(i + 1)
 143  			min := bucketBoundary(j)
 144  			if runningTotal < total {
 145  				for h.buckets[j] == 0 {
 146  					j++
 147  				}
 148  			}
 149  			max := bucketBoundary(j)
 150  			return min + round(float64(max-min)/2)
 151  		} else if runningTotal > percentOfTotal {
 152  			// The value is in this bucket. Interpolate the value.
 153  			delta := runningTotal - percentOfTotal
 154  			percentBucket := float64(value-delta) / float64(value)
 155  			bucketMin := bucketBoundary(uint8(i))
 156  			nextBucketMin := bucketBoundary(uint8(i + 1))
 157  			bucketSize := nextBucketMin - bucketMin
 158  			return bucketMin + round(percentBucket*float64(bucketSize))
 159  		}
 160  	}
 161  	return bucketBoundary(bucketCount - 1)
 162  }
 163  
 164  // Median returns the estimated median of the observed values.
 165  func (h *histogram) median() int64 {
 166  	return h.percentileBoundary(0.5)
 167  }
 168  
 169  // Add adds other to h.
 170  func (h *histogram) Add(other timeseries.Observable) {
 171  	o := other.(*histogram)
 172  	if o.valueCount == 0 {
 173  		// Other histogram is empty
 174  	} else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value {
 175  		// Both have a single bucketed value, aggregate them
 176  		h.valueCount += o.valueCount
 177  	} else {
 178  		// Two different values necessitate buckets in this histogram
 179  		h.allocateBuckets()
 180  		if o.valueCount >= 0 {
 181  			h.buckets[o.value] += o.valueCount
 182  		} else {
 183  			for i := range h.buckets {
 184  				h.buckets[i] += o.buckets[i]
 185  			}
 186  		}
 187  	}
 188  	h.sumOfSquares += o.sumOfSquares
 189  	h.sum += o.sum
 190  }
 191  
 192  // Clear resets the histogram to an empty state, removing all observed values.
 193  func (h *histogram) Clear() {
 194  	h.buckets = nil
 195  	h.value = 0
 196  	h.valueCount = 0
 197  	h.sum = 0
 198  	h.sumOfSquares = 0
 199  }
 200  
 201  // CopyFrom copies from other, which must be a *histogram, into h.
 202  func (h *histogram) CopyFrom(other timeseries.Observable) {
 203  	o := other.(*histogram)
 204  	if o.valueCount == -1 {
 205  		h.allocateBuckets()
 206  		copy(h.buckets, o.buckets)
 207  	}
 208  	h.sum = o.sum
 209  	h.sumOfSquares = o.sumOfSquares
 210  	h.value = o.value
 211  	h.valueCount = o.valueCount
 212  }
 213  
 214  // Multiply scales the histogram by the specified ratio.
 215  func (h *histogram) Multiply(ratio float64) {
 216  	if h.valueCount == -1 {
 217  		for i := range h.buckets {
 218  			h.buckets[i] = int64(float64(h.buckets[i]) * ratio)
 219  		}
 220  	} else {
 221  		h.valueCount = int64(float64(h.valueCount) * ratio)
 222  	}
 223  	h.sum = int64(float64(h.sum) * ratio)
 224  	h.sumOfSquares = h.sumOfSquares * ratio
 225  }
 226  
 227  // New creates a new histogram.
 228  func (h *histogram) New() timeseries.Observable {
 229  	r := new(histogram)
 230  	r.Clear()
 231  	return r
 232  }
 233  
 234  func (h *histogram) String() string {
 235  	return fmt.Sprintf("%d, %f, %d, %d, %v",
 236  		h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets)
 237  }
 238  
 239  // round returns the closest int64 to the argument
 240  func round(in float64) int64 {
 241  	return int64(math.Floor(in + 0.5))
 242  }
 243  
 244  // bucketBoundary returns the first value in the bucket.
 245  func bucketBoundary(bucket uint8) int64 {
 246  	if bucket == 0 {
 247  		return 0
 248  	}
 249  	return 1 << bucket
 250  }
 251  
 252  // bucketData holds data about a specific bucket for use in distTmpl.
 253  type bucketData struct {
 254  	Lower, Upper       int64
 255  	N                  int64
 256  	Pct, CumulativePct float64
 257  	GraphWidth         int
 258  }
 259  
 260  // data holds data about a Distribution for use in distTmpl.
 261  type data struct {
 262  	Buckets                 []*bucketData
 263  	Count, Median           int64
 264  	Mean, StandardDeviation float64
 265  }
 266  
 267  // maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets.
 268  const maxHTMLBarWidth = 350.0
 269  
 270  // newData returns data representing h for use in distTmpl.
 271  func (h *histogram) newData() *data {
 272  	// Force the allocation of buckets to simplify the rendering implementation
 273  	h.allocateBuckets()
 274  	// We scale the bars on the right so that the largest bar is
 275  	// maxHTMLBarWidth pixels in width.
 276  	maxBucket := int64(0)
 277  	for _, n := range h.buckets {
 278  		if n > maxBucket {
 279  			maxBucket = n
 280  		}
 281  	}
 282  	total := h.total()
 283  	barsizeMult := maxHTMLBarWidth / float64(maxBucket)
 284  	var pctMult float64
 285  	if total == 0 {
 286  		pctMult = 1.0
 287  	} else {
 288  		pctMult = 100.0 / float64(total)
 289  	}
 290  
 291  	buckets := make([]*bucketData, len(h.buckets))
 292  	runningTotal := int64(0)
 293  	for i, n := range h.buckets {
 294  		if n == 0 {
 295  			continue
 296  		}
 297  		runningTotal += n
 298  		var upperBound int64
 299  		if i < bucketCount-1 {
 300  			upperBound = bucketBoundary(uint8(i + 1))
 301  		} else {
 302  			upperBound = math.MaxInt64
 303  		}
 304  		buckets[i] = &bucketData{
 305  			Lower:         bucketBoundary(uint8(i)),
 306  			Upper:         upperBound,
 307  			N:             n,
 308  			Pct:           float64(n) * pctMult,
 309  			CumulativePct: float64(runningTotal) * pctMult,
 310  			GraphWidth:    int(float64(n) * barsizeMult),
 311  		}
 312  	}
 313  	return &data{
 314  		Buckets:           buckets,
 315  		Count:             total,
 316  		Median:            h.median(),
 317  		Mean:              h.average(),
 318  		StandardDeviation: h.standardDeviation(),
 319  	}
 320  }
 321  
 322  func (h *histogram) html() template.HTML {
 323  	buf := new(bytes.Buffer)
 324  	if err := distTmpl().Execute(buf, h.newData()); err != nil {
 325  		buf.Reset()
 326  		log.Printf("net/trace: couldn't execute template: %v", err)
 327  	}
 328  	return template.HTML(buf.String())
 329  }
 330  
 331  var distTmplCache *template.Template
 332  var distTmplOnce sync.Once
 333  
 334  func distTmpl() *template.Template {
 335  	distTmplOnce.Do(func() {
 336  		// Input: data
 337  		distTmplCache = template.Must(template.New("distTmpl").Parse(`
 338  <table>
 339  <tr>
 340      <td style="padding:0.25em">Count: {{.Count}}</td>
 341      <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td>
 342      <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td>
 343      <td style="padding:0.25em">Median: {{.Median}}</td>
 344  </tr>
 345  </table>
 346  <hr>
 347  <table>
 348  {{range $b := .Buckets}}
 349  {{if $b}}
 350    <tr>
 351      <td style="padding:0 0 0 0.25em">[</td>
 352      <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td>
 353      <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td>
 354      <td style="text-align:right;padding:0 0.25em">{{.N}}</td>
 355      <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td>
 356      <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td>
 357      <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td>
 358    </tr>
 359  {{end}}
 360  {{end}}
 361  </table>
 362  `))
 363  	})
 364  	return distTmplCache
 365  }
 366