histogram.go raw

   1  /*
   2   * SPDX-FileCopyrightText: © Hypermode Inc. <hello@hypermode.com>
   3   * SPDX-License-Identifier: Apache-2.0
   4   */
   5  
   6  package badger
   7  
   8  import (
   9  	"fmt"
  10  	"math"
  11  )
  12  
  13  // PrintHistogram builds and displays the key-value size histogram.
  14  // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
  15  // considered for creating the histogram
  16  func (db *DB) PrintHistogram(keyPrefix []byte) {
  17  	if db == nil {
  18  		fmt.Println("\nCannot build histogram: DB is nil.")
  19  		return
  20  	}
  21  	histogram := db.buildHistogram(keyPrefix)
  22  	fmt.Printf("Histogram of key sizes (in bytes)\n")
  23  	histogram.keySizeHistogram.printHistogram()
  24  	fmt.Printf("Histogram of value sizes (in bytes)\n")
  25  	histogram.valueSizeHistogram.printHistogram()
  26  }
  27  
  28  // histogramData stores information about a histogram
  29  type histogramData struct {
  30  	bins        []int64
  31  	countPerBin []int64
  32  	totalCount  int64
  33  	min         int64
  34  	max         int64
  35  	sum         int64
  36  }
  37  
  38  // sizeHistogram contains keySize histogram and valueSize histogram
  39  type sizeHistogram struct {
  40  	keySizeHistogram, valueSizeHistogram histogramData
  41  }
  42  
  43  // newSizeHistogram returns a new instance of keyValueSizeHistogram with
  44  // properly initialized fields.
  45  func newSizeHistogram() *sizeHistogram {
  46  	// TODO(ibrahim): find appropriate bin size.
  47  	keyBins := createHistogramBins(1, 16)
  48  	valueBins := createHistogramBins(1, 30)
  49  	return &sizeHistogram{
  50  		keySizeHistogram: histogramData{
  51  			bins:        keyBins,
  52  			countPerBin: make([]int64, len(keyBins)+1),
  53  			max:         math.MinInt64,
  54  			min:         math.MaxInt64,
  55  			sum:         0,
  56  		},
  57  		valueSizeHistogram: histogramData{
  58  			bins:        valueBins,
  59  			countPerBin: make([]int64, len(valueBins)+1),
  60  			max:         math.MinInt64,
  61  			min:         math.MaxInt64,
  62  			sum:         0,
  63  		},
  64  	}
  65  }
  66  
  67  // createHistogramBins creates bins for an histogram. The bin sizes are powers
  68  // of two of the form [2^min_exponent, ..., 2^max_exponent].
  69  func createHistogramBins(minExponent, maxExponent uint32) []int64 {
  70  	var bins []int64
  71  	for i := minExponent; i <= maxExponent; i++ {
  72  		bins = append(bins, int64(1)<<i)
  73  	}
  74  	return bins
  75  }
  76  
  77  // Update the min and max fields if value is less than or greater than the
  78  // current min/max value.
  79  func (histogram *histogramData) Update(value int64) {
  80  	if value > histogram.max {
  81  		histogram.max = value
  82  	}
  83  	if value < histogram.min {
  84  		histogram.min = value
  85  	}
  86  
  87  	histogram.sum += value
  88  	histogram.totalCount++
  89  
  90  	for index := 0; index <= len(histogram.bins); index++ {
  91  		// Allocate value in the last buckets if we reached the end of the Bounds array.
  92  		if index == len(histogram.bins) {
  93  			histogram.countPerBin[index]++
  94  			break
  95  		}
  96  
  97  		// Check if the value should be added to the "index" bin
  98  		if value < histogram.bins[index] {
  99  			histogram.countPerBin[index]++
 100  			break
 101  		}
 102  	}
 103  }
 104  
 105  // buildHistogram builds the key-value size histogram.
 106  // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
 107  // considered for creating the histogram
 108  func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram {
 109  	txn := db.NewTransaction(false)
 110  	defer txn.Discard()
 111  
 112  	itr := txn.NewIterator(DefaultIteratorOptions)
 113  	defer itr.Close()
 114  
 115  	badgerHistogram := newSizeHistogram()
 116  
 117  	// Collect key and value sizes.
 118  	for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() {
 119  		item := itr.Item()
 120  		badgerHistogram.keySizeHistogram.Update(item.KeySize())
 121  		badgerHistogram.valueSizeHistogram.Update(item.ValueSize())
 122  	}
 123  	return badgerHistogram
 124  }
 125  
 126  // printHistogram prints the histogram data in a human-readable format.
 127  func (histogram histogramData) printHistogram() {
 128  	fmt.Printf("Total count: %d\n", histogram.totalCount)
 129  	fmt.Printf("Min value: %d\n", histogram.min)
 130  	fmt.Printf("Max value: %d\n", histogram.max)
 131  	fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
 132  	fmt.Printf("%24s %9s\n", "Range", "Count")
 133  
 134  	numBins := len(histogram.bins)
 135  	for index, count := range histogram.countPerBin {
 136  		if count == 0 {
 137  			continue
 138  		}
 139  
 140  		// The last bin represents the bin that contains the range from
 141  		// the last bin up to infinity so it's processed differently than the
 142  		// other bins.
 143  		if index == len(histogram.countPerBin)-1 {
 144  			lowerBound := int(histogram.bins[numBins-1])
 145  			fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
 146  			continue
 147  		}
 148  
 149  		upperBound := int(histogram.bins[index])
 150  		lowerBound := 0
 151  		if index > 0 {
 152  			lowerBound = int(histogram.bins[index-1])
 153  		}
 154  
 155  		fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count)
 156  	}
 157  	fmt.Println()
 158  }
 159