histogram.go raw
1 /*
2 * SPDX-FileCopyrightText: © Hypermode Inc. <hello@hypermode.com>
3 * SPDX-License-Identifier: Apache-2.0
4 */
5
6 package badger
7
8 import (
9 "fmt"
10 "math"
11 )
12
13 // PrintHistogram builds and displays the key-value size histogram.
14 // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
15 // considered for creating the histogram
16 func (db *DB) PrintHistogram(keyPrefix []byte) {
17 if db == nil {
18 fmt.Println("\nCannot build histogram: DB is nil.")
19 return
20 }
21 histogram := db.buildHistogram(keyPrefix)
22 fmt.Printf("Histogram of key sizes (in bytes)\n")
23 histogram.keySizeHistogram.printHistogram()
24 fmt.Printf("Histogram of value sizes (in bytes)\n")
25 histogram.valueSizeHistogram.printHistogram()
26 }
27
28 // histogramData stores information about a histogram
29 type histogramData struct {
30 bins []int64
31 countPerBin []int64
32 totalCount int64
33 min int64
34 max int64
35 sum int64
36 }
37
38 // sizeHistogram contains keySize histogram and valueSize histogram
39 type sizeHistogram struct {
40 keySizeHistogram, valueSizeHistogram histogramData
41 }
42
43 // newSizeHistogram returns a new instance of keyValueSizeHistogram with
44 // properly initialized fields.
45 func newSizeHistogram() *sizeHistogram {
46 // TODO(ibrahim): find appropriate bin size.
47 keyBins := createHistogramBins(1, 16)
48 valueBins := createHistogramBins(1, 30)
49 return &sizeHistogram{
50 keySizeHistogram: histogramData{
51 bins: keyBins,
52 countPerBin: make([]int64, len(keyBins)+1),
53 max: math.MinInt64,
54 min: math.MaxInt64,
55 sum: 0,
56 },
57 valueSizeHistogram: histogramData{
58 bins: valueBins,
59 countPerBin: make([]int64, len(valueBins)+1),
60 max: math.MinInt64,
61 min: math.MaxInt64,
62 sum: 0,
63 },
64 }
65 }
66
67 // createHistogramBins creates bins for an histogram. The bin sizes are powers
68 // of two of the form [2^min_exponent, ..., 2^max_exponent].
69 func createHistogramBins(minExponent, maxExponent uint32) []int64 {
70 var bins []int64
71 for i := minExponent; i <= maxExponent; i++ {
72 bins = append(bins, int64(1)<<i)
73 }
74 return bins
75 }
76
77 // Update the min and max fields if value is less than or greater than the
78 // current min/max value.
79 func (histogram *histogramData) Update(value int64) {
80 if value > histogram.max {
81 histogram.max = value
82 }
83 if value < histogram.min {
84 histogram.min = value
85 }
86
87 histogram.sum += value
88 histogram.totalCount++
89
90 for index := 0; index <= len(histogram.bins); index++ {
91 // Allocate value in the last buckets if we reached the end of the Bounds array.
92 if index == len(histogram.bins) {
93 histogram.countPerBin[index]++
94 break
95 }
96
97 // Check if the value should be added to the "index" bin
98 if value < histogram.bins[index] {
99 histogram.countPerBin[index]++
100 break
101 }
102 }
103 }
104
105 // buildHistogram builds the key-value size histogram.
106 // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
107 // considered for creating the histogram
108 func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram {
109 txn := db.NewTransaction(false)
110 defer txn.Discard()
111
112 itr := txn.NewIterator(DefaultIteratorOptions)
113 defer itr.Close()
114
115 badgerHistogram := newSizeHistogram()
116
117 // Collect key and value sizes.
118 for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() {
119 item := itr.Item()
120 badgerHistogram.keySizeHistogram.Update(item.KeySize())
121 badgerHistogram.valueSizeHistogram.Update(item.ValueSize())
122 }
123 return badgerHistogram
124 }
125
126 // printHistogram prints the histogram data in a human-readable format.
127 func (histogram histogramData) printHistogram() {
128 fmt.Printf("Total count: %d\n", histogram.totalCount)
129 fmt.Printf("Min value: %d\n", histogram.min)
130 fmt.Printf("Max value: %d\n", histogram.max)
131 fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
132 fmt.Printf("%24s %9s\n", "Range", "Count")
133
134 numBins := len(histogram.bins)
135 for index, count := range histogram.countPerBin {
136 if count == 0 {
137 continue
138 }
139
140 // The last bin represents the bin that contains the range from
141 // the last bin up to infinity so it's processed differently than the
142 // other bins.
143 if index == len(histogram.countPerBin)-1 {
144 lowerBound := int(histogram.bins[numBins-1])
145 fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
146 continue
147 }
148
149 upperBound := int(histogram.bins[index])
150 lowerBound := 0
151 if index > 0 {
152 lowerBound = int(histogram.bins[index-1])
153 }
154
155 fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count)
156 }
157 fmt.Println()
158 }
159