1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 //go:build ignore
6 7 // Generate tables for small malloc size classes.
8 //
9 // See malloc.go for overview.
10 //
11 // The size classes are chosen so that rounding an allocation
12 // request up to the next size class wastes at most 12.5% (1.125x).
13 //
14 // Each size class has its own page count that gets allocated
15 // and chopped up when new objects of the size class are needed.
16 // That page count is chosen so that chopping up the run of
17 // pages into objects of the given size wastes at most 12.5% (1.125x)
18 // of the memory. It is not necessary that the cutoff here be
19 // the same as above.
20 //
21 // The two sources of waste multiply, so the worst possible case
22 // for the above constraints would be that allocations of some
23 // size might have a 26.6% (1.266x) overhead.
24 // In practice, only one of the wastes comes into play for a
25 // given size (sizes < 512 waste mainly on the round-up,
26 // sizes > 512 waste mainly on the page chopping).
27 // For really small sizes, alignment constraints force the
28 // overhead higher.
29 30 package main
31 32 import (
33 "bytes"
34 "flag"
35 "fmt"
36 "go/format"
37 "io"
38 "log"
39 "math"
40 "math/bits"
41 "os"
42 )
43 44 // Generate msize.go
45 46 var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go")
47 48 func main() {
49 flag.Parse()
50 51 var b bytes.Buffer
52 fmt.Fprintln(&b, "// Code generated by mksizeclasses.go; DO NOT EDIT.")
53 fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go")
54 fmt.Fprintln(&b)
55 fmt.Fprintln(&b, "package runtime")
56 classes := makeClasses()
57 58 printComment(&b, classes)
59 60 printClasses(&b, classes)
61 62 out, err := format.Source(b.Bytes())
63 if err != nil {
64 log.Fatal(err)
65 }
66 if *stdout {
67 _, err = os.Stdout.Write(out)
68 } else {
69 err = os.WriteFile("sizeclasses.go", out, 0666)
70 }
71 if err != nil {
72 log.Fatal(err)
73 }
74 }
75 76 const (
77 // Constants that we use and will transfer to the runtime.
78 minHeapAlign = 8
79 maxSmallSize = 32 << 10
80 smallSizeDiv = 8
81 smallSizeMax = 1024
82 largeSizeDiv = 128
83 pageShift = 13
84 85 // Derived constants.
86 pageSize = 1 << pageShift
87 )
88 89 type class struct {
90 size int // max size
91 npages int // number of pages
92 }
93 94 func powerOfTwo(x int) bool {
95 return x != 0 && x&(x-1) == 0
96 }
97 98 func makeClasses() []class {
99 var classes []class
100 101 classes = append(classes, class{}) // class #0 is a dummy entry
102 103 align := minHeapAlign
104 for size := align; size <= maxSmallSize; size += align {
105 if powerOfTwo(size) { // bump alignment once in a while
106 if size >= 2048 {
107 align = 256
108 } else if size >= 128 {
109 align = size / 8
110 } else if size >= 32 {
111 align = 16 // heap bitmaps assume 16 byte alignment for allocations >= 32 bytes.
112 }
113 }
114 if !powerOfTwo(align) {
115 panic("incorrect alignment")
116 }
117 118 // Make the allocnpages big enough that
119 // the leftover is less than 1/8 of the total,
120 // so wasted space is at most 12.5%.
121 allocsize := pageSize
122 for allocsize%size > allocsize/8 {
123 allocsize += pageSize
124 }
125 npages := allocsize / pageSize
126 127 // If the previous sizeclass chose the same
128 // allocation size and fit the same number of
129 // objects into the page, we might as well
130 // use just this size instead of having two
131 // different sizes.
132 if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size {
133 classes[len(classes)-1].size = size
134 continue
135 }
136 classes = append(classes, class{size: size, npages: npages})
137 }
138 139 // Increase object sizes if we can fit the same number of larger objects
140 // into the same number of pages. For example, we choose size 8448 above
141 // with 6 objects in 7 pages. But we can well use object size 9472,
142 // which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
143 // We need to preserve at least largeSizeDiv alignment otherwise
144 // sizeToClass won't work.
145 for i := range classes {
146 if i == 0 {
147 continue
148 }
149 c := &classes[i]
150 psize := c.npages * pageSize
151 new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1)
152 if new_size > c.size {
153 c.size = new_size
154 }
155 }
156 157 if len(classes) != 68 {
158 panic("number of size classes has changed")
159 }
160 161 for i := range classes {
162 computeDivMagic(&classes[i])
163 }
164 165 return classes
166 }
167 168 // computeDivMagic checks that the division required to compute object
169 // index from span offset can be computed using 32-bit multiplication.
170 // n / c.size is implemented as (n * (^uint32(0)/uint32(c.size) + 1)) >> 32
171 // for all 0 <= n <= c.npages * pageSize
172 func computeDivMagic(c *class) {
173 // divisor
174 d := c.size
175 if d == 0 {
176 return
177 }
178 179 // maximum input value for which the formula needs to work.
180 max := c.npages * pageSize
181 182 // As reported in [1], if n and d are unsigned N-bit integers, we
183 // can compute n / d as ⌊n * c / 2^F⌋, where c is ⌈2^F / d⌉ and F is
184 // computed with:
185 //
186 // Algorithm 2: Algorithm to select the number of fractional bits
187 // and the scaled approximate reciprocal in the case of unsigned
188 // integers.
189 //
190 // if d is a power of two then
191 // Let F ← log₂(d) and c = 1.
192 // else
193 // Let F ← N + L where L is the smallest integer
194 // such that d ≤ (2^(N+L) mod d) + 2^L.
195 // end if
196 //
197 // [1] "Faster Remainder by Direct Computation: Applications to
198 // Compilers and Software Libraries" Daniel Lemire, Owen Kaser,
199 // Nathan Kurz arXiv:1902.01961
200 //
201 // To minimize the risk of introducing errors, we implement the
202 // algorithm exactly as stated, rather than trying to adapt it to
203 // fit typical Go idioms.
204 N := bits.Len(uint(max))
205 var F int
206 if powerOfTwo(d) {
207 F = int(math.Log2(float64(d)))
208 if d != 1<<F {
209 panic("imprecise log2")
210 }
211 } else {
212 for L := 0; ; L++ {
213 if d <= ((1<<(N+L))%d)+(1<<L) {
214 F = N + L
215 break
216 }
217 }
218 }
219 220 // Also, noted in the paper, F is the smallest number of fractional
221 // bits required. We use 32 bits, because it works for all size
222 // classes and is fast on all CPU architectures that we support.
223 if F > 32 {
224 fmt.Printf("d=%d max=%d N=%d F=%d\n", c.size, max, N, F)
225 panic("size class requires more than 32 bits of precision")
226 }
227 228 // Brute force double-check with the exact computation that will be
229 // done by the runtime.
230 m := ^uint32(0)/uint32(c.size) + 1
231 for n := 0; n <= max; n++ {
232 if uint32((uint64(n)*uint64(m))>>32) != uint32(n/c.size) {
233 fmt.Printf("d=%d max=%d m=%d n=%d\n", d, max, m, n)
234 panic("bad 32-bit multiply magic")
235 }
236 }
237 }
238 239 func printComment(w io.Writer, classes []class) {
240 fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste", "min align")
241 prevSize := 0
242 var minAligns [pageShift + 1]int
243 for i, c := range classes {
244 if i == 0 {
245 continue
246 }
247 spanSize := c.npages * pageSize
248 objects := spanSize / c.size
249 tailWaste := spanSize - c.size*(spanSize/c.size)
250 maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize)
251 alignBits := bits.TrailingZeros(uint(c.size))
252 if alignBits > pageShift {
253 // object alignment is capped at page alignment
254 alignBits = pageShift
255 }
256 for i := range minAligns {
257 if i > alignBits {
258 minAligns[i] = 0
259 } else if minAligns[i] == 0 {
260 minAligns[i] = c.size
261 }
262 }
263 prevSize = c.size
264 fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%% %9d\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste, 1<<alignBits)
265 }
266 fmt.Fprintf(w, "\n")
267 268 fmt.Fprintf(w, "// %-9s %-4s %-12s\n", "alignment", "bits", "min obj size")
269 for bits, size := range minAligns {
270 if size == 0 {
271 break
272 }
273 if bits+1 < len(minAligns) && size == minAligns[bits+1] {
274 continue
275 }
276 fmt.Fprintf(w, "// %9d %4d %12d\n", 1<<bits, bits, size)
277 }
278 fmt.Fprintf(w, "\n")
279 }
280 281 func maxObjsPerSpan(classes []class) int {
282 most := 0
283 for _, c := range classes[1:] {
284 n := c.npages * pageSize / c.size
285 most = max(most, n)
286 }
287 return most
288 }
289 290 func printClasses(w io.Writer, classes []class) {
291 fmt.Fprintln(w, "const (")
292 fmt.Fprintf(w, "MinHeapAlign = %d\n", minHeapAlign)
293 fmt.Fprintf(w, "MaxSmallSize = %d\n", maxSmallSize)
294 fmt.Fprintf(w, "SmallSizeDiv = %d\n", smallSizeDiv)
295 fmt.Fprintf(w, "SmallSizeMax = %d\n", smallSizeMax)
296 fmt.Fprintf(w, "LargeSizeDiv = %d\n", largeSizeDiv)
297 fmt.Fprintf(w, "NumSizeClasses = %d\n", len(classes))
298 fmt.Fprintf(w, "PageShift = %d\n", pageShift)
299 fmt.Fprintf(w, "MaxObjsPerSpan = %d\n", maxObjsPerSpan(classes))
300 fmt.Fprintln(w, ")")
301 302 fmt.Fprint(w, "var SizeClassToSize = [NumSizeClasses]uint16 {")
303 for _, c := range classes {
304 fmt.Fprintf(w, "%d,", c.size)
305 }
306 fmt.Fprintln(w, "}")
307 308 fmt.Fprint(w, "var SizeClassToNPages = [NumSizeClasses]uint8 {")
309 for _, c := range classes {
310 fmt.Fprintf(w, "%d,", c.npages)
311 }
312 fmt.Fprintln(w, "}")
313 314 fmt.Fprint(w, "var SizeClassToDivMagic = [NumSizeClasses]uint32 {")
315 for _, c := range classes {
316 if c.size == 0 {
317 fmt.Fprintf(w, "0,")
318 continue
319 }
320 fmt.Fprintf(w, "^uint32(0)/%d+1,", c.size)
321 }
322 fmt.Fprintln(w, "}")
323 324 // map from size to size class, for small sizes.
325 sc := make([]int, smallSizeMax/smallSizeDiv+1)
326 for i := range sc {
327 size := i * smallSizeDiv
328 for j, c := range classes {
329 if c.size >= size {
330 sc[i] = j
331 break
332 }
333 }
334 }
335 fmt.Fprint(w, "var SizeToSizeClass8 = [SmallSizeMax/SmallSizeDiv+1]uint8 {")
336 for _, v := range sc {
337 fmt.Fprintf(w, "%d,", v)
338 }
339 fmt.Fprintln(w, "}")
340 341 // map from size to size class, for large sizes.
342 sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1)
343 for i := range sc {
344 size := smallSizeMax + i*largeSizeDiv
345 for j, c := range classes {
346 if c.size >= size {
347 sc[i] = j
348 break
349 }
350 }
351 }
352 fmt.Fprint(w, "var SizeToSizeClass128 = [(MaxSmallSize-SmallSizeMax)/LargeSizeDiv+1]uint8 {")
353 for _, v := range sc {
354 fmt.Fprintf(w, "%d,", v)
355 }
356 fmt.Fprintln(w, "}")
357 }
358