mksizeclasses.mx raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build ignore
   6  
   7  // Generate tables for small malloc size classes.
   8  //
   9  // See malloc.go for overview.
  10  //
  11  // The size classes are chosen so that rounding an allocation
  12  // request up to the next size class wastes at most 12.5% (1.125x).
  13  //
  14  // Each size class has its own page count that gets allocated
  15  // and chopped up when new objects of the size class are needed.
  16  // That page count is chosen so that chopping up the run of
  17  // pages into objects of the given size wastes at most 12.5% (1.125x)
  18  // of the memory. It is not necessary that the cutoff here be
  19  // the same as above.
  20  //
  21  // The two sources of waste multiply, so the worst possible case
  22  // for the above constraints would be that allocations of some
  23  // size might have a 26.6% (1.266x) overhead.
  24  // In practice, only one of the wastes comes into play for a
  25  // given size (sizes < 512 waste mainly on the round-up,
  26  // sizes > 512 waste mainly on the page chopping).
  27  // For really small sizes, alignment constraints force the
  28  // overhead higher.
  29  
  30  package main
  31  
  32  import (
  33  	"bytes"
  34  	"flag"
  35  	"fmt"
  36  	"go/format"
  37  	"io"
  38  	"log"
  39  	"math"
  40  	"math/bits"
  41  	"os"
  42  )
  43  
  44  // Generate msize.go
  45  
  46  var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go")
  47  
  48  func main() {
  49  	flag.Parse()
  50  
  51  	var b bytes.Buffer
  52  	fmt.Fprintln(&b, "// Code generated by mksizeclasses.go; DO NOT EDIT.")
  53  	fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go")
  54  	fmt.Fprintln(&b)
  55  	fmt.Fprintln(&b, "package runtime")
  56  	classes := makeClasses()
  57  
  58  	printComment(&b, classes)
  59  
  60  	printClasses(&b, classes)
  61  
  62  	out, err := format.Source(b.Bytes())
  63  	if err != nil {
  64  		log.Fatal(err)
  65  	}
  66  	if *stdout {
  67  		_, err = os.Stdout.Write(out)
  68  	} else {
  69  		err = os.WriteFile("sizeclasses.go", out, 0666)
  70  	}
  71  	if err != nil {
  72  		log.Fatal(err)
  73  	}
  74  }
  75  
  76  const (
  77  	// Constants that we use and will transfer to the runtime.
  78  	minHeapAlign = 8
  79  	maxSmallSize = 32 << 10
  80  	smallSizeDiv = 8
  81  	smallSizeMax = 1024
  82  	largeSizeDiv = 128
  83  	pageShift    = 13
  84  
  85  	// Derived constants.
  86  	pageSize = 1 << pageShift
  87  )
  88  
  89  type class struct {
  90  	size   int // max size
  91  	npages int // number of pages
  92  }
  93  
  94  func powerOfTwo(x int) bool {
  95  	return x != 0 && x&(x-1) == 0
  96  }
  97  
  98  func makeClasses() []class {
  99  	var classes []class
 100  
 101  	classes = append(classes, class{}) // class #0 is a dummy entry
 102  
 103  	align := minHeapAlign
 104  	for size := align; size <= maxSmallSize; size += align {
 105  		if powerOfTwo(size) { // bump alignment once in a while
 106  			if size >= 2048 {
 107  				align = 256
 108  			} else if size >= 128 {
 109  				align = size / 8
 110  			} else if size >= 32 {
 111  				align = 16 // heap bitmaps assume 16 byte alignment for allocations >= 32 bytes.
 112  			}
 113  		}
 114  		if !powerOfTwo(align) {
 115  			panic("incorrect alignment")
 116  		}
 117  
 118  		// Make the allocnpages big enough that
 119  		// the leftover is less than 1/8 of the total,
 120  		// so wasted space is at most 12.5%.
 121  		allocsize := pageSize
 122  		for allocsize%size > allocsize/8 {
 123  			allocsize += pageSize
 124  		}
 125  		npages := allocsize / pageSize
 126  
 127  		// If the previous sizeclass chose the same
 128  		// allocation size and fit the same number of
 129  		// objects into the page, we might as well
 130  		// use just this size instead of having two
 131  		// different sizes.
 132  		if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size {
 133  			classes[len(classes)-1].size = size
 134  			continue
 135  		}
 136  		classes = append(classes, class{size: size, npages: npages})
 137  	}
 138  
 139  	// Increase object sizes if we can fit the same number of larger objects
 140  	// into the same number of pages. For example, we choose size 8448 above
 141  	// with 6 objects in 7 pages. But we can well use object size 9472,
 142  	// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
 143  	// We need to preserve at least largeSizeDiv alignment otherwise
 144  	// sizeToClass won't work.
 145  	for i := range classes {
 146  		if i == 0 {
 147  			continue
 148  		}
 149  		c := &classes[i]
 150  		psize := c.npages * pageSize
 151  		new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1)
 152  		if new_size > c.size {
 153  			c.size = new_size
 154  		}
 155  	}
 156  
 157  	if len(classes) != 68 {
 158  		panic("number of size classes has changed")
 159  	}
 160  
 161  	for i := range classes {
 162  		computeDivMagic(&classes[i])
 163  	}
 164  
 165  	return classes
 166  }
 167  
 168  // computeDivMagic checks that the division required to compute object
 169  // index from span offset can be computed using 32-bit multiplication.
 170  // n / c.size is implemented as (n * (^uint32(0)/uint32(c.size) + 1)) >> 32
 171  // for all 0 <= n <= c.npages * pageSize
 172  func computeDivMagic(c *class) {
 173  	// divisor
 174  	d := c.size
 175  	if d == 0 {
 176  		return
 177  	}
 178  
 179  	// maximum input value for which the formula needs to work.
 180  	max := c.npages * pageSize
 181  
 182  	// As reported in [1], if n and d are unsigned N-bit integers, we
 183  	// can compute n / d as ⌊n * c / 2^F⌋, where c is ⌈2^F / d⌉ and F is
 184  	// computed with:
 185  	//
 186  	// 	Algorithm 2: Algorithm to select the number of fractional bits
 187  	// 	and the scaled approximate reciprocal in the case of unsigned
 188  	// 	integers.
 189  	//
 190  	// 	if d is a power of two then
 191  	// 		Let F ← log₂(d) and c = 1.
 192  	// 	else
 193  	// 		Let F ← N + L where L is the smallest integer
 194  	// 		such that d ≤ (2^(N+L) mod d) + 2^L.
 195  	// 	end if
 196  	//
 197  	// [1] "Faster Remainder by Direct Computation: Applications to
 198  	// Compilers and Software Libraries" Daniel Lemire, Owen Kaser,
 199  	// Nathan Kurz arXiv:1902.01961
 200  	//
 201  	// To minimize the risk of introducing errors, we implement the
 202  	// algorithm exactly as stated, rather than trying to adapt it to
 203  	// fit typical Go idioms.
 204  	N := bits.Len(uint(max))
 205  	var F int
 206  	if powerOfTwo(d) {
 207  		F = int(math.Log2(float64(d)))
 208  		if d != 1<<F {
 209  			panic("imprecise log2")
 210  		}
 211  	} else {
 212  		for L := 0; ; L++ {
 213  			if d <= ((1<<(N+L))%d)+(1<<L) {
 214  				F = N + L
 215  				break
 216  			}
 217  		}
 218  	}
 219  
 220  	// Also, noted in the paper, F is the smallest number of fractional
 221  	// bits required. We use 32 bits, because it works for all size
 222  	// classes and is fast on all CPU architectures that we support.
 223  	if F > 32 {
 224  		fmt.Printf("d=%d max=%d N=%d F=%d\n", c.size, max, N, F)
 225  		panic("size class requires more than 32 bits of precision")
 226  	}
 227  
 228  	// Brute force double-check with the exact computation that will be
 229  	// done by the runtime.
 230  	m := ^uint32(0)/uint32(c.size) + 1
 231  	for n := 0; n <= max; n++ {
 232  		if uint32((uint64(n)*uint64(m))>>32) != uint32(n/c.size) {
 233  			fmt.Printf("d=%d max=%d m=%d n=%d\n", d, max, m, n)
 234  			panic("bad 32-bit multiply magic")
 235  		}
 236  	}
 237  }
 238  
 239  func printComment(w io.Writer, classes []class) {
 240  	fmt.Fprintf(w, "// %-5s  %-9s  %-10s  %-7s  %-10s  %-9s  %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste", "min align")
 241  	prevSize := 0
 242  	var minAligns [pageShift + 1]int
 243  	for i, c := range classes {
 244  		if i == 0 {
 245  			continue
 246  		}
 247  		spanSize := c.npages * pageSize
 248  		objects := spanSize / c.size
 249  		tailWaste := spanSize - c.size*(spanSize/c.size)
 250  		maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize)
 251  		alignBits := bits.TrailingZeros(uint(c.size))
 252  		if alignBits > pageShift {
 253  			// object alignment is capped at page alignment
 254  			alignBits = pageShift
 255  		}
 256  		for i := range minAligns {
 257  			if i > alignBits {
 258  				minAligns[i] = 0
 259  			} else if minAligns[i] == 0 {
 260  				minAligns[i] = c.size
 261  			}
 262  		}
 263  		prevSize = c.size
 264  		fmt.Fprintf(w, "// %5d  %9d  %10d  %7d  %10d  %8.2f%%  %9d\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste, 1<<alignBits)
 265  	}
 266  	fmt.Fprintf(w, "\n")
 267  
 268  	fmt.Fprintf(w, "// %-9s  %-4s  %-12s\n", "alignment", "bits", "min obj size")
 269  	for bits, size := range minAligns {
 270  		if size == 0 {
 271  			break
 272  		}
 273  		if bits+1 < len(minAligns) && size == minAligns[bits+1] {
 274  			continue
 275  		}
 276  		fmt.Fprintf(w, "// %9d  %4d  %12d\n", 1<<bits, bits, size)
 277  	}
 278  	fmt.Fprintf(w, "\n")
 279  }
 280  
 281  func maxObjsPerSpan(classes []class) int {
 282  	most := 0
 283  	for _, c := range classes[1:] {
 284  		n := c.npages * pageSize / c.size
 285  		most = max(most, n)
 286  	}
 287  	return most
 288  }
 289  
 290  func printClasses(w io.Writer, classes []class) {
 291  	fmt.Fprintln(w, "const (")
 292  	fmt.Fprintf(w, "MinHeapAlign = %d\n", minHeapAlign)
 293  	fmt.Fprintf(w, "MaxSmallSize = %d\n", maxSmallSize)
 294  	fmt.Fprintf(w, "SmallSizeDiv = %d\n", smallSizeDiv)
 295  	fmt.Fprintf(w, "SmallSizeMax = %d\n", smallSizeMax)
 296  	fmt.Fprintf(w, "LargeSizeDiv = %d\n", largeSizeDiv)
 297  	fmt.Fprintf(w, "NumSizeClasses = %d\n", len(classes))
 298  	fmt.Fprintf(w, "PageShift = %d\n", pageShift)
 299  	fmt.Fprintf(w, "MaxObjsPerSpan = %d\n", maxObjsPerSpan(classes))
 300  	fmt.Fprintln(w, ")")
 301  
 302  	fmt.Fprint(w, "var SizeClassToSize = [NumSizeClasses]uint16 {")
 303  	for _, c := range classes {
 304  		fmt.Fprintf(w, "%d,", c.size)
 305  	}
 306  	fmt.Fprintln(w, "}")
 307  
 308  	fmt.Fprint(w, "var SizeClassToNPages = [NumSizeClasses]uint8 {")
 309  	for _, c := range classes {
 310  		fmt.Fprintf(w, "%d,", c.npages)
 311  	}
 312  	fmt.Fprintln(w, "}")
 313  
 314  	fmt.Fprint(w, "var SizeClassToDivMagic = [NumSizeClasses]uint32 {")
 315  	for _, c := range classes {
 316  		if c.size == 0 {
 317  			fmt.Fprintf(w, "0,")
 318  			continue
 319  		}
 320  		fmt.Fprintf(w, "^uint32(0)/%d+1,", c.size)
 321  	}
 322  	fmt.Fprintln(w, "}")
 323  
 324  	// map from size to size class, for small sizes.
 325  	sc := make([]int, smallSizeMax/smallSizeDiv+1)
 326  	for i := range sc {
 327  		size := i * smallSizeDiv
 328  		for j, c := range classes {
 329  			if c.size >= size {
 330  				sc[i] = j
 331  				break
 332  			}
 333  		}
 334  	}
 335  	fmt.Fprint(w, "var SizeToSizeClass8 = [SmallSizeMax/SmallSizeDiv+1]uint8 {")
 336  	for _, v := range sc {
 337  		fmt.Fprintf(w, "%d,", v)
 338  	}
 339  	fmt.Fprintln(w, "}")
 340  
 341  	// map from size to size class, for large sizes.
 342  	sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1)
 343  	for i := range sc {
 344  		size := smallSizeMax + i*largeSizeDiv
 345  		for j, c := range classes {
 346  			if c.size >= size {
 347  				sc[i] = j
 348  				break
 349  			}
 350  		}
 351  	}
 352  	fmt.Fprint(w, "var SizeToSizeClass128 = [(MaxSmallSize-SmallSizeMax)/LargeSizeDiv+1]uint8 {")
 353  	for _, v := range sc {
 354  		fmt.Fprintf(w, "%d,", v)
 355  	}
 356  	fmt.Fprintln(w, "}")
 357  }
 358