intrinsics.mx raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package sys
   6  
   7  // Copied from math/bits to avoid dependence.
   8  
   9  var deBruijn32tab = [32]byte{
  10  	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
  11  	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
  12  }
  13  
  14  const deBruijn32 = 0x077CB531
  15  
  16  var deBruijn64tab = [64]byte{
  17  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
  18  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
  19  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
  20  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
  21  }
  22  
  23  const deBruijn64 = 0x03f79d71b4ca8b09
  24  
  25  const ntz8tab = "" +
  26  	"\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  27  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  28  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  29  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  30  	"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  31  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  32  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  33  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  34  	"\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  35  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  36  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  37  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  38  	"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  39  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  40  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
  41  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00"
  42  
  43  // TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
  44  func TrailingZeros32(x uint32) int {
  45  	if x == 0 {
  46  		return 32
  47  	}
  48  	// see comment in TrailingZeros64
  49  	return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
  50  }
  51  
  52  // TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
  53  func TrailingZeros64(x uint64) int {
  54  	if x == 0 {
  55  		return 64
  56  	}
  57  	// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
  58  	//
  59  	// x & -x leaves only the right-most bit set in the word. Let k be the
  60  	// index of that bit. Since only a single bit is set, the value is two
  61  	// to the power of k. Multiplying by a power of two is equivalent to
  62  	// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
  63  	// is such that all six bit, consecutive substrings are distinct.
  64  	// Therefore, if we have a left shifted version of this constant we can
  65  	// find by how many bits it was shifted by looking at which six bit
  66  	// substring ended up at the top of the word.
  67  	// (Knuth, volume 4, section 7.3.1)
  68  	return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
  69  }
  70  
  71  // TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
  72  func TrailingZeros8(x uint8) int {
  73  	return int(ntz8tab[x])
  74  }
  75  
  76  const len8tab = "" +
  77  	"\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
  78  	"\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
  79  	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
  80  	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
  81  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
  82  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
  83  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
  84  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
  85  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
  86  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
  87  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
  88  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
  89  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
  90  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
  91  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
  92  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
  93  
  94  // Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
  95  //
  96  // nosplit because this is used in src/runtime/histogram.go, which make run in sensitive contexts.
  97  //
  98  //go:nosplit
  99  func Len64(x uint64) (n int) {
 100  	if x >= 1<<32 {
 101  		x >>= 32
 102  		n = 32
 103  	}
 104  	if x >= 1<<16 {
 105  		x >>= 16
 106  		n += 16
 107  	}
 108  	if x >= 1<<8 {
 109  		x >>= 8
 110  		n += 8
 111  	}
 112  	return n + int(len8tab[x])
 113  }
 114  
 115  // --- OnesCount ---
 116  
 117  const m0 = 0x5555555555555555 // 01010101 ...
 118  const m1 = 0x3333333333333333 // 00110011 ...
 119  const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
 120  
 121  // OnesCount64 returns the number of one bits ("population count") in x.
 122  func OnesCount64(x uint64) int {
 123  	// Implementation: Parallel summing of adjacent bits.
 124  	// See "Hacker's Delight", Chap. 5: Counting Bits.
 125  	// The following pattern shows the general approach:
 126  	//
 127  	//   x = x>>1&(m0&m) + x&(m0&m)
 128  	//   x = x>>2&(m1&m) + x&(m1&m)
 129  	//   x = x>>4&(m2&m) + x&(m2&m)
 130  	//   x = x>>8&(m3&m) + x&(m3&m)
 131  	//   x = x>>16&(m4&m) + x&(m4&m)
 132  	//   x = x>>32&(m5&m) + x&(m5&m)
 133  	//   return int(x)
 134  	//
 135  	// Masking (& operations) can be left away when there's no
 136  	// danger that a field's sum will carry over into the next
 137  	// field: Since the result cannot be > 64, 8 bits is enough
 138  	// and we can ignore the masks for the shifts by 8 and up.
 139  	// Per "Hacker's Delight", the first line can be simplified
 140  	// more, but it saves at best one instruction, so we leave
 141  	// it alone for clarity.
 142  	const m = 1<<64 - 1
 143  	x = x>>1&(m0&m) + x&(m0&m)
 144  	x = x>>2&(m1&m) + x&(m1&m)
 145  	x = (x>>4 + x) & (m2 & m)
 146  	x += x >> 8
 147  	x += x >> 16
 148  	x += x >> 32
 149  	return int(x) & (1<<7 - 1)
 150  }
 151  
 152  // LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
 153  func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
 154  
 155  // LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
 156  func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
 157  
 158  // Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
 159  func Len8(x uint8) int {
 160  	return int(len8tab[x])
 161  }
 162  
 163  // Bswap64 returns its input with byte order reversed
 164  // 0x0102030405060708 -> 0x0807060504030201
 165  func Bswap64(x uint64) uint64 {
 166  	c8 := uint64(0x00ff00ff00ff00ff)
 167  	a := x >> 8 & c8
 168  	b := (x & c8) << 8
 169  	x = a | b
 170  	c16 := uint64(0x0000ffff0000ffff)
 171  	a = x >> 16 & c16
 172  	b = (x & c16) << 16
 173  	x = a | b
 174  	c32 := uint64(0x00000000ffffffff)
 175  	a = x >> 32 & c32
 176  	b = (x & c32) << 32
 177  	x = a | b
 178  	return x
 179  }
 180  
 181  // Bswap32 returns its input with byte order reversed
 182  // 0x01020304 -> 0x04030201
 183  func Bswap32(x uint32) uint32 {
 184  	c8 := uint32(0x00ff00ff)
 185  	a := x >> 8 & c8
 186  	b := (x & c8) << 8
 187  	x = a | b
 188  	c16 := uint32(0x0000ffff)
 189  	a = x >> 16 & c16
 190  	b = (x & c16) << 16
 191  	x = a | b
 192  	return x
 193  }
 194  
 195  // Prefetch prefetches data from memory addr to cache
 196  //
 197  // AMD64: Produce PREFETCHT0 instruction
 198  //
 199  // ARM64: Produce PRFM instruction with PLDL1KEEP option
 200  func Prefetch(addr uintptr) {}
 201  
 202  // PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
 203  // That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
 204  //
 205  // AMD64: Produce PREFETCHNTA instruction
 206  //
 207  // ARM64: Produce PRFM instruction with PLDL1STRM option
 208  func PrefetchStreamed(addr uintptr) {}
 209  
 210  // GetCallerPC returns the program counter (PC) of its caller's caller.
 211  // GetCallerSP returns the stack pointer (SP) of its caller's caller.
 212  // Both are implemented as intrinsics on every platform.
 213  //
 214  // For example:
 215  //
 216  //	func f(arg1, arg2, arg3 int) {
 217  //		pc := GetCallerPC()
 218  //		sp := GetCallerSP()
 219  //	}
 220  //
 221  // These two lines find the PC and SP immediately following
 222  // the call to f (where f will return).
 223  //
 224  // The call to GetCallerPC and GetCallerSP must be done in the
 225  // frame being asked about.
 226  //
 227  // The result of GetCallerSP is correct at the time of the return,
 228  // but it may be invalidated by any subsequent call to a function
 229  // that might relocate the stack in order to grow or shrink it.
 230  // A general rule is that the result of GetCallerSP should be used
 231  // immediately and can only be passed to nosplit functions.
 232  
 233  func GetCallerPC() uintptr
 234  
 235  func GetCallerSP() uintptr
 236  
 237  // GetClosurePtr returns the pointer to the current closure.
 238  // GetClosurePtr can only be used in an assignment statement
 239  // at the entry of a function. Moreover, go:nosplit directive
 240  // must be specified at the declaration of caller function,
 241  // so that the function prolog does not clobber the closure register.
 242  // for example:
 243  //
 244  //	//go:nosplit
 245  //	func f(arg1, arg2, arg3 int) {
 246  //		dx := GetClosurePtr()
 247  //	}
 248  //
 249  // The compiler rewrites calls to this function into instructions that fetch the
 250  // pointer from a well-known register (DX on x86 architecture, etc.) directly.
 251  //
 252  // WARNING: PGO-based devirtualization cannot detect that caller of
 253  // GetClosurePtr requires closure context, and thus must maintain a list of
 254  // these functions, which is in
 255  // cmd/compile/internal/devirtualize/pgo.maybeDevirtualizeFunctionCall.
 256  func GetClosurePtr() uintptr
 257