cpu_x86.go raw

   1  // Copyright 2017 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // +build 386 amd64 amd64p32
   6  
   7  package cpu
   8  
   9  import (
  10  	"fmt"
  11  	"strings"
  12  )
  13  
// CacheLineSize is the cache line size constant this package exposes for
// x86 targets.
// NOTE(review): presumably expressed in bytes — confirm against callers.
const CacheLineSize = 64
  15  
// cpuid is implemented in cpu_x86.s.
// Callers in this file pass the leaf number as eaxArg and the subleaf as
// ecxArg (e.g. cpuid(4, i)) and read the four returned register values.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
  18  
// xgetbv with ecx = 0 is implemented in cpu_x86.s.
// doinit only calls it when OSXSAVE is reported, and inspects the bits of
// the returned eax to decide whether the OS supports the AVX and AVX-512
// register state.
func xgetbv() (eax, edx uint32)
  21  
  22  const (
  23  	// edx bits
  24  	cpuid_SSE2 = 1 << 26
  25  
  26  	// ecx bits
  27  	cpuid_SSE3      = 1 << 0
  28  	cpuid_PCLMULQDQ = 1 << 1
  29  	cpuid_SSSE3     = 1 << 9
  30  	cpuid_FMA       = 1 << 12
  31  	cpuid_SSE41     = 1 << 19
  32  	cpuid_SSE42     = 1 << 20
  33  	cpuid_POPCNT    = 1 << 23
  34  	cpuid_AES       = 1 << 25
  35  	cpuid_OSXSAVE   = 1 << 27
  36  	cpuid_AVX       = 1 << 28
  37  	cpuid_CMPXCHG16B = 1 << 13
  38  
  39  	// ebx bits
  40  	cpuid_BMI1     = 1 << 3
  41  	cpuid_AVX2     = 1 << 5
  42  	cpuid_BMI2     = 1 << 8
  43  	cpuid_ERMS     = 1 << 9
  44  	cpuid_ADX      = 1 << 19
  45  	cpuid_AVX512F  = 1 << 16
  46  	cpuid_AVX512DQ = 1 << 17
  47  	cpuid_AVX512BW = 1 << 30
  48  	cpuid_AVX512VL = 1 << 31
  49  
  50  	// edx bits
  51  	cpuid_Invariant_TSC = 1 << 8
  52  )
  53  
  54  func doinit() {
  55  	options = []option{
  56  		{"adx", &X86.HasADX},
  57  		{"aes", &X86.HasAES},
  58  		{"avx", &X86.HasAVX},
  59  		{"avx2", &X86.HasAVX2},
  60  		{"bmi1", &X86.HasBMI1},
  61  		{"bmi2", &X86.HasBMI2},
  62  		{"erms", &X86.HasERMS},
  63  		{"fma", &X86.HasFMA},
  64  		{"pclmulqdq", &X86.HasPCLMULQDQ},
  65  		{"popcnt", &X86.HasPOPCNT},
  66  		{"sse3", &X86.HasSSE3},
  67  		{"sse41", &X86.HasSSE41},
  68  		{"sse42", &X86.HasSSE42},
  69  		{"ssse3", &X86.HasSSSE3},
  70  		{"avx512f", &X86.HasAVX512F},
  71  		{"avx512dq", &X86.HasAVX512DQ},
  72  		{"avx512bw", &X86.HasAVX512BW},
  73  		{"avx512vl", &X86.HasAVX512VL},
  74  		{"invariant_tsc", &X86.HasInvariantTSC},
  75  
  76  		// sse2 set as last element so it can easily be removed again. See code below.
  77  		{"sse2", &X86.HasSSE2},
  78  	}
  79  
  80  	// Remove sse2 from options on amd64(p32) because SSE2 is a mandatory feature for these GOARCHs.
  81  	if GOARCH == "amd64" || GOARCH == "amd64p32" {
  82  		options = options[:len(options)-1]
  83  	}
  84  
  85  	maxID, _, _, _ := cpuid(0, 0)
  86  
  87  	if maxID < 1 {
  88  		return
  89  	}
  90  
  91  	_, _, ecx1, edx1 := cpuid(1, 0)
  92  	X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
  93  
  94  	X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
  95  	X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
  96  	X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
  97  	X86.HasFMA = isSet(ecx1, cpuid_FMA)
  98  	X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
  99  	X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
 100  	X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
 101  	X86.HasAES = isSet(ecx1, cpuid_AES)
 102  	X86.HasCMPXCHG16B = isSet(ecx1, cpuid_CMPXCHG16B)
 103  	X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
 104  
 105  	osSupportsAVX := false
 106  	osSupportsAVX512 := false
 107  	// For XGETBV, OSXSAVE bit is required and sufficient.
 108  	if X86.HasOSXSAVE {
 109  		eax, _ := xgetbv()
 110  		// Check if XMM and YMM registers have OS support.
 111  		osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
 112  		// Check is ZMM registers have OS support.
 113  		osSupportsAVX512 = isSet(eax>>5, 7) && isSet(eax>>1, 3)
 114  	}
 115  
 116  	X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
 117  
 118  	if maxID < 7 {
 119  		return
 120  	}
 121  
 122  	_, ebx7, _, _ := cpuid(7, 0)
 123  	X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
 124  	X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
 125  	X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512
 126  	X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ) && osSupportsAVX512
 127  	X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW) && osSupportsAVX512
 128  	X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL) && osSupportsAVX512
 129  	X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
 130  	X86.HasERMS = isSet(ebx7, cpuid_ERMS)
 131  	X86.HasADX = isSet(ebx7, cpuid_ADX)
 132  
 133  	X86.Cache = getCacheSize()
 134  
 135  	X86.HasInvariantTSC = hasInvariantTSC()
 136  
 137  	X86.Family, X86.Model, X86.SteppingID = getVersionInfo()
 138  
 139  	X86.Signature = makeSignature(X86.Family, X86.Model)
 140  
 141  	X86.Name = getName()
 142  
 143  	X86.TSCFrequency = getNativeTSCFrequency(X86.Name, X86.Signature, X86.SteppingID)
 144  }
 145  
 146  func isSet(hwc uint32, value uint32) bool {
 147  	return hwc&value != 0
 148  }
 149  
 150  func hasInvariantTSC() bool {
 151  	if maxExtendedFunction() < 0x80000007 {
 152  		return false
 153  	}
 154  	_, _, _, edx := cpuid(0x80000007, 0)
 155  	return isSet(edx, cpuid_Invariant_TSC)
 156  }
 157  
 158  func getName() string {
 159  	if maxExtendedFunction() >= 0x80000004 {
 160  		v := make([]uint32, 0, 48)
 161  		for i := uint32(0); i < 3; i++ {
 162  			a, b, c, d := cpuid(0x80000002+i, 0)
 163  			v = append(v, a, b, c, d)
 164  		}
 165  		return strings.Trim(string(valAsString(v...)), " ")
 166  	}
 167  	return "unknown"
 168  }
 169  
 170  // getNativeTSCFrequency gets TSC frequency from CPUID,
 171  // only supports Intel (Skylake or later microarchitecture) & key information is from Intel manual & kernel codes
 172  // (especially this commit: https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684).
 173  func getNativeTSCFrequency(name, sign string, steppingID uint32) uint64 {
 174  
 175  	if vendorID() != Intel {
 176  		return 0
 177  	}
 178  
 179  	if maxFunctionID() < 0x15 {
 180  		return 0
 181  	}
 182  
 183  	// ApolloLake, GeminiLake, CannonLake (and presumably all new chipsets
 184  	// from this point) report the crystal frequency directly via CPUID.0x15.
 185  	// That's definitive data that we can rely upon.
 186  	eax, ebx, ecx, _ := cpuid(0x15, 0)
 187  
 188  	// If ebx is 0, the TSC/”core crystal clock” ratio is not enumerated.
 189  	// We won't provide TSC frequency detection in this situation.
 190  	if eax == 0 || ebx == 0 {
 191  		return 0
 192  	}
 193  
 194  	// Skylake, Kabylake and all variants of those two chipsets report a
 195  	// crystal frequency of zero.
 196  	if ecx == 0 { // Crystal clock frequency is not enumerated.
 197  		ecx = getCrystalClockFrequency(sign, steppingID)
 198  	}
 199  
 200  	// TSC frequency = “core crystal clock frequency” * EBX/EAX.
 201  	return uint64(ecx) * (uint64(ebx) / uint64(eax))
 202  }
 203  
 204  // Copied from: CPUID Signature values of DisplayFamily and DisplayModel,
 205  // in Intel® 64 and IA-32 Architectures Software Developer’s Manual
 206  // Volume 4: Model-Specific Registers
 207  // & https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/intel-family.h
 208  const (
 209  	IntelFam6SkylakeL     = "06_4EH"
 210  	IntelFam6Skylake      = "06_5EH"
 211  	IntelFam6XeonScalable = "06_55H"
 212  	IntelFam6KabylakeL    = "06_8EH"
 213  	IntelFam6Kabylake     = "06_9EH"
 214  )
 215  
 216  // getCrystalClockFrequency gets crystal clock frequency
 217  // for Intel processors in which CPUID.15H.EBX[31:0] ÷ CPUID.0x15.EAX[31:0] is enumerated
 218  // but CPUID.15H.ECX is not enumerated using this function to get nominal core crystal clock frequency.
 219  //
 220  // Actually these crystal clock frequencies provided by Intel hardcoded tables are not so accurate in some cases,
 221  // e.g. SkyLake server CPU may have issue (All SKX subject the crystal to an EMI reduction circuit that
 222  //reduces its actual frequency by (approximately) -0.25%):
 223  // see https://lore.kernel.org/lkml/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com/
 224  // for more details.
 225  // With this report, I set a coefficient (0.9975) for IntelFam6SkyLakeX.
 226  //
 227  // Unlike the kernel way (mentioned in https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684),
 228  // I prefer the Intel hardcoded tables, (in <Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 3>
 229  // 18.7.3 Determining the Processor Base Frequency, Table 18-85. Nominal Core Crystal Clock Frequency)
 230  // because after some testing (comparing with wall clock, see https://github.com/templexxx/tsc/tsc_test.go for more details),
 231  // I found hardcoded tables are more accurate.
 232  func getCrystalClockFrequency(sign string, steppingID uint32) uint32 {
 233  
 234  	if maxFunctionID() < 0x16 {
 235  		return 0
 236  	}
 237  
 238  	switch sign {
 239  	case IntelFam6SkylakeL:
 240  		return 24 * 1000 * 1000
 241  	case IntelFam6Skylake:
 242  		return 24 * 1000 * 1000
 243  	case IntelFam6XeonScalable:
 244  		// SKL-SP.
 245  		// see: https://community.intel.com/t5/Software-Tuning-Performance/How-to-detect-microarchitecture-on-Xeon-Scalable/m-p/1205162#M7633.
 246  		if steppingID == 0x2 || steppingID == 0x3 || steppingID == 0x4 {
 247  			return 25 * 1000 * 1000 * 0.9975
 248  		}
 249  		return 25 * 1000 * 1000 // TODO check other Xeon Scalable has no slow down issue.
 250  	case IntelFam6KabylakeL:
 251  		return 24 * 1000 * 1000
 252  	case IntelFam6Kabylake:
 253  		return 24 * 1000 * 1000
 254  	}
 255  
 256  	return 0
 257  }
 258  
 259  func getVersionInfo() (uint32, uint32, uint32) {
 260  	if maxFunctionID() < 0x1 {
 261  		return 0, 0, 0
 262  	}
 263  	eax, _, _, _ := cpuid(1, 0)
 264  	family := (eax >> 8) & 0xf
 265  	displayFamily := family
 266  	if family == 0xf {
 267  		displayFamily = ((eax >> 20) & 0xff) + family
 268  	}
 269  	model := (eax >> 4) & 0xf
 270  	displayModel := model
 271  	if family == 0x6 || family == 0xf {
 272  		displayModel = ((eax >> 12) & 0xf0) + model
 273  	}
 274  	return displayFamily, displayModel, eax & 0x7
 275  }
 276  
 277  // signature format: XX_XXH
 278  func makeSignature(family, model uint32) string {
 279  	signature := strings.ToUpper(fmt.Sprintf("0%x_0%xH", family, model))
 280  	ss := strings.Split(signature, "_")
 281  	for i, s := range ss {
 282  		// Maybe insert too more `0`, drop it.
 283  		if len(s) > 2 {
 284  			s = s[1:]
 285  			ss[i] = s
 286  		}
 287  	}
 288  	return strings.Join(ss, "_")
 289  }
 290  
 291  // getCacheSize is from
 292  // https://github.com/klauspost/cpuid/blob/5a626f7029c910cc8329dae5405ee4f65034bce5/cpuid.go#L723
 293  func getCacheSize() Cache {
 294  	c := Cache{
 295  		L1I: -1,
 296  		L1D: -1,
 297  		L2:  -1,
 298  		L3:  -1,
 299  	}
 300  
 301  	vendor := vendorID()
 302  	switch vendor {
 303  	case Intel:
 304  		if maxFunctionID() < 4 {
 305  			return c
 306  		}
 307  		for i := uint32(0); ; i++ {
 308  			eax, ebx, ecx, _ := cpuid(4, i)
 309  			cacheType := eax & 15
 310  			if cacheType == 0 {
 311  				break
 312  			}
 313  			cacheLevel := (eax >> 5) & 7
 314  			coherency := int(ebx&0xfff) + 1
 315  			partitions := int((ebx>>12)&0x3ff) + 1
 316  			associativity := int((ebx>>22)&0x3ff) + 1
 317  			sets := int(ecx) + 1
 318  			size := associativity * partitions * coherency * sets
 319  			switch cacheLevel {
 320  			case 1:
 321  				if cacheType == 1 {
 322  					// 1 = Data Cache
 323  					c.L1D = size
 324  				} else if cacheType == 2 {
 325  					// 2 = Instruction Cache
 326  					c.L1I = size
 327  				} else {
 328  					if c.L1D < 0 {
 329  						c.L1I = size
 330  					}
 331  					if c.L1I < 0 {
 332  						c.L1I = size
 333  					}
 334  				}
 335  			case 2:
 336  				c.L2 = size
 337  			case 3:
 338  				c.L3 = size
 339  			}
 340  		}
 341  	case AMD, Hygon:
 342  		// Untested.
 343  		if maxExtendedFunction() < 0x80000005 {
 344  			return c
 345  		}
 346  		_, _, ecx, edx := cpuid(0x80000005, 0)
 347  		c.L1D = int(((ecx >> 24) & 0xFF) * 1024)
 348  		c.L1I = int(((edx >> 24) & 0xFF) * 1024)
 349  
 350  		if maxExtendedFunction() < 0x80000006 {
 351  			return c
 352  		}
 353  		_, _, ecx, _ = cpuid(0x80000006, 0)
 354  		c.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
 355  	}
 356  
 357  	return c
 358  }
 359  
 360  func maxFunctionID() uint32 {
 361  	a, _, _, _ := cpuid(0, 0)
 362  	return a
 363  }
 364  
 365  func maxExtendedFunction() uint32 {
 366  	eax, _, _, _ := cpuid(0x80000000, 0)
 367  	return eax
 368  }
 369  
// Vendor identifiers, as returned by vendorID and used as the values of
// vendorMapping. Other is the zero value and is what vendorID returns when
// the vendor string is not found in vendorMapping. The numeric values come
// from iota, so the order of this list must not change.
const (
	Other = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
)
 384  
 385  // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
 386  var vendorMapping = map[string]int{
 387  	"AMDisbetter!": AMD,
 388  	"AuthenticAMD": AMD,
 389  	"CentaurHauls": VIA,
 390  	"GenuineIntel": Intel,
 391  	"TransmetaCPU": Transmeta,
 392  	"GenuineTMx86": Transmeta,
 393  	"Geode by NSC": NSC,
 394  	"VIA VIA VIA ": VIA,
 395  	"KVMKVMKVMKVM": KVM,
 396  	"Microsoft Hv": MSVM,
 397  	"VMwareVMware": VMware,
 398  	"XenVMMXenVMM": XenHVM,
 399  	"bhyve bhyve ": Bhyve,
 400  	"HygonGenuine": Hygon,
 401  }
 402  
 403  func vendorID() int {
 404  	_, b, c, d := cpuid(0, 0)
 405  	v := valAsString(b, d, c)
 406  	vend, ok := vendorMapping[string(v)]
 407  	if !ok {
 408  		return Other
 409  	}
 410  	return vend
 411  }
 412  
 413  func valAsString(values ...uint32) []byte {
 414  	r := make([]byte, 4*len(values))
 415  	for i, v := range values {
 416  		dst := r[i*4:]
 417  		dst[0] = byte(v & 0xff)
 418  		dst[1] = byte((v >> 8) & 0xff)
 419  		dst[2] = byte((v >> 16) & 0xff)
 420  		dst[3] = byte((v >> 24) & 0xff)
 421  		switch {
 422  		case dst[0] == 0:
 423  			return r[:i*4]
 424  		case dst[1] == 0:
 425  			return r[:i*4+1]
 426  		case dst[2] == 0:
 427  			return r[:i*4+2]
 428  		case dst[3] == 0:
 429  			return r[:i*4+3]
 430  		}
 431  	}
 432  	return r
 433  }
 434