cpu.mx raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package cpu implements processor feature detection for
   6  // various CPU architectures.
   7  package cpu
   8  
   9  import (
  10  	"os"
  11  	"bytes"
  12  )
  13  
// Initialized reports whether the CPU features were initialized.
//
// For some GOOS/GOARCH combinations initialization of the CPU features depends
// on reading an operating system specific file, e.g. /proc/self/auxv on linux/arm.
// Initialized will report false if reading the file fails.
var Initialized bool
  20  
// CacheLinePad is used to pad structs to avoid false sharing.
//
// It wraps a blank array of cacheLineSize bytes. cacheLineSize is not
// declared in this file.
type CacheLinePad struct{ _ [cacheLineSize]byte }
  23  
// X86 contains the supported CPU features of the
// current X86/AMD64 platform. If the current platform
// is not X86/AMD64 then all feature flags are false.
//
// X86 is padded at both ends with a CacheLinePad to avoid false sharing.
// Furthermore, HasAVX and HasAVX2 are only set if the OS supports XMM and
// YMM registers in addition to the CPUID feature bit being set.
var X86 struct {
	_                   CacheLinePad
	HasAES              bool // AES hardware implementation (AES NI)
	HasADX              bool // Multi-precision add-carry instruction extensions
	HasAVX              bool // Advanced vector extension
	HasAVX2             bool // Advanced vector extension 2
	HasAVX512           bool // Advanced vector extension 512
	HasAVX512F          bool // Advanced vector extension 512 Foundation Instructions
	HasAVX512CD         bool // Advanced vector extension 512 Conflict Detection Instructions
	HasAVX512ER         bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions
	HasAVX512VL         bool // Advanced vector extension 512 Vector Length Extensions
	HasAVX512BW         bool // Advanced vector extension 512 Byte and Word Instructions
	HasAVX512DQ         bool // Advanced vector extension 512 Doubleword and Quadword Instructions
	HasAVX512IFMA       bool // Advanced vector extension 512 Integer Fused Multiply Add
	HasAVX512VBMI       bool // Advanced vector extension 512 Vector Byte Manipulation Instructions
	HasAVX5124VNNIW     bool // Advanced vector extension 512 Vector Neural Network Instructions Word variable precision
	HasAVX5124FMAPS     bool // Advanced vector extension 512 Fused Multiply Accumulation Packed Single precision
	HasAVX512VPOPCNTDQ  bool // Advanced vector extension 512 Double and quad word population count instructions
	HasAVX512VPCLMULQDQ bool // Advanced vector extension 512 Vector carry-less multiply operations
	HasAVX512VNNI       bool // Advanced vector extension 512 Vector Neural Network Instructions
	HasAVX512GFNI       bool // Advanced vector extension 512 Galois field New Instructions
	HasAVX512VAES       bool // Advanced vector extension 512 Vector AES instructions
	HasAVX512VBMI2      bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
	HasAVX512BITALG     bool // Advanced vector extension 512 Bit Algorithms
	HasAVX512BF16       bool // Advanced vector extension 512 BFloat16 Instructions
	HasAMXTile          bool // Advanced Matrix Extension Tile instructions
	HasAMXInt8          bool // Advanced Matrix Extension Int8 instructions
	HasAMXBF16          bool // Advanced Matrix Extension BFloat16 instructions
	HasBMI1             bool // Bit manipulation instruction set 1
	HasBMI2             bool // Bit manipulation instruction set 2
	HasCX16             bool // Compare and exchange 16 Bytes
	HasERMS             bool // Enhanced REP for MOVSB and STOSB
	HasFMA              bool // Fused-multiply-add instructions
	HasOSXSAVE          bool // OS supports XSAVE/XRSTOR for saving/restoring XMM registers.
	HasPCLMULQDQ        bool // PCLMULQDQ instruction - most often used for AES-GCM
	HasPOPCNT           bool // Hamming weight instruction POPCNT.
	HasRDRAND           bool // RDRAND instruction (on-chip random number generator)
	HasRDSEED           bool // RDSEED instruction (on-chip random number generator)
	HasSSE2             bool // Streaming SIMD extension 2 (always available on amd64)
	HasSSE3             bool // Streaming SIMD extension 3
	HasSSSE3            bool // Supplemental streaming SIMD extension 3
	HasSSE41            bool // Streaming SIMD extension 4 and 4.1
	HasSSE42            bool // Streaming SIMD extension 4 and 4.2
	HasAVXIFMA          bool // Advanced vector extension Integer Fused Multiply Add
	HasAVXVNNI          bool // Advanced vector extension Vector Neural Network Instructions
	HasAVXVNNIInt8      bool // Advanced vector extension Vector Neural Network Int8 instructions
	_                   CacheLinePad
}
  80  
// ARM64 contains the supported CPU features of the
// current ARMv8(aarch64) platform. If the current platform
// is not arm64 then all feature flags are false.
//
// The struct is padded at both ends with a CacheLinePad to avoid false sharing.
var ARM64 struct {
	_           CacheLinePad
	HasFP       bool // Floating-point instruction set (always available)
	HasASIMD    bool // Advanced SIMD (always available)
	HasEVTSTRM  bool // Event stream support
	HasAES      bool // AES hardware implementation
	HasPMULL    bool // Polynomial multiplication instruction set
	HasSHA1     bool // SHA1 hardware implementation
	HasSHA2     bool // SHA2 hardware implementation
	HasCRC32    bool // CRC32 hardware implementation
	HasATOMICS  bool // Atomic memory operation instruction set
	HasFPHP     bool // Half precision floating-point instruction set
	HasASIMDHP  bool // Advanced SIMD half precision instruction set
	HasCPUID    bool // CPUID identification scheme registers
	HasASIMDRDM bool // Rounding double multiply add/subtract instruction set
	HasJSCVT    bool // JavaScript conversion from floating-point to integer
	HasFCMA     bool // Floating-point multiplication and addition of complex numbers
	HasLRCPC    bool // Release Consistent processor consistent support
	HasDCPOP    bool // Persistent memory support
	HasSHA3     bool // SHA3 hardware implementation
	HasSM3      bool // SM3 hardware implementation
	HasSM4      bool // SM4 hardware implementation
	HasASIMDDP  bool // Advanced SIMD dot product instruction set
	HasSHA512   bool // SHA512 hardware implementation
	HasSVE      bool // Scalable Vector Extensions
	HasSVE2     bool // Scalable Vector Extensions 2
	HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
	HasDIT      bool // Data Independent Timing support
	HasI8MM     bool // Advanced SIMD Int8 matrix multiplication instructions
	_           CacheLinePad
}
 115  
// ARM contains the supported CPU features of the current ARM (32-bit) platform.
// All feature flags are false if:
//  1. the current platform is not arm, or
//  2. the current operating system is not Linux.
//
// The struct is padded at both ends with a CacheLinePad to avoid false sharing.
var ARM struct {
	_           CacheLinePad
	HasSWP      bool // SWP instruction support
	HasHALF     bool // Half-word load and store support
	HasTHUMB    bool // ARM Thumb instruction set
	Has26BIT    bool // Address space limited to 26-bits
	HasFASTMUL  bool // 32-bit operand, 64-bit result multiplication support
	HasFPA      bool // Floating point arithmetic support
	HasVFP      bool // Vector floating point support
	HasEDSP     bool // DSP Extensions support
	HasJAVA     bool // Java instruction set
	HasIWMMXT   bool // Intel Wireless MMX technology support
	HasCRUNCH   bool // MaverickCrunch context switching and handling
	HasTHUMBEE  bool // Thumb EE instruction set
	HasNEON     bool // NEON instruction set
	HasVFPv3    bool // Vector floating point version 3 support
	HasVFPv3D16 bool // Vector floating point version 3 with 16 double-precision registers (D0-D15)
	HasTLS      bool // Thread local storage support
	HasVFPv4    bool // Vector floating point version 4 support
	HasIDIVA    bool // Integer divide instruction support in ARM mode
	HasIDIVT    bool // Integer divide instruction support in Thumb mode
	HasVFPD32   bool // Vector floating point with 32 double-precision registers (D0-D31)
	HasLPAE     bool // Large Physical Address Extensions
	HasEVTSTRM  bool // Event stream support
	HasAES      bool // AES hardware implementation
	HasPMULL    bool // Polynomial multiplication instruction set
	HasSHA1     bool // SHA1 hardware implementation
	HasSHA2     bool // SHA2 hardware implementation
	HasCRC32    bool // CRC32 hardware implementation
	_           CacheLinePad
}
 151  
// Loong64 holds one boolean per CPU feature; each boolean contains the
// correspondingly named CPU feature bit.
// The struct is padded at both ends with a CacheLinePad to avoid false sharing.
var Loong64 struct {
	_         CacheLinePad
	HasLSX    bool // supports the 128-bit vector extension
	HasLASX   bool // supports the 256-bit vector extension
	HasCRC32  bool // supports the CRC instructions
	HasLAM_BH bool // supports the AM{SWAP/ADD}[_DB].{B/H} instructions
	HasLAMCAS bool // supports the AMCAS[_DB].{B/H/W/D} instructions
	_         CacheLinePad
}
 163  
// MIPS64X contains the supported CPU features of the current mips64/mips64le
// platforms. If the current platform is not mips64/mips64le or the current
// operating system is not Linux then all feature flags are false.
//
// The struct is padded at both ends with a CacheLinePad to avoid false sharing.
var MIPS64X struct {
	_      CacheLinePad
	HasMSA bool // MIPS SIMD architecture
	_      CacheLinePad
}
 172  
// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
// If the current platform is not ppc64/ppc64le then all feature flags are false.
//
// For ppc64/ppc64le, it is safe to check only for the ISA level starting with
// ISA v3.00, since there are no optional categories. There are some exceptions
// that also require kernel support to work (DARN, SCV), so there are feature
// bits for those as well. The struct is padded to avoid false sharing.
var PPC64 struct {
	_        CacheLinePad
	HasDARN  bool // Hardware random number generator (requires kernel enablement)
	HasSCV   bool // Syscall vectored (requires kernel enablement)
	IsPOWER8 bool // ISA v2.07 (POWER8)
	IsPOWER9 bool // ISA v3.00 (POWER9), implies IsPOWER8
	_        CacheLinePad
}
 188  
// S390X contains the supported CPU features of the current IBM Z
// (s390x) platform. If the current platform is not IBM Z then all
// feature flags are false.
//
// S390X is padded at both ends with a CacheLinePad to avoid false sharing.
// Furthermore, HasVX is only set if the OS supports vector registers in
// addition to the STFLE feature bit being set.
var S390X struct {
	_         CacheLinePad
	HasZARCH  bool // z/Architecture mode is active [mandatory]
	HasSTFLE  bool // store facility list extended
	HasLDISP  bool // long (20-bit) displacements
	HasEIMM   bool // 32-bit immediates
	HasDFP    bool // decimal floating point
	HasETF3EH bool // ETF-3 enhanced
	HasMSA    bool // message security assist (CPACF)
	HasAES    bool // KM-AES{128,192,256} functions
	HasAESCBC bool // KMC-AES{128,192,256} functions
	HasAESCTR bool // KMCTR-AES{128,192,256} functions
	HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
	HasGHASH  bool // KIMD-GHASH function
	HasSHA1   bool // K{I,L}MD-SHA-1 functions
	HasSHA256 bool // K{I,L}MD-SHA-256 functions
	HasSHA512 bool // K{I,L}MD-SHA-512 functions
	HasSHA3   bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
	HasVX     bool // vector facility
	HasVXE    bool // vector-enhancements facility 1
	_         CacheLinePad
}
 218  
// RISCV64 contains the supported CPU features and performance characteristics for riscv64
// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate
// the presence of RISC-V extensions.
//
// It is safe to assume that all the RV64G extensions are supported, so they are omitted from
// this structure. As riscv64 Go programs require at least RV64G, the code that populates
// this structure cannot run successfully if some of the RV64G extensions are missing.
// The struct is padded at both ends with a CacheLinePad to avoid false sharing.
var RISCV64 struct {
	_                 CacheLinePad
	HasFastMisaligned bool // Fast misaligned accesses
	HasC              bool // Compressed instruction-set extension
	HasV              bool // Vector extension compatible with RVV 1.0
	HasZba            bool // Address generation instructions extension
	HasZbb            bool // Basic bit-manipulation extension
	HasZbs            bool // Single-bit instructions extension
	HasZvbb           bool // Vector Basic Bit-manipulation
	HasZvbc           bool // Vector Carryless Multiplication
	HasZvkb           bool // Vector Cryptography Bit-manipulation
	HasZvkt           bool // Vector Data-Independent Execution Latency
	HasZvkg           bool // Vector GCM/GMAC
	HasZvkn           bool // NIST Algorithm Suite (AES/SHA256/SHA512)
	HasZvknc          bool // NIST Algorithm Suite with carryless multiply
	HasZvkng          bool // NIST Algorithm Suite with GCM
	HasZvks           bool // ShangMi Algorithm Suite
	HasZvksc          bool // ShangMi Algorithm Suite with carryless multiplication
	HasZvksg          bool // ShangMi Algorithm Suite with GCM
	_                 CacheLinePad
}
 248  
// init runs the package's start-up sequence: archInit, then initOptions, then
// processOptions. archInit and initOptions are not defined in this file.
func init() {
	// NOTE(review): presumably performs the architecture specific feature
	// detection that fills in the flag structs above; confirm against the
	// per-architecture files.
	archInit()
	// Per the documentation on options, the arch specific initOptions
	// functions add the GODEBUG-controllable features to options.
	initOptions()
	// Must run after initOptions: processOptions walks the options slice to
	// apply the cpu.* settings found in GODEBUG.
	processOptions()
}
 254  
 255  // options contains the cpu debug options that can be used in GODEBUG.
 256  // Options are arch dependent and are added by the arch specific initOptions functions.
 257  // Features that are mandatory for the specific GOARCH should have the Required field set
 258  // (e.g. SSE2 on amd64).
 259  var options []option
 260  
 261  // Option names should be lower case. e.g. avx instead of AVX.
 262  type option struct {
 263  	Name      []byte
 264  	Feature   *bool
 265  	Specified bool // whether feature value was specified in GODEBUG
 266  	Enable    bool // whether feature should be enabled
 267  	Required  bool // whether feature is mandatory and can not be disabled
 268  }
 269  
 270  func processOptions() {
 271  	env := os.Getenv("GODEBUG")
 272  field:
 273  	for env != "" {
 274  		field := ""
 275  		i := bytes.IndexByte(env, ',')
 276  		if i < 0 {
 277  			field, env = env, ""
 278  		} else {
 279  			field, env = env[:i], env[i+1:]
 280  		}
 281  		if len(field) < 4 || field[:4] != "cpu." {
 282  			continue
 283  		}
 284  		i = bytes.IndexByte(field, '=')
 285  		if i < 0 {
 286  			print("GODEBUG sys/cpu: no value specified for \"", field, "\"\n")
 287  			continue
 288  		}
 289  		key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
 290  
 291  		var enable bool
 292  		switch value {
 293  		case "on":
 294  			enable = true
 295  		case "off":
 296  			enable = false
 297  		default:
 298  			print("GODEBUG sys/cpu: value \"", value, "\" not supported for cpu option \"", key, "\"\n")
 299  			continue field
 300  		}
 301  
 302  		if key == "all" {
 303  			for i := range options {
 304  				options[i].Specified = true
 305  				options[i].Enable = enable || options[i].Required
 306  			}
 307  			continue field
 308  		}
 309  
 310  		for i := range options {
 311  			if options[i].Name == key {
 312  				options[i].Specified = true
 313  				options[i].Enable = enable
 314  				continue field
 315  			}
 316  		}
 317  
 318  		print("GODEBUG sys/cpu: unknown cpu feature \"", key, "\"\n")
 319  	}
 320  
 321  	for _, o := range options {
 322  		if !o.Specified {
 323  			continue
 324  		}
 325  
 326  		if o.Enable && !*o.Feature {
 327  			print("GODEBUG sys/cpu: can not enable \"", o.Name, "\", missing CPU support\n")
 328  			continue
 329  		}
 330  
 331  		if !o.Enable && o.Required {
 332  			print("GODEBUG sys/cpu: can not disable \"", o.Name, "\", required CPU feature\n")
 333  			continue
 334  		}
 335  
 336  		*o.Feature = o.Enable
 337  	}
 338  }
 339