1 // Copyright (c) 2024 The Decred developers
2 // Use of this source code is governed by an ISC
3 // license that can be found in the LICENSE file.
4 //
5 // Feature detection originally written by Dave Collins Feb 2019. Additional
6 // cleanup and comments added Jul 2024.
7 8 //go:build !purego
9 10 package compress
11 12 import (
13 "os"
14 )
15 16 var (
17 // features houses the result of querying the CPU and OS for supported
18 // features.
19 features = querySupportedFeatures()
20 21 hasSSE2 = features.SSE2 && os.Getenv("BLAKE256_DISABLE_SSE2") != "1"
22 hasSSE41 = features.SSE41 && os.Getenv("BLAKE256_DISABLE_SSE41") != "1"
23 hasAVX = features.AVX && os.Getenv("BLAKE256_DISABLE_AVX") != "1"
24 )
// supportsCPUID returns true when the CPU supports the CPUID opcode.
//
// The function is declared without a body, so its implementation must be
// supplied outside of the Go source for this package (presumably hand-written
// assembly; confirm against the accompanying assembly file).
//
// querySupportedFeatures calls this before issuing any cpuid query and treats
// a false result as "no features supported".
//
//go:noescape
func supportsCPUID() bool
// cpuid provides access to the CPUID opcode.
//
// eaxIn is the initial EAX value that selects which information is queried
// and ecxIn is the accompanying ECX input, which every caller in this file
// passes as 0.  The four results are named after the EAX, EBX, ECX, and EDX
// registers in which CPUID returns its output.
//
// The function is declared without a body, so its implementation must be
// supplied outside of the Go source for this package (presumably hand-written
// assembly; confirm against the accompanying assembly file).  Callers are
// expected to check supportsCPUID first, as querySupportedFeatures does.
//
//go:noescape
func cpuid(eaxIn, ecxIn uint32) (eax, ebx, ecx, edx uint32)
// xgetbv provides access to the XGETBV opcode to read the contents of the
// extended control register with ECX = 0x00.
//
// Only the EAX portion of the result is returned.  The sole caller in this
// file, querySupportedFeatures, invokes it only after CPUID has reported the
// OSXSAVE bit as set.
//
// The function is declared without a body, so its implementation must be
// supplied outside of the Go source for this package (presumably hand-written
// assembly; confirm against the accompanying assembly file).
//
//go:noescape
func xgetbv() (eax uint32)
// isBitSet reports whether the bit at the given zero-based position, counted
// from the least significant bit, is set in testVal.
//
// Positions of 32 or greater always yield false because shifting a 32-bit
// value by its full width or more produces zero.
func isBitSet(testVal uint32, bit uint8) bool {
	mask := uint32(1) << bit
	return testVal&mask != 0
}
// supportedFeatures records which SIMD instruction set extensions were
// detected.  The zero value means no extension is supported.
type supportedFeatures struct {
	// SSE2 and SSE41 are set when the CPU reports support for SSE2 and
	// SSE4.1, respectively.
	SSE2, SSE41 bool

	// AVX and AVX2 are set when the CPU reports support for the respective
	// extension and the operating system has enabled saving of the AVX
	// register state.
	AVX, AVX2 bool
}
56 57 // querySupportedFeatures returns the result of querying the CPU and OS to
58 // determine supported features.
59 func querySupportedFeatures() supportedFeatures {
60 // Per CPUID—CPU Identification in Chapter 3 of the Intel 64 and IA-32
61 // Architectures Software Developer's Manual, Volume 2A:
62 //
63 // "The ID flag (bit 21) in the EFLAGS register indicates support for the
64 // CPUID instruction. If a software procedure can set and clear this flag,
65 // the processor executing the procedure supports the CPUID instruction.
66 // This instruction operates the same in non-64-bit modes and 64-bit mode.
67 //
68 // CPUID returns processor identification and feature information in the
69 // EAX, EBX, ECX, and EDX registers. The output is dependent on the
70 // contents of the EAX register upon execution (in some cases, ECX as
71 // well)."
72 //
73 // The inputs and outputs for determining various levels of SIMD support
74 // that are likely relevant to BLAKE are:
75 //
76 // Initial EAX Value | Output
77 // ------------------|------------------------------------------------
78 // 0x00 | EAX = Maximum Input Value for Basic CPUID Info.
79 // -------------------------------------------------------------------
80 // 0x01 | ECX = Feature Information
81 // | Bit 0 = Streaming SIMD Extensions 3 (SSE3)
82 // | Bit 9 = Supplemental SSE3 (SSSE3)
83 // | Bit 19 = Streaming SIMD Extensions 4.1 (SSE4.1)
84 // | Bit 20 = Streaming SIMD Extensions 4.2 (SSE4.2)
85 // | Bit 27 = OS sets to enable XSAVE features (OSXSAVE)
86 // | Bit 28 = Advanced Vector Extensions (AVX)
87 // | EDX = Feature Information
88 // | Bit 25 = Streaming SIMD Extensions (SSE)
89 // | Bit 26 = Streaming SIMD Extensions 2 (SSE2)
90 // -------------------------------------------------------------------
91 // 0x07 | EBX = Feature Information
92 // | Bit 5 = Advanced Vector Extensions 2 (AVX2)
93 // | Bit 16 = AVX-512 Foundation (AVX512F)
94 // | Bit 17 = AVX-512 Double and Quadword (AVX512DQ)
95 // | Bit 30 = AVX-512 Byte and Word (AVX512BW)
96 // | Bit 31 = AVX-512 Vector Length Extensions (AVX512VL)
97 //
98 // Note that all SSE and AVX variants also require operating system support
99 // in order to properly save the additional state when doing context
100 // switches. Starting with AVX, this is signaled by the OS by setting bits
101 // in the extended control register which itself requires CPU support as
102 // specified by the OSXSAVE bit in the table above.
103 //
104 // Per Chapter 13 of the Intel 64 and IA-32 Architectures Software
105 // Developer’s Manual, Volume 1, the XGETBV opcode is used to obtain the
106 // aforementioned extended control register (XCR). Per the "XSAVE-SUPPORTED
107 // FEATURES AND STATE-COMPONENT BITMAPS" section, the relevant bits for
108 // AVX/AVX-512 support are:
109 //
110 // XCR | Output
111 // -----|------------------------------------------------
112 // 0x00 | EAX
113 // | Bit 1 = SSE state (XMM registers)
114 // | Bit 2 = AVX state (YMM registers)
115 // | Bits 5-7 = AVX-512 state components
116 // | Bit 5 = Opmask state (K0-K7 registers)
117 // | Bit 6 = ZMM high 256 state (upper 256 bits of ZMM0-ZMM15 registers)
118 // | Bit 7 = High 16 ZMM state (ZMM16-ZMM31 registers)
119 const (
120 eaxInputQueryMax = 0x00
121 eaxInputQueryFeatureInfo = 0x01
122 eaxInputQueryExtFeatFlags = 0x07
123 124 ecx1OutputOSXSAVEBit = 27
125 edx1OutputSSE2Bit = 26
126 ecx1OutputSSE41Bit = 19
127 ecx1OutputAVXBit = 28
128 ebx7OutputAVX2Bit = 5
129 130 xgbvEaxOutputSSEStateBit = 1
131 xgbvEaxOutputAVXStateBit = 2
132 )
133 134 // Nothing to do if the CPU somehow does not support CPUID. Go probably
135 // won't even run on such a CPU, but as the Intel manual states, it is
136 // technically required to check if CPUID is supported before querying it
137 // and it's best to be safe.
138 var features supportedFeatures
139 if !supportsCPUID() {
140 return features
141 }
142 143 // Perform initial query to determine the max CPUID input value since the
144 // remaining checks are only valid if the CPU at least supports querying
145 // them to begin with.
146 maxEAXInput, _, _, _ := cpuid(eaxInputQueryMax, 0)
147 if maxEAXInput < eaxInputQueryFeatureInfo {
148 return features
149 }
150 151 // Query basic feature info to determine if the CPU supports SSE2/SSE4.1.
152 //
153 // Note that SSE2 is always active on amd64, so checking for it could
154 // probably technically be skipped, but it doesn't really cost anything
155 // extra to check for it and checking is more correct.
156 _, _, ecx, edx := cpuid(eaxInputQueryFeatureInfo, 0)
157 features.SSE2 = isBitSet(edx, edx1OutputSSE2Bit)
158 features.SSE41 = isBitSet(ecx, ecx1OutputSSE41Bit)
159 hasOSXSAVE := isBitSet(ecx, ecx1OutputOSXSAVEBit)
160 161 // Query basic feature info to determine AVX support as well as if the OS
162 // supports AVX/AVX2. See the description above for details.
163 var osSupportsAVX bool
164 if hasOSXSAVE {
165 eax := xgetbv()
166 osSupportsSSE := isBitSet(eax, xgbvEaxOutputSSEStateBit)
167 osSupportsAVX = osSupportsSSE && isBitSet(eax, xgbvEaxOutputAVXStateBit)
168 }
169 features.AVX = isBitSet(ecx, ecx1OutputAVXBit) && osSupportsAVX
170 171 // Querying the supported feature info for AVX2 is only valid if the CPU at
172 // least supports querying it to begin with.
173 if maxEAXInput < eaxInputQueryExtFeatFlags {
174 return features
175 }
176 177 // Query extended feature info to determine AVX2 support.
178 _, ebx, _, _ := cpuid(eaxInputQueryExtFeatFlags, 0)
179 features.AVX2 = isBitSet(ebx, ebx7OutputAVX2Bit) && osSupportsAVX
180 181 return features
182 }
183