1 //go:build amd64
2 3 package p256k1
4 5 // AMD64-specific scalar operations with optional AVX2/BMI2 acceleration.
6 // The Scalar type uses 4×uint64 limbs which are memory-compatible with
7 // the AVX package's 2×Uint128 representation.
8 9 // scalarMulAVX2 multiplies two scalars using AVX2 assembly.
10 // Both input and output use the same memory layout as the pure Go implementation.
11 //
// This is a declaration without a Go body: the implementation is not in this
// file and is expected to come from an assembly source built for amd64.
//
// Parameter roles are inferred from the names; confirm against the assembly:
//   - r: destination scalar that receives the product.
//   - a, b: the two operands.
//
// NOTE(review): whether the product is reduced modulo the group order, and
// whether r may alias a or b, cannot be determined from this declaration.
// NOTE(review): nothing here checks for AVX2 support; callers presumably must
// verify the CPU capability before calling. Confirm at the call sites.
//
// The go:noescape directive below tells the compiler that none of the pointer
// arguments escape to the heap through this call.
//
12 //go:noescape
13 func scalarMulAVX2(r, a, b *Scalar)
14 15 // scalarMulBMI2 multiplies two scalars using the BMI2 MULX instruction.
16 // This is intended to be faster than traditional MUL because MULX does not
17 // modify the flags, allowing better instruction scheduling with carry chains.
18 //
// This is a declaration without a Go body: the implementation is not in this
// file and is expected to come from an assembly source built for amd64.
//
// Parameter roles are inferred from the names; confirm against the assembly:
//   - r: destination scalar that receives the product.
//   - a, b: the two operands.
//
// NOTE(review): whether the product is reduced modulo the group order, and
// whether r may alias a or b, cannot be determined from this declaration.
// NOTE(review): nothing here checks for BMI2 support; callers presumably must
// verify the CPU capability before calling. Confirm at the call sites.
//
// The go:noescape directive below tells the compiler that none of the pointer
// arguments escape to the heap through this call.
//
19 //go:noescape
20 func scalarMulBMI2(r, a, b *Scalar)
21 22 // scalarAddAVX2 adds two scalars using AVX2 assembly.
23 //
// This is a declaration without a Go body: the implementation is not in this
// file and is expected to come from an assembly source built for amd64.
//
// Parameter roles are inferred from the names; confirm against the assembly:
//   - r: destination scalar that receives the sum.
//   - a, b: the two addends.
//
// NOTE(review): whether the sum is reduced modulo the group order, and
// whether r may alias a or b, cannot be determined from this declaration.
// NOTE(review): nothing here checks for AVX2 support; callers presumably must
// verify the CPU capability before calling. Confirm at the call sites.
//
// The go:noescape directive below tells the compiler that none of the pointer
// arguments escape to the heap through this call.
//
24 //go:noescape
25 func scalarAddAVX2(r, a, b *Scalar)
26 27 // scalarSubAVX2 subtracts two scalars using AVX2 assembly.
28 //
// This is a declaration without a Go body: the implementation is not in this
// file and is expected to come from an assembly source built for amd64.
//
// Parameter roles are inferred from the names; confirm against the assembly:
//   - r: destination scalar that receives the difference.
//   - a, b: the operands; presumably r = a - b, but the operand order is not
//     visible here and should be verified.
//
// NOTE(review): whether the difference is reduced modulo the group order, and
// whether r may alias a or b, cannot be determined from this declaration.
// NOTE(review): nothing here checks for AVX2 support; callers presumably must
// verify the CPU capability before calling. Confirm at the call sites.
//
// The go:noescape directive below tells the compiler that none of the pointer
// arguments escape to the heap through this call.
//
29 //go:noescape
30 func scalarSubAVX2(r, a, b *Scalar)
31