//go:build amd64

package p256k1

// AMD64-specific scalar operations with optional AVX2/BMI2 acceleration.
//
// The Scalar type uses 4×uint64 limbs, which are memory-compatible with
// the AVX package's 2×Uint128 representation.
//
// Every function below is declared without a Go body, so its implementation
// has to be supplied outside Go source (the comments here say assembly).
// Each declaration carries the noescape directive, which promises the
// compiler that the pointer arguments do not escape to the heap.
//
// NOTE(review): presumably callers must confirm that the CPU supports the
// relevant instruction-set extension before calling these routines — verify
// against the dispatch code, which is not visible from this file.

// scalarMulAVX2 multiplies two scalars using AVX2 assembly.
// Both input and output use the same memory layout as the pure Go implementation.
//
// NOTE(review): presumably r receives the product of a and b; whether the
// result is reduced and whether r may alias a or b cannot be told from this
// declaration — confirm against the assembly.
//
//go:noescape
func scalarMulAVX2(r, a, b *Scalar)

// scalarMulBMI2 multiplies two scalars using the BMI2 MULX instruction.
// This is faster than traditional MUL because MULX doesn't clobber flags,
// allowing better instruction scheduling with carry chains.
//
// NOTE(review): presumably r receives the product of a and b, with the same
// contract as scalarMulAVX2 — confirm against the assembly.
//
//go:noescape
func scalarMulBMI2(r, a, b *Scalar)

// scalarAddAVX2 adds two scalars using AVX2 assembly.
//
// NOTE(review): presumably r receives the sum of a and b; any reduction or
// overflow handling lives in the assembly and is not visible here — confirm.
//
//go:noescape
func scalarAddAVX2(r, a, b *Scalar)

// scalarSubAVX2 subtracts two scalars using AVX2 assembly.
//
// NOTE(review): presumably r receives a minus b (operand order assumed from
// the parameter order); confirm the order and any reduction against the
// assembly.
//
//go:noescape
func scalarSubAVX2(r, a, b *Scalar)