package p256k1 import ( "errors" "fmt" "unsafe" ) const ( // Window sizes for elliptic curve multiplication optimizations windowA = 5 // Window size for main scalar (A) windowG = 14 // Window size for generator (G) - larger for better performance ) // EcmultConst computes r = q * a using constant-time multiplication // Uses simple binary method func EcmultConst(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) { if a.isInfinity() { r.setInfinity() return } if q.isZero() { r.setInfinity() return } // Convert affine point to Jacobian var aJac GroupElementJacobian aJac.setGE(a) // Use simple binary method for constant-time behavior r.setInfinity() var base GroupElementJacobian base = aJac // Process bits from MSB to LSB for i := 0; i < 256; i++ { if i > 0 { r.double(r) } // Get bit i (from MSB) bit := q.getBits(uint(255-i), 1) if bit != 0 { if r.isInfinity() { *r = base } else { r.addVar(r, &base) } } } } // ecmultWindowedVar computes r = q * a using optimized windowed multiplication (variable-time) // Uses a window size of 6 bits (64 precomputed multiples) for better CPU performance // Trades memory (64 entries vs 32) for ~20% faster multiplication func ecmultWindowedVar(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) { if a.isInfinity() { r.setInfinity() return } if q.isZero() { r.setInfinity() return } const windowSize = 6 // Increased from 5 to 6 for better performance const tableSize = 1 << windowSize // 64 // Convert point to Jacobian once var aJac GroupElementJacobian aJac.setGE(a) // Build table efficiently using Jacobian coordinates, only convert to affine at end // Store odd multiples in Jacobian form to avoid frequent conversions var tableJac [tableSize]GroupElementJacobian tableJac[0].setInfinity() tableJac[1] = aJac // Build odd multiples efficiently: tableJac[2*i+1] = (2*i+1) * a // Start with 3*a = a + 2*a var twoA GroupElementJacobian twoA.double(&aJac) // Build table: tableJac[i] = tableJac[i-2] + 2*a for odd i for i := 3; i < tableSize; i += 2 { tableJac[i].addVar(&tableJac[i-2], &twoA) } // Build even multiples: tableJac[2*i] = 2 * tableJac[i] for i := 1; i < tableSize/2; i++ { tableJac[2*i].double(&tableJac[i]) } // Process scalar in windows of 6 bits from MSB to LSB r.setInfinity() numWindows := (256 + windowSize - 1) / windowSize // Ceiling division for window := 0; window < numWindows; window++ { // Calculate bit offset for this window (MSB first) bitOffset := 255 - window*windowSize if bitOffset < 0 { break } // Extract window bits actualWindowSize := windowSize if bitOffset < windowSize-1 { actualWindowSize = bitOffset + 1 } windowBits := q.getBits(uint(bitOffset-actualWindowSize+1), uint(actualWindowSize)) // Double result windowSize times (once per bit position in window) if !r.isInfinity() { for j := 0; j < actualWindowSize; j++ { r.double(r) } } // Add precomputed point if window is non-zero if windowBits != 0 && windowBits < tableSize { if r.isInfinity() { *r = tableJac[windowBits] } else { r.addVar(r, &tableJac[windowBits]) } } } } // Ecmult computes r = q * a using optimized GLV+Strauss+wNAF multiplication // This provides good performance for verification and ECDH operations func Ecmult(r *GroupElementJacobian, a *GroupElementJacobian, q *Scalar) { if a.isInfinity() { r.setInfinity() return } if q.isZero() { r.setInfinity() return } // Convert to affine for GLV multiplication var aAff GroupElementAffine aAff.setGEJ(a) // Use optimized GLV+Strauss+wNAF multiplication ecmultStraussWNAFGLV(r, &aAff, q) } // ecmultCombinedGeneric computes r = na*a + ng*G using combined Strauss algorithm // This is the generic (5x52) implementation func ecmultCombinedGeneric(r *GroupElementJacobian, a *GroupElementJacobian, na, ng *Scalar) { // Handle edge cases naZero := na == nil || na.isZero() ngZero := ng == nil || ng.isZero() aInf := a == nil || a.isInfinity() // If both scalars are zero, result is infinity if naZero && ngZero { r.setInfinity() return } // If na is zero or a is infinity, just compute ng*G if naZero || aInf { EcmultGen(r, ng) return } // If ng is zero, just compute na*a if ngZero { var aAff GroupElementAffine aAff.setGEJ(a) ecmultStraussWNAFGLV(r, &aAff, na) return } // Use combined Strauss algorithm - shares doublings between both multiplications ecmultStraussCombined(r, a, na, ng) } // ecmultStraussCombined computes r = na*a + ng*G using combined Strauss with GLV // This processes all four wNAF representations (na1, na2, ng1, ng2) in a single loop // sharing doublings for significant performance improvement func ecmultStraussCombined(r *GroupElementJacobian, a *GroupElementJacobian, na, ng *Scalar) { // Ensure generator tables are initialized initGenTables() // Convert a to affine var aAff GroupElementAffine aAff.setGEJ(a) // Split na using GLV: na = na1 + na2*λ var na1, na2 Scalar var p1, p2 GroupElementAffine ecmultEndoSplit(&na1, &na2, &p1, &p2, na, &aAff) // Split ng using GLV: ng = ng1 + ng2*λ var ng1, ng2 Scalar scalarSplitLambda(&ng1, &ng2, ng) // Normalize all scalars to be "low" (not high) negNa1 := na1.isHigh() if negNa1 { na1.negate(&na1) } negNa2 := na2.isHigh() if negNa2 { na2.negate(&na2) } negNg1 := ng1.isHigh() if negNg1 { ng1.negate(&ng1) } negNg2 := ng2.isHigh() if negNg2 { ng2.negate(&ng2) } // Build precomputed tables for a and λ*a var p1Jac GroupElementJacobian p1Jac.setGE(&p1) var preA1 [glvTableSize]GroupElementAffine var preBetaX1 [glvTableSize]FieldElement buildOddMultiplesTableAffineFixed(&preA1, &preBetaX1, &p1Jac) var p2Jac GroupElementJacobian p2Jac.setGE(&p2) var preA2 [glvTableSize]GroupElementAffine var preBetaX2 [glvTableSize]FieldElement buildOddMultiplesTableAffineFixed(&preA2, &preBetaX2, &p2Jac) // Convert all four scalars to wNAF var wnafNa1, wnafNa2 [257]int8 var wnafNg1, wnafNg2 [257]int8 bitsNa1 := na1.wNAF(&wnafNa1, glvWNAFW) bitsNa2 := na2.wNAF(&wnafNa2, glvWNAFW) bitsNg1 := ng1.wNAF(&wnafNg1, genWindowSize) bitsNg2 := ng2.wNAF(&wnafNg2, genWindowSize) // Find maximum bit position across all four maxBits := bitsNa1 if bitsNa2 > maxBits { maxBits = bitsNa2 } if bitsNg1 > maxBits { maxBits = bitsNg1 } if bitsNg2 > maxBits { maxBits = bitsNg2 } // Combined Strauss loop - one set of doublings for all four contributions r.setInfinity() for i := maxBits - 1; i >= 0; i-- { // Double once (shared across all four multiplications) if !r.isInfinity() { r.double(r) } // Add contribution from na1 (using preA1 table) if i < bitsNa1 && wnafNa1[i] != 0 { var pt GroupElementAffine tableGetGEFixed(&pt, &preA1, int(wnafNa1[i])) if negNa1 { pt.negate(&pt) } if r.isInfinity() { r.setGE(&pt) } else { r.addGE(r, &pt) } } // Add contribution from na2 (using preA2 table) if i < bitsNa2 && wnafNa2[i] != 0 { var pt GroupElementAffine tableGetGEFixed(&pt, &preA2, int(wnafNa2[i])) if negNa2 { pt.negate(&pt) } if r.isInfinity() { r.setGE(&pt) } else { r.addGE(r, &pt) } } // Add contribution from ng1 (using preGenG table) if i < bitsNg1 && wnafNg1[i] != 0 { var pt GroupElementAffine n := int(wnafNg1[i]) var idx int if n > 0 { idx = (n - 1) / 2 } else { idx = (-n - 1) / 2 } if idx < genTableSize { pt = preGenG[idx] if n < 0 { pt.negate(&pt) } if negNg1 { pt.negate(&pt) } if r.isInfinity() { r.setGE(&pt) } else { r.addGE(r, &pt) } } } // Add contribution from ng2 (using preGenLambdaG table) if i < bitsNg2 && wnafNg2[i] != 0 { var pt GroupElementAffine n := int(wnafNg2[i]) var idx int if n > 0 { idx = (n - 1) / 2 } else { idx = (-n - 1) / 2 } if idx < genTableSize { pt = preGenLambdaG[idx] if n < 0 { pt.negate(&pt) } if negNg2 { pt.negate(&pt) } if r.isInfinity() { r.setGE(&pt) } else { r.addGE(r, &pt) } } } } } // ecmultStraussGLV computes r = q * a using Strauss algorithm with GLV endomorphism // This provides significant speedup for both verification and ECDH operations func ecmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) { if a.isInfinity() { r.setInfinity() return } if q.isZero() { r.setInfinity() return } // For now, use simplified Strauss algorithm without GLV endomorphism // Convert base point to Jacobian var aJac GroupElementJacobian aJac.setGE(a) // Compute odd multiples for the scalar var preA [1 << (windowA - 1)]GroupElementJacobian buildOddMultiples(&preA, &aJac, windowA) // Convert scalar to wNAF representation var wnaf [257]int8 bits := q.wNAF(&wnaf, windowA) // Perform Strauss algorithm r.setInfinity() for i := bits - 1; i >= 0; i-- { // Double the result r.double(r) // Add contribution if wnaf[i] != 0 { n := int(wnaf[i]) var pt GroupElementJacobian if n > 0 { idx := (n-1)/2 if idx >= len(preA) { panic(fmt.Sprintf("wNAF positive index out of bounds: n=%d, idx=%d, len=%d", n, idx, len(preA))) } pt = preA[idx] } else { if (-n-1)/2 >= len(preA) { panic("wNAF index out of bounds (negative)") } pt = preA[(-n-1)/2] pt.y.negate(&pt.y, 1) } r.addVar(r, &pt) } } } // buildOddMultiples builds a table of odd multiples of a point // pre[i] = (2*i+1) * a for i = 0 to (1<<(w-1))-1 func buildOddMultiples(pre *[1 << (windowA - 1)]GroupElementJacobian, a *GroupElementJacobian, w uint) { tableSize := 1 << (w - 1) // pre[0] = a (which is 1*a) pre[0] = *a if tableSize > 1 { // Compute 2*a var twoA GroupElementJacobian twoA.double(a) // Build odd multiples: pre[i] = pre[i-2] + 2*a for i >= 2, i even for i := 2; i < tableSize; i += 2 { pre[i].addVar(&pre[i-2], &twoA) } } } // EcmultStraussGLV is the public interface for optimized Strauss+GLV multiplication func EcmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) { ecmultStraussGLV(r, a, q) } // ECDHHashFunction is a function type for hashing ECDH shared secrets type ECDHHashFunction func(output []byte, x32 []byte, y32 []byte) bool // ecdhHashFunctionSHA256 implements the default SHA-256 based hash function for ECDH // Following the C reference implementation exactly func ecdhHashFunctionSHA256(output []byte, x32 []byte, y32 []byte) bool { if len(output) != 32 || len(x32) != 32 || len(y32) != 32 { return false } // Version byte: (y32[31] & 0x01) | 0x02 version := byte((y32[31] & 0x01) | 0x02) sha := NewSHA256() sha.Write([]byte{version}) sha.Write(x32) sha.Finalize(output) sha.Clear() return true } // ECDH computes an EC Diffie-Hellman shared secret // Following the C reference implementation secp256k1_ecdh func ECDH(output []byte, pubkey *PublicKey, seckey []byte, hashfp ECDHHashFunction) error { if len(output) != 32 { return errors.New("output must be 32 bytes") } if len(seckey) != 32 { return errors.New("seckey must be 32 bytes") } if pubkey == nil { return errors.New("pubkey cannot be nil") } // Use default hash function if none provided if hashfp == nil { hashfp = ecdhHashFunctionSHA256 } // Load public key var pt GroupElementAffine pt.fromBytes(pubkey.data[:]) if pt.isInfinity() { return errors.New("invalid public key") } // Parse scalar var s Scalar if !s.setB32Seckey(seckey) { return errors.New("invalid secret key") } // Handle zero scalar if s.isZero() { return errors.New("secret key cannot be zero") } // Compute res = s * pt using optimized windowed multiplication (variable-time) // ECDH doesn't require constant-time since the secret key is already known var res GroupElementJacobian ecmultWindowedVar(&res, &pt, &s) // Convert to affine var resAff GroupElementAffine resAff.setGEJ(&res) resAff.x.normalize() resAff.y.normalize() // Extract x and y coordinates var x, y [32]byte resAff.x.getB32(x[:]) resAff.y.getB32(y[:]) // Compute hash success := hashfp(output, x[:], y[:]) // Clear sensitive data memclear(unsafe.Pointer(&x[0]), 32) memclear(unsafe.Pointer(&y[0]), 32) s.clear() resAff.clear() res.clear() if !success { return errors.New("hash function failed") } return nil } // HKDF performs HMAC-based Key Derivation Function (RFC 5869) // Outputs key material of the specified length func HKDF(output []byte, ikm []byte, salt []byte, info []byte) error { if len(output) == 0 { return errors.New("output length must be greater than 0") } // Step 1: Extract (if salt is empty, use zeros) if len(salt) == 0 { salt = make([]byte, 32) } // PRK = HMAC-SHA256(salt, IKM) var prk [32]byte hmac := NewHMACSHA256(salt) hmac.Write(ikm) hmac.Finalize(prk[:]) hmac.Clear() // Step 2: Expand // Generate output using HKDF-Expand // T(0) = empty // T(i) = HMAC(PRK, T(i-1) || info || i) outlen := len(output) outidx := 0 // T(0) is empty var t []byte // Generate blocks until we have enough output blockNum := byte(1) for outidx < outlen { // Compute T(i) = HMAC(PRK, T(i-1) || info || i) hmac = NewHMACSHA256(prk[:]) if len(t) > 0 { hmac.Write(t) } if len(info) > 0 { hmac.Write(info) } hmac.Write([]byte{blockNum}) var tBlock [32]byte hmac.Finalize(tBlock[:]) hmac.Clear() // Copy to output copyLen := len(tBlock) if copyLen > outlen-outidx { copyLen = outlen - outidx } copy(output[outidx:outidx+copyLen], tBlock[:copyLen]) outidx += copyLen // Update T for next iteration t = tBlock[:] blockNum++ } // Clear sensitive data memclear(unsafe.Pointer(&prk[0]), 32) if len(t) > 0 { memclear(unsafe.Pointer(&t[0]), uintptr(len(t))) } return nil } // ECDHWithHKDF computes ECDH and derives a key using HKDF func ECDHWithHKDF(output []byte, pubkey *PublicKey, seckey []byte, salt []byte, info []byte) error { // Compute ECDH shared secret var sharedSecret [32]byte if err := ECDH(sharedSecret[:], pubkey, seckey, nil); err != nil { return err } // Derive key using HKDF err := HKDF(output, sharedSecret[:], salt, info) // Clear shared secret memclear(unsafe.Pointer(&sharedSecret[0]), 32) return err } // ============================================================================= // Phase 4: Strauss-GLV Algorithm with wNAF // ============================================================================= // buildOddMultiplesTableAffine builds a table of odd multiples of a point in affine coordinates // pre[i] = (2*i+1) * a for i = 0 to tableSize-1 // Also returns the precomputed β*x values for λ-transformed lookups // // The table is built efficiently using: // 1. Compute odd multiples in Jacobian: 1*a, 3*a, 5*a, ... // 2. Batch normalize all points to affine // 3. Precompute β*x for each point for GLV lookups // // Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_odd_multiples_table func buildOddMultiplesTableAffine(preA []GroupElementAffine, preBetaX []FieldElement, a *GroupElementJacobian, tableSize int) { if tableSize == 0 { return } // Build odd multiples in Jacobian coordinates preJac := make([]GroupElementJacobian, tableSize) // pre[0] = a (which is 1*a) preJac[0] = *a if tableSize > 1 { // Compute 2*a var twoA GroupElementJacobian twoA.double(a) // Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1 for i := 1; i < tableSize; i++ { preJac[i].addVar(&preJac[i-1], &twoA) } } // Batch normalize to affine coordinates BatchNormalize(preA, preJac) // Precompute β*x for each point (for λ-transformed lookups) for i := 0; i < tableSize; i++ { if preA[i].isInfinity() { preBetaX[i] = FieldElementZero } else { preBetaX[i].mul(&preA[i].x, &fieldBeta) } } } // tableGetGE retrieves a point from the table, handling sign // n is the wNAF digit (can be negative) // Returns pre[(|n|-1)/2], negated if n < 0 // // Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE func tableGetGE(r *GroupElementAffine, pre []GroupElementAffine, n int) { if n == 0 { r.setInfinity() return } var idx int if n > 0 { idx = (n - 1) / 2 } else { idx = (-n - 1) / 2 } if idx >= len(pre) { r.setInfinity() return } *r = pre[idx] // Negate if n < 0 if n < 0 { r.negate(r) } } // tableGetGELambda retrieves the λ-transformed point from the table // Uses precomputed β*x values for efficiency // n is the wNAF digit (can be negative) // Returns λ*pre[(|n|-1)/2], negated if n < 0 // // Since λ*(x, y) = (β*x, y), and we precomputed β*x, // we just need to use the precomputed β*x instead of x // // Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE_LAMBDA func tableGetGELambda(r *GroupElementAffine, pre []GroupElementAffine, preBetaX []FieldElement, n int) { if n == 0 { r.setInfinity() return } var idx int if n > 0 { idx = (n - 1) / 2 } else { idx = (-n - 1) / 2 } if idx >= len(pre) { r.setInfinity() return } // Use precomputed β*x instead of x r.x = preBetaX[idx] r.y = pre[idx].y r.infinity = pre[idx].infinity // Negate if n < 0 if n < 0 { r.negate(r) } } // Window size for the GLV split scalars // Window 5 (16 entries) is optimal - larger windows have more table-building overhead // than savings from fewer point additions for single-use tables const glvWNAFW = 5 const glvTableSize = 1 << (glvWNAFW - 1) // 16 entries for window size 5 // ecmultStraussWNAFGLV computes r = q * a using Strauss algorithm with GLV endomorphism // This splits the scalar using GLV and processes two ~128-bit scalars simultaneously // using wNAF representation for efficient point multiplication. // // The algorithm: // 1. Split q into q1, q2 such that q1 + q2*λ ≡ q (mod n), where q1, q2 are ~128 bits // 2. Build odd multiples table for a and precompute β*x for λ-transformed lookups // 3. Convert q1, q2 to wNAF representation // 4. Process both wNAF representations simultaneously in a single pass // // Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_strauss_wnaf func ecmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) { if a.isInfinity() { r.setInfinity() return } if q.isZero() { r.setInfinity() return } // Split scalar using GLV endomorphism: q = q1 + q2*λ // Also get the transformed points p1 = a, p2 = λ*a var q1, q2 Scalar var p1, p2 GroupElementAffine ecmultEndoSplit(&q1, &q2, &p1, &p2, q, a) // Build odd multiples tables using stack-allocated arrays var aJac GroupElementJacobian aJac.setGE(&p1) var preA [glvTableSize]GroupElementAffine var preBetaX [glvTableSize]FieldElement buildOddMultiplesTableAffineFixed(&preA, &preBetaX, &aJac) // Build odd multiples table for p2 (which is λ*a) var p2Jac GroupElementJacobian p2Jac.setGE(&p2) var preA2 [glvTableSize]GroupElementAffine var preBetaX2 [glvTableSize]FieldElement buildOddMultiplesTableAffineFixed(&preA2, &preBetaX2, &p2Jac) // Convert scalars to wNAF representation var wnaf1, wnaf2 [257]int8 bits1 := q1.wNAF(&wnaf1, glvWNAFW) bits2 := q2.wNAF(&wnaf2, glvWNAFW) // Find the maximum bit position maxBits := bits1 if bits2 > maxBits { maxBits = bits2 } // Perform the Strauss algorithm r.setInfinity() for i := maxBits - 1; i >= 0; i-- { // Double the result if !r.isInfinity() { r.double(r) } // Add contribution from q1 if i < bits1 && wnaf1[i] != 0 { var pt GroupElementAffine tableGetGEFixed(&pt, &preA, int(wnaf1[i])) if r.isInfinity() { r.setGE(&pt) } else { r.addGE(r, &pt) } } // Add contribution from q2 if i < bits2 && wnaf2[i] != 0 { var pt GroupElementAffine tableGetGEFixed(&pt, &preA2, int(wnaf2[i])) if r.isInfinity() { r.setGE(&pt) } else { r.addGE(r, &pt) } } } } // buildOddMultiplesTableAffineFixed is like buildOddMultiplesTableAffine but uses fixed-size arrays // and zero heap allocations. Optimized for glvTableSize = 16. func buildOddMultiplesTableAffineFixed(preA *[glvTableSize]GroupElementAffine, preBetaX *[glvTableSize]FieldElement, a *GroupElementJacobian) { // Build odd multiples in Jacobian coordinates (stack-allocated) var preJac [glvTableSize]GroupElementJacobian // pre[0] = a (which is 1*a) preJac[0] = *a if glvTableSize > 1 { // Compute 2*a var twoA GroupElementJacobian twoA.double(a) // Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1 for i := 1; i < glvTableSize; i++ { preJac[i].addVar(&preJac[i-1], &twoA) } } // Batch normalize to affine coordinates with zero allocations batchNormalize16(preA, &preJac) // Precompute β*x for each point for i := 0; i < glvTableSize; i++ { if preA[i].isInfinity() { preBetaX[i] = FieldElementZero } else { preBetaX[i].mul(&preA[i].x, &fieldBeta) } } } // tableGetGEFixed retrieves a point from a fixed-size table func tableGetGEFixed(r *GroupElementAffine, pre *[glvTableSize]GroupElementAffine, n int) { if n == 0 { r.setInfinity() return } var idx int if n > 0 { idx = (n - 1) / 2 } else { idx = (-n - 1) / 2 } if idx >= glvTableSize { r.setInfinity() return } *r = pre[idx] // Negate if n < 0 if n < 0 { r.negate(r) } } // EcmultStraussWNAFGLV is the public interface for optimized Strauss+GLV+wNAF multiplication func EcmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) { ecmultStraussWNAFGLV(r, a, q) } // ECDHXOnly computes X-only ECDH (BIP-340 style) // Outputs only the X coordinate of the shared secret point func ECDHXOnly(output []byte, pubkey *PublicKey, seckey []byte) error { if len(output) != 32 { return errors.New("output must be 32 bytes") } if len(seckey) != 32 { return errors.New("seckey must be 32 bytes") } if pubkey == nil { return errors.New("pubkey cannot be nil") } // Load public key var pt GroupElementAffine pt.fromBytes(pubkey.data[:]) if pt.isInfinity() { return errors.New("invalid public key") } // Parse scalar var s Scalar if !s.setB32Seckey(seckey) { return errors.New("invalid secret key") } if s.isZero() { return errors.New("secret key cannot be zero") } // Compute res = s * pt using optimized windowed multiplication (variable-time) // ECDH doesn't require constant-time since the secret key is already known var res GroupElementJacobian ecmultWindowedVar(&res, &pt, &s) // Convert to affine var resAff GroupElementAffine resAff.setGEJ(&res) resAff.x.normalize() // Extract X coordinate only resAff.x.getB32(output) // Clear sensitive data s.clear() resAff.clear() res.clear() return nil }