ecdh.go raw

   1  package p256k1
   2  
   3  import (
   4  	"errors"
   5  	"fmt"
   6  	"unsafe"
   7  )
   8  
   9  const (
  10  	// Window sizes for elliptic curve multiplication optimizations
  11  	windowA = 5  // Window size for main scalar (A)
  12  	windowG = 14 // Window size for generator (G) - larger for better performance
  13  )
  14  
  15  // EcmultConst computes r = q * a using constant-time multiplication
  16  // Uses simple binary method
  17  func EcmultConst(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
  18  	if a.isInfinity() {
  19  		r.setInfinity()
  20  		return
  21  	}
  22  	
  23  	if q.isZero() {
  24  		r.setInfinity()
  25  		return
  26  	}
  27  	
  28  	// Convert affine point to Jacobian
  29  	var aJac GroupElementJacobian
  30  	aJac.setGE(a)
  31  	
  32  	// Use simple binary method for constant-time behavior
  33  	r.setInfinity()
  34  	
  35  	var base GroupElementJacobian
  36  	base = aJac
  37  	
  38  	// Process bits from MSB to LSB
  39  	for i := 0; i < 256; i++ {
  40  		if i > 0 {
  41  			r.double(r)
  42  		}
  43  		
  44  		// Get bit i (from MSB)
  45  		bit := q.getBits(uint(255-i), 1)
  46  		if bit != 0 {
  47  			if r.isInfinity() {
  48  				*r = base
  49  			} else {
  50  				r.addVar(r, &base)
  51  			}
  52  		}
  53  	}
  54  }
  55  
  56  // ecmultWindowedVar computes r = q * a using optimized windowed multiplication (variable-time)
  57  // Uses a window size of 6 bits (64 precomputed multiples) for better CPU performance
  58  // Trades memory (64 entries vs 32) for ~20% faster multiplication
  59  func ecmultWindowedVar(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
  60  	if a.isInfinity() {
  61  		r.setInfinity()
  62  		return
  63  	}
  64  	
  65  	if q.isZero() {
  66  		r.setInfinity()
  67  		return
  68  	}
  69  	
  70  	const windowSize = 6  // Increased from 5 to 6 for better performance
  71  	const tableSize = 1 << windowSize // 64
  72  	
  73  	// Convert point to Jacobian once
  74  	var aJac GroupElementJacobian
  75  	aJac.setGE(a)
  76  	
  77  	// Build table efficiently using Jacobian coordinates, only convert to affine at end
  78  	// Store odd multiples in Jacobian form to avoid frequent conversions
  79  	var tableJac [tableSize]GroupElementJacobian
  80  	tableJac[0].setInfinity()
  81  	tableJac[1] = aJac
  82  	
  83  	// Build odd multiples efficiently: tableJac[2*i+1] = (2*i+1) * a
  84  	// Start with 3*a = a + 2*a
  85  	var twoA GroupElementJacobian
  86  	twoA.double(&aJac)
  87  	
  88  	// Build table: tableJac[i] = tableJac[i-2] + 2*a for odd i
  89  	for i := 3; i < tableSize; i += 2 {
  90  		tableJac[i].addVar(&tableJac[i-2], &twoA)
  91  	}
  92  	
  93  	// Build even multiples: tableJac[2*i] = 2 * tableJac[i]
  94  	for i := 1; i < tableSize/2; i++ {
  95  		tableJac[2*i].double(&tableJac[i])
  96  	}
  97  	
  98  	// Process scalar in windows of 6 bits from MSB to LSB
  99  	r.setInfinity()
 100  	numWindows := (256 + windowSize - 1) / windowSize // Ceiling division
 101  	
 102  	for window := 0; window < numWindows; window++ {
 103  		// Calculate bit offset for this window (MSB first)
 104  		bitOffset := 255 - window*windowSize
 105  		if bitOffset < 0 {
 106  			break
 107  		}
 108  		
 109  		// Extract window bits
 110  		actualWindowSize := windowSize
 111  		if bitOffset < windowSize-1 {
 112  			actualWindowSize = bitOffset + 1
 113  		}
 114  		
 115  		windowBits := q.getBits(uint(bitOffset-actualWindowSize+1), uint(actualWindowSize))
 116  		
 117  		// Double result windowSize times (once per bit position in window)
 118  		if !r.isInfinity() {
 119  			for j := 0; j < actualWindowSize; j++ {
 120  				r.double(r)
 121  			}
 122  		}
 123  		
 124  		// Add precomputed point if window is non-zero
 125  		if windowBits != 0 && windowBits < tableSize {
 126  			if r.isInfinity() {
 127  				*r = tableJac[windowBits]
 128  			} else {
 129  				r.addVar(r, &tableJac[windowBits])
 130  			}
 131  		}
 132  	}
 133  }
 134  
 135  // Ecmult computes r = q * a using optimized GLV+Strauss+wNAF multiplication
 136  // This provides good performance for verification and ECDH operations
 137  func Ecmult(r *GroupElementJacobian, a *GroupElementJacobian, q *Scalar) {
 138  	if a.isInfinity() {
 139  		r.setInfinity()
 140  		return
 141  	}
 142  
 143  	if q.isZero() {
 144  		r.setInfinity()
 145  		return
 146  	}
 147  
 148  	// Convert to affine for GLV multiplication
 149  	var aAff GroupElementAffine
 150  	aAff.setGEJ(a)
 151  
 152  	// Use optimized GLV+Strauss+wNAF multiplication
 153  	ecmultStraussWNAFGLV(r, &aAff, q)
 154  }
 155  
 156  // ecmultCombinedGeneric computes r = na*a + ng*G using combined Strauss algorithm
 157  // This is the generic (5x52) implementation
 158  func ecmultCombinedGeneric(r *GroupElementJacobian, a *GroupElementJacobian, na, ng *Scalar) {
 159  	// Handle edge cases
 160  	naZero := na == nil || na.isZero()
 161  	ngZero := ng == nil || ng.isZero()
 162  	aInf := a == nil || a.isInfinity()
 163  
 164  	// If both scalars are zero, result is infinity
 165  	if naZero && ngZero {
 166  		r.setInfinity()
 167  		return
 168  	}
 169  
 170  	// If na is zero or a is infinity, just compute ng*G
 171  	if naZero || aInf {
 172  		EcmultGen(r, ng)
 173  		return
 174  	}
 175  
 176  	// If ng is zero, just compute na*a
 177  	if ngZero {
 178  		var aAff GroupElementAffine
 179  		aAff.setGEJ(a)
 180  		ecmultStraussWNAFGLV(r, &aAff, na)
 181  		return
 182  	}
 183  
 184  	// Use combined Strauss algorithm - shares doublings between both multiplications
 185  	ecmultStraussCombined(r, a, na, ng)
 186  }
 187  
 188  // ecmultStraussCombined computes r = na*a + ng*G using combined Strauss with GLV
 189  // This processes all four wNAF representations (na1, na2, ng1, ng2) in a single loop
 190  // sharing doublings for significant performance improvement
 191  func ecmultStraussCombined(r *GroupElementJacobian, a *GroupElementJacobian, na, ng *Scalar) {
 192  	// Ensure generator tables are initialized
 193  	initGenTables()
 194  
 195  	// Convert a to affine
 196  	var aAff GroupElementAffine
 197  	aAff.setGEJ(a)
 198  
 199  	// Split na using GLV: na = na1 + na2*λ
 200  	var na1, na2 Scalar
 201  	var p1, p2 GroupElementAffine
 202  	ecmultEndoSplit(&na1, &na2, &p1, &p2, na, &aAff)
 203  
 204  	// Split ng using GLV: ng = ng1 + ng2*λ
 205  	var ng1, ng2 Scalar
 206  	scalarSplitLambda(&ng1, &ng2, ng)
 207  
 208  	// Normalize all scalars to be "low" (not high)
 209  	negNa1 := na1.isHigh()
 210  	if negNa1 {
 211  		na1.negate(&na1)
 212  	}
 213  	negNa2 := na2.isHigh()
 214  	if negNa2 {
 215  		na2.negate(&na2)
 216  	}
 217  	negNg1 := ng1.isHigh()
 218  	if negNg1 {
 219  		ng1.negate(&ng1)
 220  	}
 221  	negNg2 := ng2.isHigh()
 222  	if negNg2 {
 223  		ng2.negate(&ng2)
 224  	}
 225  
 226  	// Build precomputed tables for a and λ*a
 227  	var p1Jac GroupElementJacobian
 228  	p1Jac.setGE(&p1)
 229  	var preA1 [glvTableSize]GroupElementAffine
 230  	var preBetaX1 [glvTableSize]FieldElement
 231  	buildOddMultiplesTableAffineFixed(&preA1, &preBetaX1, &p1Jac)
 232  
 233  	var p2Jac GroupElementJacobian
 234  	p2Jac.setGE(&p2)
 235  	var preA2 [glvTableSize]GroupElementAffine
 236  	var preBetaX2 [glvTableSize]FieldElement
 237  	buildOddMultiplesTableAffineFixed(&preA2, &preBetaX2, &p2Jac)
 238  
 239  	// Convert all four scalars to wNAF
 240  	var wnafNa1, wnafNa2 [257]int8
 241  	var wnafNg1, wnafNg2 [257]int8
 242  
 243  	bitsNa1 := na1.wNAF(&wnafNa1, glvWNAFW)
 244  	bitsNa2 := na2.wNAF(&wnafNa2, glvWNAFW)
 245  	bitsNg1 := ng1.wNAF(&wnafNg1, genWindowSize)
 246  	bitsNg2 := ng2.wNAF(&wnafNg2, genWindowSize)
 247  
 248  	// Find maximum bit position across all four
 249  	maxBits := bitsNa1
 250  	if bitsNa2 > maxBits {
 251  		maxBits = bitsNa2
 252  	}
 253  	if bitsNg1 > maxBits {
 254  		maxBits = bitsNg1
 255  	}
 256  	if bitsNg2 > maxBits {
 257  		maxBits = bitsNg2
 258  	}
 259  
 260  	// Combined Strauss loop - one set of doublings for all four contributions
 261  	r.setInfinity()
 262  
 263  	for i := maxBits - 1; i >= 0; i-- {
 264  		// Double once (shared across all four multiplications)
 265  		if !r.isInfinity() {
 266  			r.double(r)
 267  		}
 268  
 269  		// Add contribution from na1 (using preA1 table)
 270  		if i < bitsNa1 && wnafNa1[i] != 0 {
 271  			var pt GroupElementAffine
 272  			tableGetGEFixed(&pt, &preA1, int(wnafNa1[i]))
 273  			if negNa1 {
 274  				pt.negate(&pt)
 275  			}
 276  			if r.isInfinity() {
 277  				r.setGE(&pt)
 278  			} else {
 279  				r.addGE(r, &pt)
 280  			}
 281  		}
 282  
 283  		// Add contribution from na2 (using preA2 table)
 284  		if i < bitsNa2 && wnafNa2[i] != 0 {
 285  			var pt GroupElementAffine
 286  			tableGetGEFixed(&pt, &preA2, int(wnafNa2[i]))
 287  			if negNa2 {
 288  				pt.negate(&pt)
 289  			}
 290  			if r.isInfinity() {
 291  				r.setGE(&pt)
 292  			} else {
 293  				r.addGE(r, &pt)
 294  			}
 295  		}
 296  
 297  		// Add contribution from ng1 (using preGenG table)
 298  		if i < bitsNg1 && wnafNg1[i] != 0 {
 299  			var pt GroupElementAffine
 300  			n := int(wnafNg1[i])
 301  			var idx int
 302  			if n > 0 {
 303  				idx = (n - 1) / 2
 304  			} else {
 305  				idx = (-n - 1) / 2
 306  			}
 307  			if idx < genTableSize {
 308  				pt = preGenG[idx]
 309  				if n < 0 {
 310  					pt.negate(&pt)
 311  				}
 312  				if negNg1 {
 313  					pt.negate(&pt)
 314  				}
 315  				if r.isInfinity() {
 316  					r.setGE(&pt)
 317  				} else {
 318  					r.addGE(r, &pt)
 319  				}
 320  			}
 321  		}
 322  
 323  		// Add contribution from ng2 (using preGenLambdaG table)
 324  		if i < bitsNg2 && wnafNg2[i] != 0 {
 325  			var pt GroupElementAffine
 326  			n := int(wnafNg2[i])
 327  			var idx int
 328  			if n > 0 {
 329  				idx = (n - 1) / 2
 330  			} else {
 331  				idx = (-n - 1) / 2
 332  			}
 333  			if idx < genTableSize {
 334  				pt = preGenLambdaG[idx]
 335  				if n < 0 {
 336  					pt.negate(&pt)
 337  				}
 338  				if negNg2 {
 339  					pt.negate(&pt)
 340  				}
 341  				if r.isInfinity() {
 342  					r.setGE(&pt)
 343  				} else {
 344  					r.addGE(r, &pt)
 345  				}
 346  			}
 347  		}
 348  	}
 349  }
 350  
 351  // ecmultStraussGLV computes r = q * a using Strauss algorithm with GLV endomorphism
 352  // This provides significant speedup for both verification and ECDH operations
 353  func ecmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
 354  	if a.isInfinity() {
 355  		r.setInfinity()
 356  		return
 357  	}
 358  
 359  	if q.isZero() {
 360  		r.setInfinity()
 361  		return
 362  	}
 363  
 364  	// For now, use simplified Strauss algorithm without GLV endomorphism
 365  	// Convert base point to Jacobian
 366  	var aJac GroupElementJacobian
 367  	aJac.setGE(a)
 368  
 369  	// Compute odd multiples for the scalar
 370  	var preA [1 << (windowA - 1)]GroupElementJacobian
 371  	buildOddMultiples(&preA, &aJac, windowA)
 372  
 373  	// Convert scalar to wNAF representation
 374  	var wnaf [257]int8
 375  	bits := q.wNAF(&wnaf, windowA)
 376  
 377  	// Perform Strauss algorithm
 378  	r.setInfinity()
 379  
 380  	for i := bits - 1; i >= 0; i-- {
 381  		// Double the result
 382  		r.double(r)
 383  
 384  		// Add contribution
 385  		if wnaf[i] != 0 {
 386  			n := int(wnaf[i])
 387  			var pt GroupElementJacobian
 388  			if n > 0 {
 389  				idx := (n-1)/2
 390  				if idx >= len(preA) {
 391  					panic(fmt.Sprintf("wNAF positive index out of bounds: n=%d, idx=%d, len=%d", n, idx, len(preA)))
 392  				}
 393  				pt = preA[idx]
 394  			} else {
 395  				if (-n-1)/2 >= len(preA) {
 396  					panic("wNAF index out of bounds (negative)")
 397  				}
 398  				pt = preA[(-n-1)/2]
 399  				pt.y.negate(&pt.y, 1)
 400  			}
 401  			r.addVar(r, &pt)
 402  		}
 403  	}
 404  }
 405  
 406  // buildOddMultiples builds a table of odd multiples of a point
 407  // pre[i] = (2*i+1) * a for i = 0 to (1<<(w-1))-1
 408  func buildOddMultiples(pre *[1 << (windowA - 1)]GroupElementJacobian, a *GroupElementJacobian, w uint) {
 409  	tableSize := 1 << (w - 1)
 410  
 411  	// pre[0] = a (which is 1*a)
 412  	pre[0] = *a
 413  
 414  	if tableSize > 1 {
 415  		// Compute 2*a
 416  		var twoA GroupElementJacobian
 417  		twoA.double(a)
 418  
 419  		// Build odd multiples: pre[i] = pre[i-2] + 2*a for i >= 2, i even
 420  		for i := 2; i < tableSize; i += 2 {
 421  			pre[i].addVar(&pre[i-2], &twoA)
 422  		}
 423  	}
 424  }
 425  
 426  // EcmultStraussGLV is the public interface for optimized Strauss+GLV multiplication
 427  func EcmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
 428  	ecmultStraussGLV(r, a, q)
 429  }
 430  
 431  // ECDHHashFunction is a function type for hashing ECDH shared secrets
 432  type ECDHHashFunction func(output []byte, x32 []byte, y32 []byte) bool
 433  
 434  // ecdhHashFunctionSHA256 implements the default SHA-256 based hash function for ECDH
 435  // Following the C reference implementation exactly
 436  func ecdhHashFunctionSHA256(output []byte, x32 []byte, y32 []byte) bool {
 437  	if len(output) != 32 || len(x32) != 32 || len(y32) != 32 {
 438  		return false
 439  	}
 440  	
 441  	// Version byte: (y32[31] & 0x01) | 0x02
 442  	version := byte((y32[31] & 0x01) | 0x02)
 443  	
 444  	sha := NewSHA256()
 445  	sha.Write([]byte{version})
 446  	sha.Write(x32)
 447  	sha.Finalize(output)
 448  	sha.Clear()
 449  	
 450  	return true
 451  }
 452  
 453  // ECDH computes an EC Diffie-Hellman shared secret
 454  // Following the C reference implementation secp256k1_ecdh
 455  func ECDH(output []byte, pubkey *PublicKey, seckey []byte, hashfp ECDHHashFunction) error {
 456  	if len(output) != 32 {
 457  		return errors.New("output must be 32 bytes")
 458  	}
 459  	if len(seckey) != 32 {
 460  		return errors.New("seckey must be 32 bytes")
 461  	}
 462  	if pubkey == nil {
 463  		return errors.New("pubkey cannot be nil")
 464  	}
 465  	
 466  	// Use default hash function if none provided
 467  	if hashfp == nil {
 468  		hashfp = ecdhHashFunctionSHA256
 469  	}
 470  	
 471  	// Load public key
 472  	var pt GroupElementAffine
 473  	pt.fromBytes(pubkey.data[:])
 474  	if pt.isInfinity() {
 475  		return errors.New("invalid public key")
 476  	}
 477  	
 478  	// Parse scalar
 479  	var s Scalar
 480  	if !s.setB32Seckey(seckey) {
 481  		return errors.New("invalid secret key")
 482  	}
 483  	
 484  	// Handle zero scalar
 485  	if s.isZero() {
 486  		return errors.New("secret key cannot be zero")
 487  	}
 488  
 489  	// Compute res = s * pt using optimized windowed multiplication (variable-time)
 490  	// ECDH doesn't require constant-time since the secret key is already known
 491  	var res GroupElementJacobian
 492  	ecmultWindowedVar(&res, &pt, &s)
 493  	
 494  	// Convert to affine
 495  	var resAff GroupElementAffine
 496  	resAff.setGEJ(&res)
 497  	resAff.x.normalize()
 498  	resAff.y.normalize()
 499  	
 500  	// Extract x and y coordinates
 501  	var x, y [32]byte
 502  	resAff.x.getB32(x[:])
 503  	resAff.y.getB32(y[:])
 504  	
 505  	// Compute hash
 506  	success := hashfp(output, x[:], y[:])
 507  	
 508  	// Clear sensitive data
 509  	memclear(unsafe.Pointer(&x[0]), 32)
 510  	memclear(unsafe.Pointer(&y[0]), 32)
 511  	s.clear()
 512  	resAff.clear()
 513  	res.clear()
 514  	
 515  	if !success {
 516  		return errors.New("hash function failed")
 517  	}
 518  	
 519  	return nil
 520  }
 521  
 522  // HKDF performs HMAC-based Key Derivation Function (RFC 5869)
 523  // Outputs key material of the specified length
 524  func HKDF(output []byte, ikm []byte, salt []byte, info []byte) error {
 525  	if len(output) == 0 {
 526  		return errors.New("output length must be greater than 0")
 527  	}
 528  	
 529  	// Step 1: Extract (if salt is empty, use zeros)
 530  	if len(salt) == 0 {
 531  		salt = make([]byte, 32)
 532  	}
 533  	
 534  	// PRK = HMAC-SHA256(salt, IKM)
 535  	var prk [32]byte
 536  	hmac := NewHMACSHA256(salt)
 537  	hmac.Write(ikm)
 538  	hmac.Finalize(prk[:])
 539  	hmac.Clear()
 540  	
 541  	// Step 2: Expand
 542  	// Generate output using HKDF-Expand
 543  	// T(0) = empty
 544  	// T(i) = HMAC(PRK, T(i-1) || info || i)
 545  	
 546  	outlen := len(output)
 547  	outidx := 0
 548  	
 549  	// T(0) is empty
 550  	var t []byte
 551  	
 552  	// Generate blocks until we have enough output
 553  	blockNum := byte(1)
 554  	for outidx < outlen {
 555  		// Compute T(i) = HMAC(PRK, T(i-1) || info || i)
 556  		hmac = NewHMACSHA256(prk[:])
 557  		if len(t) > 0 {
 558  			hmac.Write(t)
 559  		}
 560  		if len(info) > 0 {
 561  			hmac.Write(info)
 562  		}
 563  		hmac.Write([]byte{blockNum})
 564  		
 565  		var tBlock [32]byte
 566  		hmac.Finalize(tBlock[:])
 567  		hmac.Clear()
 568  		
 569  		// Copy to output
 570  		copyLen := len(tBlock)
 571  		if copyLen > outlen-outidx {
 572  			copyLen = outlen - outidx
 573  		}
 574  		copy(output[outidx:outidx+copyLen], tBlock[:copyLen])
 575  		outidx += copyLen
 576  		
 577  		// Update T for next iteration
 578  		t = tBlock[:]
 579  		blockNum++
 580  	}
 581  	
 582  	// Clear sensitive data
 583  	memclear(unsafe.Pointer(&prk[0]), 32)
 584  	if len(t) > 0 {
 585  		memclear(unsafe.Pointer(&t[0]), uintptr(len(t)))
 586  	}
 587  	
 588  	return nil
 589  }
 590  
 591  // ECDHWithHKDF computes ECDH and derives a key using HKDF
 592  func ECDHWithHKDF(output []byte, pubkey *PublicKey, seckey []byte, salt []byte, info []byte) error {
 593  	// Compute ECDH shared secret
 594  	var sharedSecret [32]byte
 595  	if err := ECDH(sharedSecret[:], pubkey, seckey, nil); err != nil {
 596  		return err
 597  	}
 598  	
 599  	// Derive key using HKDF
 600  	err := HKDF(output, sharedSecret[:], salt, info)
 601  	
 602  	// Clear shared secret
 603  	memclear(unsafe.Pointer(&sharedSecret[0]), 32)
 604  	
 605  	return err
 606  }
 607  
 608  // =============================================================================
 609  // Phase 4: Strauss-GLV Algorithm with wNAF
 610  // =============================================================================
 611  
 612  // buildOddMultiplesTableAffine builds a table of odd multiples of a point in affine coordinates
 613  // pre[i] = (2*i+1) * a for i = 0 to tableSize-1
 614  // Also returns the precomputed β*x values for λ-transformed lookups
 615  //
 616  // The table is built efficiently using:
 617  // 1. Compute odd multiples in Jacobian: 1*a, 3*a, 5*a, ...
 618  // 2. Batch normalize all points to affine
 619  // 3. Precompute β*x for each point for GLV lookups
 620  //
 621  // Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_odd_multiples_table
 622  func buildOddMultiplesTableAffine(preA []GroupElementAffine, preBetaX []FieldElement, a *GroupElementJacobian, tableSize int) {
 623  	if tableSize == 0 {
 624  		return
 625  	}
 626  
 627  	// Build odd multiples in Jacobian coordinates
 628  	preJac := make([]GroupElementJacobian, tableSize)
 629  
 630  	// pre[0] = a (which is 1*a)
 631  	preJac[0] = *a
 632  
 633  	if tableSize > 1 {
 634  		// Compute 2*a
 635  		var twoA GroupElementJacobian
 636  		twoA.double(a)
 637  
 638  		// Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1
 639  		for i := 1; i < tableSize; i++ {
 640  			preJac[i].addVar(&preJac[i-1], &twoA)
 641  		}
 642  	}
 643  
 644  	// Batch normalize to affine coordinates
 645  	BatchNormalize(preA, preJac)
 646  
 647  	// Precompute β*x for each point (for λ-transformed lookups)
 648  	for i := 0; i < tableSize; i++ {
 649  		if preA[i].isInfinity() {
 650  			preBetaX[i] = FieldElementZero
 651  		} else {
 652  			preBetaX[i].mul(&preA[i].x, &fieldBeta)
 653  		}
 654  	}
 655  }
 656  
 657  // tableGetGE retrieves a point from the table, handling sign
 658  // n is the wNAF digit (can be negative)
 659  // Returns pre[(|n|-1)/2], negated if n < 0
 660  //
 661  // Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE
 662  func tableGetGE(r *GroupElementAffine, pre []GroupElementAffine, n int) {
 663  	if n == 0 {
 664  		r.setInfinity()
 665  		return
 666  	}
 667  
 668  	var idx int
 669  	if n > 0 {
 670  		idx = (n - 1) / 2
 671  	} else {
 672  		idx = (-n - 1) / 2
 673  	}
 674  
 675  	if idx >= len(pre) {
 676  		r.setInfinity()
 677  		return
 678  	}
 679  
 680  	*r = pre[idx]
 681  
 682  	// Negate if n < 0
 683  	if n < 0 {
 684  		r.negate(r)
 685  	}
 686  }
 687  
 688  // tableGetGELambda retrieves the λ-transformed point from the table
 689  // Uses precomputed β*x values for efficiency
 690  // n is the wNAF digit (can be negative)
 691  // Returns λ*pre[(|n|-1)/2], negated if n < 0
 692  //
 693  // Since λ*(x, y) = (β*x, y), and we precomputed β*x,
 694  // we just need to use the precomputed β*x instead of x
 695  //
 696  // Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE_LAMBDA
 697  func tableGetGELambda(r *GroupElementAffine, pre []GroupElementAffine, preBetaX []FieldElement, n int) {
 698  	if n == 0 {
 699  		r.setInfinity()
 700  		return
 701  	}
 702  
 703  	var idx int
 704  	if n > 0 {
 705  		idx = (n - 1) / 2
 706  	} else {
 707  		idx = (-n - 1) / 2
 708  	}
 709  
 710  	if idx >= len(pre) {
 711  		r.setInfinity()
 712  		return
 713  	}
 714  
 715  	// Use precomputed β*x instead of x
 716  	r.x = preBetaX[idx]
 717  	r.y = pre[idx].y
 718  	r.infinity = pre[idx].infinity
 719  
 720  	// Negate if n < 0
 721  	if n < 0 {
 722  		r.negate(r)
 723  	}
 724  }
 725  
 726  // Window size for the GLV split scalars
 727  // Window 5 (16 entries) is optimal - larger windows have more table-building overhead
 728  // than savings from fewer point additions for single-use tables
 729  const glvWNAFW = 5
 730  const glvTableSize = 1 << (glvWNAFW - 1) // 16 entries for window size 5
 731  
 732  // ecmultStraussWNAFGLV computes r = q * a using Strauss algorithm with GLV endomorphism
 733  // This splits the scalar using GLV and processes two ~128-bit scalars simultaneously
 734  // using wNAF representation for efficient point multiplication.
 735  //
 736  // The algorithm:
 737  // 1. Split q into q1, q2 such that q1 + q2*λ ≡ q (mod n), where q1, q2 are ~128 bits
 738  // 2. Build odd multiples table for a and precompute β*x for λ-transformed lookups
 739  // 3. Convert q1, q2 to wNAF representation
 740  // 4. Process both wNAF representations simultaneously in a single pass
 741  //
 742  // Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_strauss_wnaf
 743  func ecmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
 744  	if a.isInfinity() {
 745  		r.setInfinity()
 746  		return
 747  	}
 748  
 749  	if q.isZero() {
 750  		r.setInfinity()
 751  		return
 752  	}
 753  
 754  	// Split scalar using GLV endomorphism: q = q1 + q2*λ
 755  	// Also get the transformed points p1 = a, p2 = λ*a
 756  	var q1, q2 Scalar
 757  	var p1, p2 GroupElementAffine
 758  	ecmultEndoSplit(&q1, &q2, &p1, &p2, q, a)
 759  
 760  	// Build odd multiples tables using stack-allocated arrays
 761  	var aJac GroupElementJacobian
 762  	aJac.setGE(&p1)
 763  
 764  	var preA [glvTableSize]GroupElementAffine
 765  	var preBetaX [glvTableSize]FieldElement
 766  	buildOddMultiplesTableAffineFixed(&preA, &preBetaX, &aJac)
 767  
 768  	// Build odd multiples table for p2 (which is λ*a)
 769  	var p2Jac GroupElementJacobian
 770  	p2Jac.setGE(&p2)
 771  
 772  	var preA2 [glvTableSize]GroupElementAffine
 773  	var preBetaX2 [glvTableSize]FieldElement
 774  	buildOddMultiplesTableAffineFixed(&preA2, &preBetaX2, &p2Jac)
 775  
 776  	// Convert scalars to wNAF representation
 777  	var wnaf1, wnaf2 [257]int8
 778  
 779  	bits1 := q1.wNAF(&wnaf1, glvWNAFW)
 780  	bits2 := q2.wNAF(&wnaf2, glvWNAFW)
 781  
 782  	// Find the maximum bit position
 783  	maxBits := bits1
 784  	if bits2 > maxBits {
 785  		maxBits = bits2
 786  	}
 787  
 788  	// Perform the Strauss algorithm
 789  	r.setInfinity()
 790  
 791  	for i := maxBits - 1; i >= 0; i-- {
 792  		// Double the result
 793  		if !r.isInfinity() {
 794  			r.double(r)
 795  		}
 796  
 797  		// Add contribution from q1
 798  		if i < bits1 && wnaf1[i] != 0 {
 799  			var pt GroupElementAffine
 800  			tableGetGEFixed(&pt, &preA, int(wnaf1[i]))
 801  
 802  			if r.isInfinity() {
 803  				r.setGE(&pt)
 804  			} else {
 805  				r.addGE(r, &pt)
 806  			}
 807  		}
 808  
 809  		// Add contribution from q2
 810  		if i < bits2 && wnaf2[i] != 0 {
 811  			var pt GroupElementAffine
 812  			tableGetGEFixed(&pt, &preA2, int(wnaf2[i]))
 813  
 814  			if r.isInfinity() {
 815  				r.setGE(&pt)
 816  			} else {
 817  				r.addGE(r, &pt)
 818  			}
 819  		}
 820  	}
 821  }
 822  
 823  // buildOddMultiplesTableAffineFixed is like buildOddMultiplesTableAffine but uses fixed-size arrays
 824  // and zero heap allocations. Optimized for glvTableSize = 16.
 825  func buildOddMultiplesTableAffineFixed(preA *[glvTableSize]GroupElementAffine, preBetaX *[glvTableSize]FieldElement, a *GroupElementJacobian) {
 826  	// Build odd multiples in Jacobian coordinates (stack-allocated)
 827  	var preJac [glvTableSize]GroupElementJacobian
 828  
 829  	// pre[0] = a (which is 1*a)
 830  	preJac[0] = *a
 831  
 832  	if glvTableSize > 1 {
 833  		// Compute 2*a
 834  		var twoA GroupElementJacobian
 835  		twoA.double(a)
 836  
 837  		// Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1
 838  		for i := 1; i < glvTableSize; i++ {
 839  			preJac[i].addVar(&preJac[i-1], &twoA)
 840  		}
 841  	}
 842  
 843  	// Batch normalize to affine coordinates with zero allocations
 844  	batchNormalize16(preA, &preJac)
 845  
 846  	// Precompute β*x for each point
 847  	for i := 0; i < glvTableSize; i++ {
 848  		if preA[i].isInfinity() {
 849  			preBetaX[i] = FieldElementZero
 850  		} else {
 851  			preBetaX[i].mul(&preA[i].x, &fieldBeta)
 852  		}
 853  	}
 854  }
 855  
 856  // tableGetGEFixed retrieves a point from a fixed-size table
 857  func tableGetGEFixed(r *GroupElementAffine, pre *[glvTableSize]GroupElementAffine, n int) {
 858  	if n == 0 {
 859  		r.setInfinity()
 860  		return
 861  	}
 862  
 863  	var idx int
 864  	if n > 0 {
 865  		idx = (n - 1) / 2
 866  	} else {
 867  		idx = (-n - 1) / 2
 868  	}
 869  
 870  	if idx >= glvTableSize {
 871  		r.setInfinity()
 872  		return
 873  	}
 874  
 875  	*r = pre[idx]
 876  
 877  	// Negate if n < 0
 878  	if n < 0 {
 879  		r.negate(r)
 880  	}
 881  }
 882  
 883  // EcmultStraussWNAFGLV is the public interface for optimized Strauss+GLV+wNAF multiplication
 884  func EcmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
 885  	ecmultStraussWNAFGLV(r, a, q)
 886  }
 887  
 888  // ECDHXOnly computes X-only ECDH (BIP-340 style)
 889  // Outputs only the X coordinate of the shared secret point
 890  func ECDHXOnly(output []byte, pubkey *PublicKey, seckey []byte) error {
 891  	if len(output) != 32 {
 892  		return errors.New("output must be 32 bytes")
 893  	}
 894  	if len(seckey) != 32 {
 895  		return errors.New("seckey must be 32 bytes")
 896  	}
 897  	if pubkey == nil {
 898  		return errors.New("pubkey cannot be nil")
 899  	}
 900  	
 901  	// Load public key
 902  	var pt GroupElementAffine
 903  	pt.fromBytes(pubkey.data[:])
 904  	if pt.isInfinity() {
 905  		return errors.New("invalid public key")
 906  	}
 907  	
 908  	// Parse scalar
 909  	var s Scalar
 910  	if !s.setB32Seckey(seckey) {
 911  		return errors.New("invalid secret key")
 912  	}
 913  	
 914  	if s.isZero() {
 915  		return errors.New("secret key cannot be zero")
 916  	}
 917  	
 918  	// Compute res = s * pt using optimized windowed multiplication (variable-time)
 919  	// ECDH doesn't require constant-time since the secret key is already known
 920  	var res GroupElementJacobian
 921  	ecmultWindowedVar(&res, &pt, &s)
 922  	
 923  	// Convert to affine
 924  	var resAff GroupElementAffine
 925  	resAff.setGEJ(&res)
 926  	resAff.x.normalize()
 927  	
 928  	// Extract X coordinate only
 929  	resAff.x.getB32(output)
 930  	
 931  	// Clear sensitive data
 932  	s.clear()
 933  	resAff.clear()
 934  	res.clear()
 935  	
 936  	return nil
 937  }
 938  
 939  
 940