ecdh.go raw
1 package p256k1
2
3 import (
4 "errors"
5 "fmt"
6 "unsafe"
7 )
8
9 const (
10 // Window sizes for elliptic curve multiplication optimizations
11 windowA = 5 // Window size for main scalar (A)
12 windowG = 14 // Window size for generator (G) - larger for better performance
13 )
14
15 // EcmultConst computes r = q * a using constant-time multiplication
16 // Uses simple binary method
17 func EcmultConst(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
18 if a.isInfinity() {
19 r.setInfinity()
20 return
21 }
22
23 if q.isZero() {
24 r.setInfinity()
25 return
26 }
27
28 // Convert affine point to Jacobian
29 var aJac GroupElementJacobian
30 aJac.setGE(a)
31
32 // Use simple binary method for constant-time behavior
33 r.setInfinity()
34
35 var base GroupElementJacobian
36 base = aJac
37
38 // Process bits from MSB to LSB
39 for i := 0; i < 256; i++ {
40 if i > 0 {
41 r.double(r)
42 }
43
44 // Get bit i (from MSB)
45 bit := q.getBits(uint(255-i), 1)
46 if bit != 0 {
47 if r.isInfinity() {
48 *r = base
49 } else {
50 r.addVar(r, &base)
51 }
52 }
53 }
54 }
55
56 // ecmultWindowedVar computes r = q * a using optimized windowed multiplication (variable-time)
57 // Uses a window size of 6 bits (64 precomputed multiples) for better CPU performance
58 // Trades memory (64 entries vs 32) for ~20% faster multiplication
59 func ecmultWindowedVar(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
60 if a.isInfinity() {
61 r.setInfinity()
62 return
63 }
64
65 if q.isZero() {
66 r.setInfinity()
67 return
68 }
69
70 const windowSize = 6 // Increased from 5 to 6 for better performance
71 const tableSize = 1 << windowSize // 64
72
73 // Convert point to Jacobian once
74 var aJac GroupElementJacobian
75 aJac.setGE(a)
76
77 // Build table efficiently using Jacobian coordinates, only convert to affine at end
78 // Store odd multiples in Jacobian form to avoid frequent conversions
79 var tableJac [tableSize]GroupElementJacobian
80 tableJac[0].setInfinity()
81 tableJac[1] = aJac
82
83 // Build odd multiples efficiently: tableJac[2*i+1] = (2*i+1) * a
84 // Start with 3*a = a + 2*a
85 var twoA GroupElementJacobian
86 twoA.double(&aJac)
87
88 // Build table: tableJac[i] = tableJac[i-2] + 2*a for odd i
89 for i := 3; i < tableSize; i += 2 {
90 tableJac[i].addVar(&tableJac[i-2], &twoA)
91 }
92
93 // Build even multiples: tableJac[2*i] = 2 * tableJac[i]
94 for i := 1; i < tableSize/2; i++ {
95 tableJac[2*i].double(&tableJac[i])
96 }
97
98 // Process scalar in windows of 6 bits from MSB to LSB
99 r.setInfinity()
100 numWindows := (256 + windowSize - 1) / windowSize // Ceiling division
101
102 for window := 0; window < numWindows; window++ {
103 // Calculate bit offset for this window (MSB first)
104 bitOffset := 255 - window*windowSize
105 if bitOffset < 0 {
106 break
107 }
108
109 // Extract window bits
110 actualWindowSize := windowSize
111 if bitOffset < windowSize-1 {
112 actualWindowSize = bitOffset + 1
113 }
114
115 windowBits := q.getBits(uint(bitOffset-actualWindowSize+1), uint(actualWindowSize))
116
117 // Double result windowSize times (once per bit position in window)
118 if !r.isInfinity() {
119 for j := 0; j < actualWindowSize; j++ {
120 r.double(r)
121 }
122 }
123
124 // Add precomputed point if window is non-zero
125 if windowBits != 0 && windowBits < tableSize {
126 if r.isInfinity() {
127 *r = tableJac[windowBits]
128 } else {
129 r.addVar(r, &tableJac[windowBits])
130 }
131 }
132 }
133 }
134
135 // Ecmult computes r = q * a using optimized GLV+Strauss+wNAF multiplication
136 // This provides good performance for verification and ECDH operations
137 func Ecmult(r *GroupElementJacobian, a *GroupElementJacobian, q *Scalar) {
138 if a.isInfinity() {
139 r.setInfinity()
140 return
141 }
142
143 if q.isZero() {
144 r.setInfinity()
145 return
146 }
147
148 // Convert to affine for GLV multiplication
149 var aAff GroupElementAffine
150 aAff.setGEJ(a)
151
152 // Use optimized GLV+Strauss+wNAF multiplication
153 ecmultStraussWNAFGLV(r, &aAff, q)
154 }
155
156 // ecmultCombinedGeneric computes r = na*a + ng*G using combined Strauss algorithm
157 // This is the generic (5x52) implementation
158 func ecmultCombinedGeneric(r *GroupElementJacobian, a *GroupElementJacobian, na, ng *Scalar) {
159 // Handle edge cases
160 naZero := na == nil || na.isZero()
161 ngZero := ng == nil || ng.isZero()
162 aInf := a == nil || a.isInfinity()
163
164 // If both scalars are zero, result is infinity
165 if naZero && ngZero {
166 r.setInfinity()
167 return
168 }
169
170 // If na is zero or a is infinity, just compute ng*G
171 if naZero || aInf {
172 EcmultGen(r, ng)
173 return
174 }
175
176 // If ng is zero, just compute na*a
177 if ngZero {
178 var aAff GroupElementAffine
179 aAff.setGEJ(a)
180 ecmultStraussWNAFGLV(r, &aAff, na)
181 return
182 }
183
184 // Use combined Strauss algorithm - shares doublings between both multiplications
185 ecmultStraussCombined(r, a, na, ng)
186 }
187
188 // ecmultStraussCombined computes r = na*a + ng*G using combined Strauss with GLV
189 // This processes all four wNAF representations (na1, na2, ng1, ng2) in a single loop
190 // sharing doublings for significant performance improvement
191 func ecmultStraussCombined(r *GroupElementJacobian, a *GroupElementJacobian, na, ng *Scalar) {
192 // Ensure generator tables are initialized
193 initGenTables()
194
195 // Convert a to affine
196 var aAff GroupElementAffine
197 aAff.setGEJ(a)
198
199 // Split na using GLV: na = na1 + na2*λ
200 var na1, na2 Scalar
201 var p1, p2 GroupElementAffine
202 ecmultEndoSplit(&na1, &na2, &p1, &p2, na, &aAff)
203
204 // Split ng using GLV: ng = ng1 + ng2*λ
205 var ng1, ng2 Scalar
206 scalarSplitLambda(&ng1, &ng2, ng)
207
208 // Normalize all scalars to be "low" (not high)
209 negNa1 := na1.isHigh()
210 if negNa1 {
211 na1.negate(&na1)
212 }
213 negNa2 := na2.isHigh()
214 if negNa2 {
215 na2.negate(&na2)
216 }
217 negNg1 := ng1.isHigh()
218 if negNg1 {
219 ng1.negate(&ng1)
220 }
221 negNg2 := ng2.isHigh()
222 if negNg2 {
223 ng2.negate(&ng2)
224 }
225
226 // Build precomputed tables for a and λ*a
227 var p1Jac GroupElementJacobian
228 p1Jac.setGE(&p1)
229 var preA1 [glvTableSize]GroupElementAffine
230 var preBetaX1 [glvTableSize]FieldElement
231 buildOddMultiplesTableAffineFixed(&preA1, &preBetaX1, &p1Jac)
232
233 var p2Jac GroupElementJacobian
234 p2Jac.setGE(&p2)
235 var preA2 [glvTableSize]GroupElementAffine
236 var preBetaX2 [glvTableSize]FieldElement
237 buildOddMultiplesTableAffineFixed(&preA2, &preBetaX2, &p2Jac)
238
239 // Convert all four scalars to wNAF
240 var wnafNa1, wnafNa2 [257]int8
241 var wnafNg1, wnafNg2 [257]int8
242
243 bitsNa1 := na1.wNAF(&wnafNa1, glvWNAFW)
244 bitsNa2 := na2.wNAF(&wnafNa2, glvWNAFW)
245 bitsNg1 := ng1.wNAF(&wnafNg1, genWindowSize)
246 bitsNg2 := ng2.wNAF(&wnafNg2, genWindowSize)
247
248 // Find maximum bit position across all four
249 maxBits := bitsNa1
250 if bitsNa2 > maxBits {
251 maxBits = bitsNa2
252 }
253 if bitsNg1 > maxBits {
254 maxBits = bitsNg1
255 }
256 if bitsNg2 > maxBits {
257 maxBits = bitsNg2
258 }
259
260 // Combined Strauss loop - one set of doublings for all four contributions
261 r.setInfinity()
262
263 for i := maxBits - 1; i >= 0; i-- {
264 // Double once (shared across all four multiplications)
265 if !r.isInfinity() {
266 r.double(r)
267 }
268
269 // Add contribution from na1 (using preA1 table)
270 if i < bitsNa1 && wnafNa1[i] != 0 {
271 var pt GroupElementAffine
272 tableGetGEFixed(&pt, &preA1, int(wnafNa1[i]))
273 if negNa1 {
274 pt.negate(&pt)
275 }
276 if r.isInfinity() {
277 r.setGE(&pt)
278 } else {
279 r.addGE(r, &pt)
280 }
281 }
282
283 // Add contribution from na2 (using preA2 table)
284 if i < bitsNa2 && wnafNa2[i] != 0 {
285 var pt GroupElementAffine
286 tableGetGEFixed(&pt, &preA2, int(wnafNa2[i]))
287 if negNa2 {
288 pt.negate(&pt)
289 }
290 if r.isInfinity() {
291 r.setGE(&pt)
292 } else {
293 r.addGE(r, &pt)
294 }
295 }
296
297 // Add contribution from ng1 (using preGenG table)
298 if i < bitsNg1 && wnafNg1[i] != 0 {
299 var pt GroupElementAffine
300 n := int(wnafNg1[i])
301 var idx int
302 if n > 0 {
303 idx = (n - 1) / 2
304 } else {
305 idx = (-n - 1) / 2
306 }
307 if idx < genTableSize {
308 pt = preGenG[idx]
309 if n < 0 {
310 pt.negate(&pt)
311 }
312 if negNg1 {
313 pt.negate(&pt)
314 }
315 if r.isInfinity() {
316 r.setGE(&pt)
317 } else {
318 r.addGE(r, &pt)
319 }
320 }
321 }
322
323 // Add contribution from ng2 (using preGenLambdaG table)
324 if i < bitsNg2 && wnafNg2[i] != 0 {
325 var pt GroupElementAffine
326 n := int(wnafNg2[i])
327 var idx int
328 if n > 0 {
329 idx = (n - 1) / 2
330 } else {
331 idx = (-n - 1) / 2
332 }
333 if idx < genTableSize {
334 pt = preGenLambdaG[idx]
335 if n < 0 {
336 pt.negate(&pt)
337 }
338 if negNg2 {
339 pt.negate(&pt)
340 }
341 if r.isInfinity() {
342 r.setGE(&pt)
343 } else {
344 r.addGE(r, &pt)
345 }
346 }
347 }
348 }
349 }
350
351 // ecmultStraussGLV computes r = q * a using Strauss algorithm with GLV endomorphism
352 // This provides significant speedup for both verification and ECDH operations
353 func ecmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
354 if a.isInfinity() {
355 r.setInfinity()
356 return
357 }
358
359 if q.isZero() {
360 r.setInfinity()
361 return
362 }
363
364 // For now, use simplified Strauss algorithm without GLV endomorphism
365 // Convert base point to Jacobian
366 var aJac GroupElementJacobian
367 aJac.setGE(a)
368
369 // Compute odd multiples for the scalar
370 var preA [1 << (windowA - 1)]GroupElementJacobian
371 buildOddMultiples(&preA, &aJac, windowA)
372
373 // Convert scalar to wNAF representation
374 var wnaf [257]int8
375 bits := q.wNAF(&wnaf, windowA)
376
377 // Perform Strauss algorithm
378 r.setInfinity()
379
380 for i := bits - 1; i >= 0; i-- {
381 // Double the result
382 r.double(r)
383
384 // Add contribution
385 if wnaf[i] != 0 {
386 n := int(wnaf[i])
387 var pt GroupElementJacobian
388 if n > 0 {
389 idx := (n-1)/2
390 if idx >= len(preA) {
391 panic(fmt.Sprintf("wNAF positive index out of bounds: n=%d, idx=%d, len=%d", n, idx, len(preA)))
392 }
393 pt = preA[idx]
394 } else {
395 if (-n-1)/2 >= len(preA) {
396 panic("wNAF index out of bounds (negative)")
397 }
398 pt = preA[(-n-1)/2]
399 pt.y.negate(&pt.y, 1)
400 }
401 r.addVar(r, &pt)
402 }
403 }
404 }
405
406 // buildOddMultiples builds a table of odd multiples of a point
407 // pre[i] = (2*i+1) * a for i = 0 to (1<<(w-1))-1
408 func buildOddMultiples(pre *[1 << (windowA - 1)]GroupElementJacobian, a *GroupElementJacobian, w uint) {
409 tableSize := 1 << (w - 1)
410
411 // pre[0] = a (which is 1*a)
412 pre[0] = *a
413
414 if tableSize > 1 {
415 // Compute 2*a
416 var twoA GroupElementJacobian
417 twoA.double(a)
418
419 // Build odd multiples: pre[i] = pre[i-2] + 2*a for i >= 2, i even
420 for i := 2; i < tableSize; i += 2 {
421 pre[i].addVar(&pre[i-2], &twoA)
422 }
423 }
424 }
425
426 // EcmultStraussGLV is the public interface for optimized Strauss+GLV multiplication
427 func EcmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
428 ecmultStraussGLV(r, a, q)
429 }
430
431 // ECDHHashFunction is a function type for hashing ECDH shared secrets
432 type ECDHHashFunction func(output []byte, x32 []byte, y32 []byte) bool
433
434 // ecdhHashFunctionSHA256 implements the default SHA-256 based hash function for ECDH
435 // Following the C reference implementation exactly
436 func ecdhHashFunctionSHA256(output []byte, x32 []byte, y32 []byte) bool {
437 if len(output) != 32 || len(x32) != 32 || len(y32) != 32 {
438 return false
439 }
440
441 // Version byte: (y32[31] & 0x01) | 0x02
442 version := byte((y32[31] & 0x01) | 0x02)
443
444 sha := NewSHA256()
445 sha.Write([]byte{version})
446 sha.Write(x32)
447 sha.Finalize(output)
448 sha.Clear()
449
450 return true
451 }
452
453 // ECDH computes an EC Diffie-Hellman shared secret
454 // Following the C reference implementation secp256k1_ecdh
455 func ECDH(output []byte, pubkey *PublicKey, seckey []byte, hashfp ECDHHashFunction) error {
456 if len(output) != 32 {
457 return errors.New("output must be 32 bytes")
458 }
459 if len(seckey) != 32 {
460 return errors.New("seckey must be 32 bytes")
461 }
462 if pubkey == nil {
463 return errors.New("pubkey cannot be nil")
464 }
465
466 // Use default hash function if none provided
467 if hashfp == nil {
468 hashfp = ecdhHashFunctionSHA256
469 }
470
471 // Load public key
472 var pt GroupElementAffine
473 pt.fromBytes(pubkey.data[:])
474 if pt.isInfinity() {
475 return errors.New("invalid public key")
476 }
477
478 // Parse scalar
479 var s Scalar
480 if !s.setB32Seckey(seckey) {
481 return errors.New("invalid secret key")
482 }
483
484 // Handle zero scalar
485 if s.isZero() {
486 return errors.New("secret key cannot be zero")
487 }
488
489 // Compute res = s * pt using optimized windowed multiplication (variable-time)
490 // ECDH doesn't require constant-time since the secret key is already known
491 var res GroupElementJacobian
492 ecmultWindowedVar(&res, &pt, &s)
493
494 // Convert to affine
495 var resAff GroupElementAffine
496 resAff.setGEJ(&res)
497 resAff.x.normalize()
498 resAff.y.normalize()
499
500 // Extract x and y coordinates
501 var x, y [32]byte
502 resAff.x.getB32(x[:])
503 resAff.y.getB32(y[:])
504
505 // Compute hash
506 success := hashfp(output, x[:], y[:])
507
508 // Clear sensitive data
509 memclear(unsafe.Pointer(&x[0]), 32)
510 memclear(unsafe.Pointer(&y[0]), 32)
511 s.clear()
512 resAff.clear()
513 res.clear()
514
515 if !success {
516 return errors.New("hash function failed")
517 }
518
519 return nil
520 }
521
522 // HKDF performs HMAC-based Key Derivation Function (RFC 5869)
523 // Outputs key material of the specified length
524 func HKDF(output []byte, ikm []byte, salt []byte, info []byte) error {
525 if len(output) == 0 {
526 return errors.New("output length must be greater than 0")
527 }
528
529 // Step 1: Extract (if salt is empty, use zeros)
530 if len(salt) == 0 {
531 salt = make([]byte, 32)
532 }
533
534 // PRK = HMAC-SHA256(salt, IKM)
535 var prk [32]byte
536 hmac := NewHMACSHA256(salt)
537 hmac.Write(ikm)
538 hmac.Finalize(prk[:])
539 hmac.Clear()
540
541 // Step 2: Expand
542 // Generate output using HKDF-Expand
543 // T(0) = empty
544 // T(i) = HMAC(PRK, T(i-1) || info || i)
545
546 outlen := len(output)
547 outidx := 0
548
549 // T(0) is empty
550 var t []byte
551
552 // Generate blocks until we have enough output
553 blockNum := byte(1)
554 for outidx < outlen {
555 // Compute T(i) = HMAC(PRK, T(i-1) || info || i)
556 hmac = NewHMACSHA256(prk[:])
557 if len(t) > 0 {
558 hmac.Write(t)
559 }
560 if len(info) > 0 {
561 hmac.Write(info)
562 }
563 hmac.Write([]byte{blockNum})
564
565 var tBlock [32]byte
566 hmac.Finalize(tBlock[:])
567 hmac.Clear()
568
569 // Copy to output
570 copyLen := len(tBlock)
571 if copyLen > outlen-outidx {
572 copyLen = outlen - outidx
573 }
574 copy(output[outidx:outidx+copyLen], tBlock[:copyLen])
575 outidx += copyLen
576
577 // Update T for next iteration
578 t = tBlock[:]
579 blockNum++
580 }
581
582 // Clear sensitive data
583 memclear(unsafe.Pointer(&prk[0]), 32)
584 if len(t) > 0 {
585 memclear(unsafe.Pointer(&t[0]), uintptr(len(t)))
586 }
587
588 return nil
589 }
590
591 // ECDHWithHKDF computes ECDH and derives a key using HKDF
592 func ECDHWithHKDF(output []byte, pubkey *PublicKey, seckey []byte, salt []byte, info []byte) error {
593 // Compute ECDH shared secret
594 var sharedSecret [32]byte
595 if err := ECDH(sharedSecret[:], pubkey, seckey, nil); err != nil {
596 return err
597 }
598
599 // Derive key using HKDF
600 err := HKDF(output, sharedSecret[:], salt, info)
601
602 // Clear shared secret
603 memclear(unsafe.Pointer(&sharedSecret[0]), 32)
604
605 return err
606 }
607
608 // =============================================================================
609 // Phase 4: Strauss-GLV Algorithm with wNAF
610 // =============================================================================
611
612 // buildOddMultiplesTableAffine builds a table of odd multiples of a point in affine coordinates
613 // pre[i] = (2*i+1) * a for i = 0 to tableSize-1
614 // Also returns the precomputed β*x values for λ-transformed lookups
615 //
616 // The table is built efficiently using:
617 // 1. Compute odd multiples in Jacobian: 1*a, 3*a, 5*a, ...
618 // 2. Batch normalize all points to affine
619 // 3. Precompute β*x for each point for GLV lookups
620 //
621 // Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_odd_multiples_table
622 func buildOddMultiplesTableAffine(preA []GroupElementAffine, preBetaX []FieldElement, a *GroupElementJacobian, tableSize int) {
623 if tableSize == 0 {
624 return
625 }
626
627 // Build odd multiples in Jacobian coordinates
628 preJac := make([]GroupElementJacobian, tableSize)
629
630 // pre[0] = a (which is 1*a)
631 preJac[0] = *a
632
633 if tableSize > 1 {
634 // Compute 2*a
635 var twoA GroupElementJacobian
636 twoA.double(a)
637
638 // Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1
639 for i := 1; i < tableSize; i++ {
640 preJac[i].addVar(&preJac[i-1], &twoA)
641 }
642 }
643
644 // Batch normalize to affine coordinates
645 BatchNormalize(preA, preJac)
646
647 // Precompute β*x for each point (for λ-transformed lookups)
648 for i := 0; i < tableSize; i++ {
649 if preA[i].isInfinity() {
650 preBetaX[i] = FieldElementZero
651 } else {
652 preBetaX[i].mul(&preA[i].x, &fieldBeta)
653 }
654 }
655 }
656
657 // tableGetGE retrieves a point from the table, handling sign
658 // n is the wNAF digit (can be negative)
659 // Returns pre[(|n|-1)/2], negated if n < 0
660 //
661 // Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE
662 func tableGetGE(r *GroupElementAffine, pre []GroupElementAffine, n int) {
663 if n == 0 {
664 r.setInfinity()
665 return
666 }
667
668 var idx int
669 if n > 0 {
670 idx = (n - 1) / 2
671 } else {
672 idx = (-n - 1) / 2
673 }
674
675 if idx >= len(pre) {
676 r.setInfinity()
677 return
678 }
679
680 *r = pre[idx]
681
682 // Negate if n < 0
683 if n < 0 {
684 r.negate(r)
685 }
686 }
687
688 // tableGetGELambda retrieves the λ-transformed point from the table
689 // Uses precomputed β*x values for efficiency
690 // n is the wNAF digit (can be negative)
691 // Returns λ*pre[(|n|-1)/2], negated if n < 0
692 //
693 // Since λ*(x, y) = (β*x, y), and we precomputed β*x,
694 // we just need to use the precomputed β*x instead of x
695 //
696 // Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE_LAMBDA
697 func tableGetGELambda(r *GroupElementAffine, pre []GroupElementAffine, preBetaX []FieldElement, n int) {
698 if n == 0 {
699 r.setInfinity()
700 return
701 }
702
703 var idx int
704 if n > 0 {
705 idx = (n - 1) / 2
706 } else {
707 idx = (-n - 1) / 2
708 }
709
710 if idx >= len(pre) {
711 r.setInfinity()
712 return
713 }
714
715 // Use precomputed β*x instead of x
716 r.x = preBetaX[idx]
717 r.y = pre[idx].y
718 r.infinity = pre[idx].infinity
719
720 // Negate if n < 0
721 if n < 0 {
722 r.negate(r)
723 }
724 }
725
726 // Window size for the GLV split scalars
727 // Window 5 (16 entries) is optimal - larger windows have more table-building overhead
728 // than savings from fewer point additions for single-use tables
729 const glvWNAFW = 5
730 const glvTableSize = 1 << (glvWNAFW - 1) // 16 entries for window size 5
731
732 // ecmultStraussWNAFGLV computes r = q * a using Strauss algorithm with GLV endomorphism
733 // This splits the scalar using GLV and processes two ~128-bit scalars simultaneously
734 // using wNAF representation for efficient point multiplication.
735 //
736 // The algorithm:
737 // 1. Split q into q1, q2 such that q1 + q2*λ ≡ q (mod n), where q1, q2 are ~128 bits
738 // 2. Build odd multiples table for a and precompute β*x for λ-transformed lookups
739 // 3. Convert q1, q2 to wNAF representation
740 // 4. Process both wNAF representations simultaneously in a single pass
741 //
742 // Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_strauss_wnaf
743 func ecmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
744 if a.isInfinity() {
745 r.setInfinity()
746 return
747 }
748
749 if q.isZero() {
750 r.setInfinity()
751 return
752 }
753
754 // Split scalar using GLV endomorphism: q = q1 + q2*λ
755 // Also get the transformed points p1 = a, p2 = λ*a
756 var q1, q2 Scalar
757 var p1, p2 GroupElementAffine
758 ecmultEndoSplit(&q1, &q2, &p1, &p2, q, a)
759
760 // Build odd multiples tables using stack-allocated arrays
761 var aJac GroupElementJacobian
762 aJac.setGE(&p1)
763
764 var preA [glvTableSize]GroupElementAffine
765 var preBetaX [glvTableSize]FieldElement
766 buildOddMultiplesTableAffineFixed(&preA, &preBetaX, &aJac)
767
768 // Build odd multiples table for p2 (which is λ*a)
769 var p2Jac GroupElementJacobian
770 p2Jac.setGE(&p2)
771
772 var preA2 [glvTableSize]GroupElementAffine
773 var preBetaX2 [glvTableSize]FieldElement
774 buildOddMultiplesTableAffineFixed(&preA2, &preBetaX2, &p2Jac)
775
776 // Convert scalars to wNAF representation
777 var wnaf1, wnaf2 [257]int8
778
779 bits1 := q1.wNAF(&wnaf1, glvWNAFW)
780 bits2 := q2.wNAF(&wnaf2, glvWNAFW)
781
782 // Find the maximum bit position
783 maxBits := bits1
784 if bits2 > maxBits {
785 maxBits = bits2
786 }
787
788 // Perform the Strauss algorithm
789 r.setInfinity()
790
791 for i := maxBits - 1; i >= 0; i-- {
792 // Double the result
793 if !r.isInfinity() {
794 r.double(r)
795 }
796
797 // Add contribution from q1
798 if i < bits1 && wnaf1[i] != 0 {
799 var pt GroupElementAffine
800 tableGetGEFixed(&pt, &preA, int(wnaf1[i]))
801
802 if r.isInfinity() {
803 r.setGE(&pt)
804 } else {
805 r.addGE(r, &pt)
806 }
807 }
808
809 // Add contribution from q2
810 if i < bits2 && wnaf2[i] != 0 {
811 var pt GroupElementAffine
812 tableGetGEFixed(&pt, &preA2, int(wnaf2[i]))
813
814 if r.isInfinity() {
815 r.setGE(&pt)
816 } else {
817 r.addGE(r, &pt)
818 }
819 }
820 }
821 }
822
823 // buildOddMultiplesTableAffineFixed is like buildOddMultiplesTableAffine but uses fixed-size arrays
824 // and zero heap allocations. Optimized for glvTableSize = 16.
825 func buildOddMultiplesTableAffineFixed(preA *[glvTableSize]GroupElementAffine, preBetaX *[glvTableSize]FieldElement, a *GroupElementJacobian) {
826 // Build odd multiples in Jacobian coordinates (stack-allocated)
827 var preJac [glvTableSize]GroupElementJacobian
828
829 // pre[0] = a (which is 1*a)
830 preJac[0] = *a
831
832 if glvTableSize > 1 {
833 // Compute 2*a
834 var twoA GroupElementJacobian
835 twoA.double(a)
836
837 // Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1
838 for i := 1; i < glvTableSize; i++ {
839 preJac[i].addVar(&preJac[i-1], &twoA)
840 }
841 }
842
843 // Batch normalize to affine coordinates with zero allocations
844 batchNormalize16(preA, &preJac)
845
846 // Precompute β*x for each point
847 for i := 0; i < glvTableSize; i++ {
848 if preA[i].isInfinity() {
849 preBetaX[i] = FieldElementZero
850 } else {
851 preBetaX[i].mul(&preA[i].x, &fieldBeta)
852 }
853 }
854 }
855
856 // tableGetGEFixed retrieves a point from a fixed-size table
857 func tableGetGEFixed(r *GroupElementAffine, pre *[glvTableSize]GroupElementAffine, n int) {
858 if n == 0 {
859 r.setInfinity()
860 return
861 }
862
863 var idx int
864 if n > 0 {
865 idx = (n - 1) / 2
866 } else {
867 idx = (-n - 1) / 2
868 }
869
870 if idx >= glvTableSize {
871 r.setInfinity()
872 return
873 }
874
875 *r = pre[idx]
876
877 // Negate if n < 0
878 if n < 0 {
879 r.negate(r)
880 }
881 }
882
883 // EcmultStraussWNAFGLV is the public interface for optimized Strauss+GLV+wNAF multiplication
884 func EcmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
885 ecmultStraussWNAFGLV(r, a, q)
886 }
887
888 // ECDHXOnly computes X-only ECDH (BIP-340 style)
889 // Outputs only the X coordinate of the shared secret point
890 func ECDHXOnly(output []byte, pubkey *PublicKey, seckey []byte) error {
891 if len(output) != 32 {
892 return errors.New("output must be 32 bytes")
893 }
894 if len(seckey) != 32 {
895 return errors.New("seckey must be 32 bytes")
896 }
897 if pubkey == nil {
898 return errors.New("pubkey cannot be nil")
899 }
900
901 // Load public key
902 var pt GroupElementAffine
903 pt.fromBytes(pubkey.data[:])
904 if pt.isInfinity() {
905 return errors.New("invalid public key")
906 }
907
908 // Parse scalar
909 var s Scalar
910 if !s.setB32Seckey(seckey) {
911 return errors.New("invalid secret key")
912 }
913
914 if s.isZero() {
915 return errors.New("secret key cannot be zero")
916 }
917
918 // Compute res = s * pt using optimized windowed multiplication (variable-time)
919 // ECDH doesn't require constant-time since the secret key is already known
920 var res GroupElementJacobian
921 ecmultWindowedVar(&res, &pt, &s)
922
923 // Convert to affine
924 var resAff GroupElementAffine
925 resAff.setGEJ(&res)
926 resAff.x.normalize()
927
928 // Extract X coordinate only
929 resAff.x.getB32(output)
930
931 // Clear sensitive data
932 s.clear()
933 resAff.clear()
934 res.clear()
935
936 return nil
937 }
938
939
940