ecmult_gen_amd64.go raw
1 //go:build amd64 && !purego
2
3 package p256k1
4
5 // =============================================================================
6 // AMD64-optimized Generator Precomputation using Group4x64
7 // =============================================================================
8 //
9 // This file contains optimized generator multiplication using the faster
10 // Group4x64 field representation with BMI2 MULX instructions.
11
// Generator tables in the 4x64 representation. All three variables are
// zero-valued until initGen4x64Tables has run once.
var (
	// preGenG4x64 holds the entries of preGenG converted with
	// FromGroupElementAffine. Entry i is used for the odd wNAF digit 2*i+1,
	// i.e. it is expected to hold (2*i+1)*G.
	preGenG4x64 [genTableSize]GroupElement4x64Affine

	// preGenLambdaG4x64 holds the entries of preGenLambdaG converted with
	// FromGroupElementAffine; same indexing as preGenG4x64, but for the
	// odd multiples of λ*G.
	preGenLambdaG4x64 [genTableSize]GroupElement4x64Affine

	// gen4x64TablesInitialized is set to true by initGen4x64Tables after both
	// tables above have been filled, so later calls can return immediately.
	gen4x64TablesInitialized bool
)
23
24 // initGen4x64Tables converts the precomputed tables to 4x64 format
25 func initGen4x64Tables() {
26 if gen4x64TablesInitialized {
27 return
28 }
29
30 // Ensure base tables are initialized first
31 initGenTables()
32
33 // Convert preGenG to 4x64 format
34 for i := 0; i < genTableSize; i++ {
35 preGenG4x64[i].FromGroupElementAffine(&preGenG[i])
36 }
37
38 // Convert preGenLambdaG to 4x64 format
39 for i := 0; i < genTableSize; i++ {
40 preGenLambdaG4x64[i].FromGroupElementAffine(&preGenLambdaG[i])
41 }
42
43 gen4x64TablesInitialized = true
44 }
45
46 // ecmultGenGLV4x64 computes r = k * G using 4x64 optimized operations
47 // This is significantly faster than the generic version on AMD64
48 func ecmultGenGLV4x64(r *GroupElementJacobian, k *Scalar) {
49 if k.isZero() {
50 r.setInfinity()
51 return
52 }
53
54 // Ensure tables are initialized
55 initGen4x64Tables()
56
57 // Split scalar using GLV: k = k1 + k2*λ
58 var k1, k2 Scalar
59 scalarSplitLambda(&k1, &k2, k)
60
61 // Normalize k1 and k2 to be "low" (not high)
62 neg1 := k1.isHigh()
63 if neg1 {
64 k1.negate(&k1)
65 }
66
67 neg2 := k2.isHigh()
68 if neg2 {
69 k2.negate(&k2)
70 }
71
72 // Convert to wNAF
73 var wnaf1, wnaf2 [257]int8
74
75 bits1 := k1.wNAF(&wnaf1, genWindowSize)
76 bits2 := k2.wNAF(&wnaf2, genWindowSize)
77
78 // Find maximum bit position
79 maxBits := bits1
80 if bits2 > maxBits {
81 maxBits = bits2
82 }
83
84 // Perform Strauss algorithm using 4x64 operations
85 var r4x64 GroupElement4x64Jacobian
86 r4x64.setInfinity()
87
88 for i := maxBits - 1; i >= 0; i-- {
89 // Double the result
90 if !r4x64.isInfinity() {
91 r4x64.double(&r4x64)
92 }
93
94 // Add contribution from k1 (using preGenG4x64 table)
95 if i < bits1 && wnaf1[i] != 0 {
96 var pt GroupElement4x64Affine
97 n := int(wnaf1[i])
98
99 var idx int
100 if n > 0 {
101 idx = (n - 1) / 2
102 } else {
103 idx = (-n - 1) / 2
104 }
105
106 if idx < genTableSize {
107 pt = preGenG4x64[idx]
108 // Negate if wNAF digit is negative
109 if n < 0 {
110 pt.negate(&pt)
111 }
112 // Negate if k1 was negated during normalization
113 if neg1 {
114 pt.negate(&pt)
115 }
116
117 if r4x64.isInfinity() {
118 r4x64.setGE(&pt)
119 } else {
120 r4x64.addGE(&r4x64, &pt)
121 }
122 }
123 }
124
125 // Add contribution from k2 (using preGenLambdaG4x64 table)
126 if i < bits2 && wnaf2[i] != 0 {
127 var pt GroupElement4x64Affine
128 n := int(wnaf2[i])
129
130 var idx int
131 if n > 0 {
132 idx = (n - 1) / 2
133 } else {
134 idx = (-n - 1) / 2
135 }
136
137 if idx < genTableSize {
138 pt = preGenLambdaG4x64[idx]
139 // Negate if wNAF digit is negative
140 if n < 0 {
141 pt.negate(&pt)
142 }
143 // Negate if k2 was negated during normalization
144 if neg2 {
145 pt.negate(&pt)
146 }
147
148 if r4x64.isInfinity() {
149 r4x64.setGE(&pt)
150 } else {
151 r4x64.addGE(&r4x64, &pt)
152 }
153 }
154 }
155 }
156
157 // Convert result back to standard format
158 r4x64.ToGroupElementJacobian(r)
159 }
160
// EcmultGen sets r to k*G.
//
// This definition is compiled only under the file's amd64 && !purego build
// constraint and delegates directly to ecmultGenGLV4x64.
func EcmultGen(r *GroupElementJacobian, k *Scalar) {
	ecmultGenGLV4x64(r, k)
}
166
// EcmultGenGLV sets r to k*G.
//
// In this build it is equivalent to EcmultGen: both delegate to
// ecmultGenGLV4x64.
func EcmultGenGLV(r *GroupElementJacobian, k *Scalar) {
	ecmultGenGLV4x64(r, k)
}
172