group_4x64.go raw
1 //go:build amd64 && !purego
2
3 package p256k1
4
5 // Group operations using Field4x64 for maximum performance on AMD64 with BMI2.
6 // These types mirror GroupElementAffine and GroupElementJacobian but use the
7 // faster 4x64 field representation.
8
9 // GroupElement4x64Affine represents a point on the secp256k1 curve in affine coordinates.
10 type GroupElement4x64Affine struct {
11 x, y Field4x64
12 infinity bool
13 }
14
15 // GroupElement4x64Jacobian represents a point on the secp256k1 curve in Jacobian coordinates.
16 // Affine coordinates are (x/z^2, y/z^3).
17 type GroupElement4x64Jacobian struct {
18 x, y, z Field4x64
19 infinity bool
20 }
21
22 // setInfinity sets the point to infinity.
23 func (r *GroupElement4x64Affine) setInfinity() {
24 r.x.SetZero()
25 r.y.SetZero()
26 r.infinity = true
27 }
28
29 // setInfinity sets the Jacobian point to infinity.
30 func (r *GroupElement4x64Jacobian) setInfinity() {
31 r.x.SetZero()
32 r.y.SetOne()
33 r.z.SetZero()
34 r.infinity = true
35 }
36
37 // isInfinity returns true if the point is at infinity.
38 func (r *GroupElement4x64Affine) isInfinity() bool {
39 return r.infinity
40 }
41
42 // isInfinity returns true if the Jacobian point is at infinity.
43 func (r *GroupElement4x64Jacobian) isInfinity() bool {
44 return r.infinity
45 }
46
47 // setGE sets a Jacobian element from an affine element.
48 func (r *GroupElement4x64Jacobian) setGE(a *GroupElement4x64Affine) {
49 if a.infinity {
50 r.setInfinity()
51 return
52 }
53
54 r.x = a.x
55 r.y = a.y
56 r.z.SetOne()
57 r.infinity = false
58 }
59
60 // negate sets r to the negation of a (mirror around X axis).
61 func (r *GroupElement4x64Affine) negate(a *GroupElement4x64Affine) {
62 if a.infinity {
63 r.setInfinity()
64 return
65 }
66
67 r.x = a.x
68 r.y.Negate(&a.y)
69 r.infinity = false
70 }
71
72 // negate sets r to the negation of a (mirror around X axis).
73 func (r *GroupElement4x64Jacobian) negate(a *GroupElement4x64Jacobian) {
74 if a.infinity {
75 r.setInfinity()
76 return
77 }
78
79 r.x = a.x
80 r.y.Negate(&a.y)
81 r.z = a.z
82 r.infinity = false
83 }
84
85 // double sets r = 2*a (point doubling in Jacobian coordinates).
86 // This follows the secp256k1 algorithm exactly, optimized for Field4x64.
87 // Operations: 3 mul, 4 sqr, plus add/negate/half
88 func (r *GroupElement4x64Jacobian) double(a *GroupElement4x64Jacobian) {
89 var l, s, t Field4x64
90
91 r.infinity = a.infinity
92
93 // Z3 = Y1*Z1
94 r.z.Mul(&a.z, &a.y)
95
96 // S = Y1^2
97 s.Sqr(&a.y)
98
99 // L = X1^2
100 l.Sqr(&a.x)
101
102 // L = 3*X1^2
103 l.mulInt(3)
104
105 // L = 3/2*X1^2
106 l.half(&l)
107
108 // T = -S = -Y1^2
109 t.Negate(&s)
110
111 // T = -X1*S = -X1*Y1^2
112 t.Mul(&t, &a.x)
113
114 // X3 = L^2
115 r.x.Sqr(&l)
116
117 // X3 = L^2 + T
118 r.x.Add(&r.x, &t)
119
120 // X3 = L^2 + 2*T
121 r.x.Add(&r.x, &t)
122
123 // S = S^2 = Y1^4
124 s.Sqr(&s)
125
126 // T = X3 + T
127 t.Add(&t, &r.x)
128
129 // Y3 = L*(X3 + T)
130 r.y.Mul(&t, &l)
131
132 // Y3 = L*(X3 + T) + S^2
133 r.y.Add(&r.y, &s)
134
135 // Y3 = -(L*(X3 + T) + S^2)
136 r.y.Negate(&r.y)
137 }
138
139 // addGE sets r = a + b where a is Jacobian and b is affine.
140 // This follows the secp256k1_gej_add_ge_var algorithm.
141 // Operations: 8 mul, 3 sqr
142 func (r *GroupElement4x64Jacobian) addGE(a *GroupElement4x64Jacobian, b *GroupElement4x64Affine) {
143 if a.infinity {
144 r.setGE(b)
145 return
146 }
147 if b.infinity {
148 *r = *a
149 return
150 }
151
152 var z12, u1, u2, s1, s2, h, i, h2, h3, t Field4x64
153
154 // z12 = a->z^2
155 z12.Sqr(&a.z)
156
157 // u1 = a->x
158 u1 = a.x
159
160 // u2 = b->x * z12
161 u2.Mul(&b.x, &z12)
162
163 // s1 = a->y
164 s1 = a.y
165
166 // s2 = b->y * z12 * a->z
167 s2.Mul(&b.y, &z12)
168 s2.Mul(&s2, &a.z)
169
170 // h = u2 - u1
171 h.Negate(&u1)
172 h.Add(&h, &u2)
173
174 // i = s2 - s1
175 i.Negate(&s2)
176 i.Add(&i, &s1)
177
178 // Check if h normalizes to zero
179 h.Reduce()
180 if h.IsZero() {
181 i.Reduce()
182 if i.IsZero() {
183 // Points are equal - double
184 r.double(a)
185 return
186 } else {
187 // Points are negatives - result is infinity
188 r.setInfinity()
189 return
190 }
191 }
192
193 // General addition case
194 r.infinity = false
195
196 // r->z = a->z * h
197 r.z.Mul(&a.z, &h)
198
199 // h2 = h^2
200 h2.Sqr(&h)
201
202 // h2 = -h2
203 h2.Negate(&h2)
204
205 // h3 = h2 * h
206 h3.Mul(&h2, &h)
207
208 // t = u1 * h2
209 t.Mul(&u1, &h2)
210
211 // r->x = i^2
212 r.x.Sqr(&i)
213
214 // r->x = i^2 + h3
215 r.x.Add(&r.x, &h3)
216
217 // r->x = i^2 + h3 + t
218 r.x.Add(&r.x, &t)
219
220 // r->x = i^2 + h3 + 2*t
221 r.x.Add(&r.x, &t)
222
223 // t = t + r->x
224 t.Add(&t, &r.x)
225
226 // r->y = t * i
227 r.y.Mul(&t, &i)
228
229 // h3 = h3 * s1
230 h3.Mul(&h3, &s1)
231
232 // r->y = t * i + h3
233 r.y.Add(&r.y, &h3)
234 }
235
236 // addVar sets r = a + b (variable-time point addition in Jacobian coordinates).
237 // Operations: 12 mul, 4 sqr
238 func (r *GroupElement4x64Jacobian) addVar(a, b *GroupElement4x64Jacobian) {
239 if a.infinity {
240 *r = *b
241 return
242 }
243 if b.infinity {
244 *r = *a
245 return
246 }
247
248 var z22, z12, u1, u2, s1, s2, h, i, h2, h3, t Field4x64
249
250 // z22 = b->z^2
251 z22.Sqr(&b.z)
252
253 // z12 = a->z^2
254 z12.Sqr(&a.z)
255
256 // u1 = a->x * z22
257 u1.Mul(&a.x, &z22)
258
259 // u2 = b->x * z12
260 u2.Mul(&b.x, &z12)
261
262 // s1 = a->y * z22 * b->z
263 s1.Mul(&a.y, &z22)
264 s1.Mul(&s1, &b.z)
265
266 // s2 = b->y * z12 * a->z
267 s2.Mul(&b.y, &z12)
268 s2.Mul(&s2, &a.z)
269
270 // h = u2 - u1
271 h.Negate(&u1)
272 h.Add(&h, &u2)
273
274 // i = s2 - s1
275 i.Negate(&s2)
276 i.Add(&i, &s1)
277
278 // Check if h normalizes to zero
279 h.Reduce()
280 if h.IsZero() {
281 i.Reduce()
282 if i.IsZero() {
283 // Points are equal - double
284 r.double(a)
285 return
286 } else {
287 // Points are negatives - result is infinity
288 r.setInfinity()
289 return
290 }
291 }
292
293 // General addition case
294 r.infinity = false
295
296 // t = h * b->z
297 t.Mul(&h, &b.z)
298
299 // r->z = a->z * t
300 r.z.Mul(&a.z, &t)
301
302 // h2 = h^2
303 h2.Sqr(&h)
304
305 // h2 = -h2
306 h2.Negate(&h2)
307
308 // h3 = h2 * h
309 h3.Mul(&h2, &h)
310
311 // t = u1 * h2
312 t.Mul(&u1, &h2)
313
314 // r->x = i^2
315 r.x.Sqr(&i)
316
317 // r->x = i^2 + h3
318 r.x.Add(&r.x, &h3)
319
320 // r->x = i^2 + h3 + t
321 r.x.Add(&r.x, &t)
322
323 // r->x = i^2 + h3 + 2*t
324 r.x.Add(&r.x, &t)
325
326 // t = t + r->x
327 t.Add(&t, &r.x)
328
329 // r->y = t * i
330 r.y.Mul(&t, &i)
331
332 // h3 = h3 * s1
333 h3.Mul(&h3, &s1)
334
335 // r->y = t * i + h3
336 r.y.Add(&r.y, &h3)
337 }
338
339 // setGEJ converts a Jacobian point to affine.
340 func (r *GroupElement4x64Affine) setGEJ(a *GroupElement4x64Jacobian) {
341 if a.infinity {
342 r.setInfinity()
343 return
344 }
345
346 r.infinity = false
347
348 // Compute z^(-1)
349 var zInv Field4x64
350 zInv.inv(&a.z)
351
352 // z2 = z^(-2)
353 var z2 Field4x64
354 z2.Sqr(&zInv)
355
356 // z3 = z^(-3)
357 var z3 Field4x64
358 z3.Mul(&zInv, &z2)
359
360 // x = X * z^(-2)
361 r.x.Mul(&a.x, &z2)
362
363 // y = Y * z^(-3)
364 r.y.Mul(&a.y, &z3)
365 }
366
367 // batchNormalize4x64 batch converts Jacobian points to affine using Montgomery's trick.
368 // This is much more efficient than individual conversions (1 inversion vs N inversions).
369 func batchNormalize4x64(dst []GroupElement4x64Affine, src []GroupElement4x64Jacobian) {
370 n := len(src)
371 if n == 0 {
372 return
373 }
374 if len(dst) < n {
375 panic("dst too small")
376 }
377
378 // Use Montgomery's trick: compute product of all z values, invert once,
379 // then extract individual inverses
380
381 // Allocate temporary storage for running products
382 var products [glvTableSize]Field4x64
383
384 // Compute running products: products[i] = z[0] * z[1] * ... * z[i]
385 // Skip infinity points
386 validCount := 0
387 for i := 0; i < n; i++ {
388 if src[i].infinity {
389 dst[i].setInfinity()
390 continue
391 }
392
393 if validCount == 0 {
394 products[validCount] = src[i].z
395 } else {
396 products[validCount].Mul(&products[validCount-1], &src[i].z)
397 }
398 validCount++
399 }
400
401 if validCount == 0 {
402 return
403 }
404
405 // Invert the final product (only 1 inversion!)
406 var invProduct Field4x64
407 invProduct.inv(&products[validCount-1])
408
409 // Extract individual inverses using Montgomery's trick
410 // z_inv[i] = invProduct * products[i-1]
411 validIdx := validCount - 1
412 for i := n - 1; i >= 0; i-- {
413 if src[i].infinity {
414 continue
415 }
416
417 var zInv Field4x64
418 if validIdx == 0 {
419 zInv = invProduct
420 } else {
421 zInv.Mul(&invProduct, &products[validIdx-1])
422 // Update invProduct for next iteration
423 invProduct.Mul(&invProduct, &src[i].z)
424 }
425 validIdx--
426
427 // Compute affine coordinates
428 dst[i].infinity = false
429
430 // z2 = z^(-2)
431 var z2 Field4x64
432 z2.Sqr(&zInv)
433
434 // z3 = z^(-3)
435 var z3 Field4x64
436 z3.Mul(&zInv, &z2)
437
438 // x = X * z^(-2)
439 dst[i].x.Mul(&src[i].x, &z2)
440
441 // y = Y * z^(-3)
442 dst[i].y.Mul(&src[i].y, &z3)
443 }
444 }
445
446 // FromFieldElement converts a FieldElement to Field4x64.
447 func (f *Field4x64) FromFieldElement(a *FieldElement) {
448 // Normalize input first
449 var aNorm FieldElement
450 aNorm = *a
451 aNorm.normalizeWeak()
452
453 // Pack 5x52 limbs into 4x64
454 f.n[0] = aNorm.n[0] | (aNorm.n[1] << 52)
455 f.n[1] = (aNorm.n[1] >> 12) | (aNorm.n[2] << 40)
456 f.n[2] = (aNorm.n[2] >> 24) | (aNorm.n[3] << 28)
457 f.n[3] = (aNorm.n[3] >> 36) | (aNorm.n[4] << 16)
458 f.magnitude = 1
459 f.normalized = false
460 }
461
462 // ToFieldElement converts Field4x64 to FieldElement.
463 func (f *Field4x64) ToFieldElement(r *FieldElement) {
464 // Ensure normalized
465 if f.magnitude > 1 {
466 f.Reduce()
467 }
468
469 // Unpack 4x64 to 5x52
470 r.n[0] = f.n[0] & 0xFFFFFFFFFFFFF
471 r.n[1] = ((f.n[0] >> 52) | (f.n[1] << 12)) & 0xFFFFFFFFFFFFF
472 r.n[2] = ((f.n[1] >> 40) | (f.n[2] << 24)) & 0xFFFFFFFFFFFFF
473 r.n[3] = ((f.n[2] >> 28) | (f.n[3] << 36)) & 0xFFFFFFFFFFFFF
474 r.n[4] = (f.n[3] >> 16) & 0x0FFFFFFFFFFFF
475
476 r.magnitude = 1
477 r.normalized = false
478 }
479
480 // FromGroupElementJacobian converts from the standard type.
481 func (r *GroupElement4x64Jacobian) FromGroupElementJacobian(a *GroupElementJacobian) {
482 if a.infinity {
483 r.setInfinity()
484 return
485 }
486 r.x.FromFieldElement(&a.x)
487 r.y.FromFieldElement(&a.y)
488 r.z.FromFieldElement(&a.z)
489 r.infinity = false
490 }
491
492 // ToGroupElementJacobian converts to the standard type.
493 func (r *GroupElement4x64Jacobian) ToGroupElementJacobian(a *GroupElementJacobian) {
494 if r.infinity {
495 a.setInfinity()
496 return
497 }
498 r.x.ToFieldElement(&a.x)
499 r.y.ToFieldElement(&a.y)
500 r.z.ToFieldElement(&a.z)
501 a.infinity = false
502 }
503
504 // FromGroupElementAffine converts from the standard type.
505 func (r *GroupElement4x64Affine) FromGroupElementAffine(a *GroupElementAffine) {
506 if a.infinity {
507 r.setInfinity()
508 return
509 }
510 r.x.FromFieldElement(&a.x)
511 r.y.FromFieldElement(&a.y)
512 r.infinity = false
513 }
514
515 // ToGroupElementAffine converts to the standard type.
516 func (r *GroupElement4x64Affine) ToGroupElementAffine(a *GroupElementAffine) {
517 if r.infinity {
518 a.setInfinity()
519 return
520 }
521 r.x.ToFieldElement(&a.x)
522 r.y.ToFieldElement(&a.y)
523 a.infinity = false
524 }
525