p256.mx raw
1 // Copyright 2022 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build (!amd64 && !arm64 && !ppc64le && !s390x) || purego
6
7 package nistec
8
9 import (
10 "crypto/internal/fips140/nistec/fiat"
11 "crypto/internal/fips140/subtle"
12 "crypto/internal/fips140deps/byteorder"
13 "crypto/internal/fips140deps/cpu"
14 "errors"
15 "math/bits"
16 "sync"
17 "unsafe"
18 )
19
20 // P256Point is a P-256 point. The zero value is NOT valid.
21 type P256Point struct {
22 // The point is represented in projective coordinates (X:Y:Z), where x = X/Z
23 // and y = Y/Z. Infinity is (0:1:0).
24 //
25 // fiat.P256Element is a base field element in [0, P-1] in the Montgomery
26 // domain (with R 2²⁵⁶ and P 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1) as four limbs in
27 // little-endian order value.
28 x, y, z fiat.P256Element
29 }
30
31 // NewP256Point returns a new P256Point representing the point at infinity point.
32 func NewP256Point() *P256Point {
33 p := &P256Point{}
34 p.y.One()
35 return p
36 }
37
38 // SetGenerator sets p to the canonical generator and returns p.
39 func (p *P256Point) SetGenerator() *P256Point {
40 p.x.SetBytes([]byte{0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, 0x77, 0x3, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96})
41 p.y.SetBytes([]byte{0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0xf, 0x9e, 0x16, 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5})
42 p.z.One()
43 return p
44 }
45
46 // Set sets p = q and returns p.
47 func (p *P256Point) Set(q *P256Point) *P256Point {
48 p.x.Set(&q.x)
49 p.y.Set(&q.y)
50 p.z.Set(&q.z)
51 return p
52 }
53
54 const p256ElementLength = 32
55 const p256UncompressedLength = 1 + 2*p256ElementLength
56 const p256CompressedLength = 1 + p256ElementLength
57
58 // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
59 // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
60 // the curve, it returns nil and an error, and the receiver is unchanged.
61 // Otherwise, it returns p.
62 func (p *P256Point) SetBytes(b []byte) (*P256Point, error) {
63 switch {
64 // Point at infinity.
65 case len(b) == 1 && b[0] == 0:
66 return p.Set(NewP256Point()), nil
67
68 // Uncompressed form.
69 case len(b) == p256UncompressedLength && b[0] == 4:
70 x, err := (&fiat.P256Element{}).SetBytes(b[1 : 1+p256ElementLength])
71 if err != nil {
72 return nil, err
73 }
74 y, err := (&fiat.P256Element{}).SetBytes(b[1+p256ElementLength:])
75 if err != nil {
76 return nil, err
77 }
78 if err := p256CheckOnCurve(x, y); err != nil {
79 return nil, err
80 }
81 p.x.Set(x)
82 p.y.Set(y)
83 p.z.One()
84 return p, nil
85
86 // Compressed form.
87 case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3):
88 x, err := (&fiat.P256Element{}).SetBytes(b[1:])
89 if err != nil {
90 return nil, err
91 }
92
93 // y² = x³ - 3x + b
94 y := p256Polynomial(&fiat.P256Element{}, x)
95 if !p256Sqrt(y, y) {
96 return nil, errors.New("invalid P256 compressed point encoding")
97 }
98
99 // Select the positive or negative root, as indicated by the least
100 // significant bit, based on the encoding type byte.
101 otherRoot := &fiat.P256Element{}
102 otherRoot.Sub(otherRoot, y)
103 cond := y.Bytes()[p256ElementLength-1]&1 ^ b[0]&1
104 y.Select(otherRoot, y, int(cond))
105
106 p.x.Set(x)
107 p.y.Set(y)
108 p.z.One()
109 return p, nil
110
111 default:
112 return nil, errors.New("invalid P256 point encoding")
113 }
114 }
115
116 var _p256B *fiat.P256Element
117 var _p256BOnce sync.Once
118
119 func p256B() *fiat.P256Element {
120 _p256BOnce.Do(func() {
121 _p256B, _ = (&fiat.P256Element{}).SetBytes([]byte{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, 0x65, 0x1d, 0x6, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b})
122 })
123 return _p256B
124 }
125
126 // p256Polynomial sets y2 to x³ - 3x + b, and returns y2.
127 func p256Polynomial(y2, x *fiat.P256Element) *fiat.P256Element {
128 y2.Square(x)
129 y2.Mul(y2, x)
130
131 threeX := (&fiat.P256Element{}).Add(x, x)
132 threeX.Add(threeX, x)
133 y2.Sub(y2, threeX)
134
135 return y2.Add(y2, p256B())
136 }
137
138 func p256CheckOnCurve(x, y *fiat.P256Element) error {
139 // y² = x³ - 3x + b
140 rhs := p256Polynomial(&fiat.P256Element{}, x)
141 lhs := (&fiat.P256Element{}).Square(y)
142 if rhs.Equal(lhs) != 1 {
143 return errors.New("P256 point not on curve")
144 }
145 return nil
146 }
147
148 // Bytes returns the uncompressed or infinity encoding of p, as specified in
149 // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
150 // infinity is shorter than all other encodings.
151 func (p *P256Point) Bytes() []byte {
152 // This function is outlined to make the allocations inline in the caller
153 // rather than happen on the heap.
154 var out [p256UncompressedLength]byte
155 return p.bytes(&out)
156 }
157
158 func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte {
159 // The SEC 1 representation of the point at infinity is a single zero byte,
160 // and only infinity has z = 0.
161 if p.z.IsZero() == 1 {
162 return append(out[:0], 0)
163 }
164
165 zinv := (&fiat.P256Element{}).Invert(&p.z)
166 x := (&fiat.P256Element{}).Mul(&p.x, zinv)
167 y := (&fiat.P256Element{}).Mul(&p.y, zinv)
168
169 buf := append(out[:0], 4)
170 buf = append(buf, x.Bytes()...)
171 buf = append(buf, y.Bytes()...)
172 return buf
173 }
174
175 // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
176 // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
177 func (p *P256Point) BytesX() ([]byte, error) {
178 // This function is outlined to make the allocations inline in the caller
179 // rather than happen on the heap.
180 var out [p256ElementLength]byte
181 return p.bytesX(&out)
182 }
183
184 func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) {
185 if p.z.IsZero() == 1 {
186 return nil, errors.New("P256 point is the point at infinity")
187 }
188
189 zinv := (&fiat.P256Element{}).Invert(&p.z)
190 x := (&fiat.P256Element{}).Mul(&p.x, zinv)
191
192 return append(out[:0], x.Bytes()...), nil
193 }
194
195 // BytesCompressed returns the compressed or infinity encoding of p, as
196 // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
197 // point at infinity is shorter than all other encodings.
198 func (p *P256Point) BytesCompressed() []byte {
199 // This function is outlined to make the allocations inline in the caller
200 // rather than happen on the heap.
201 var out [p256CompressedLength]byte
202 return p.bytesCompressed(&out)
203 }
204
205 func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte {
206 if p.z.IsZero() == 1 {
207 return append(out[:0], 0)
208 }
209
210 zinv := (&fiat.P256Element{}).Invert(&p.z)
211 x := (&fiat.P256Element{}).Mul(&p.x, zinv)
212 y := (&fiat.P256Element{}).Mul(&p.y, zinv)
213
214 // Encode the sign of the y coordinate (indicated by the least significant
215 // bit) as the encoding type (2 or 3).
216 buf := append(out[:0], 2)
217 buf[0] |= y.Bytes()[p256ElementLength-1] & 1
218 buf = append(buf, x.Bytes()...)
219 return buf
220 }
221
222 // Add sets q = p1 + p2, and returns q. The points may overlap.
223 func (q *P256Point) Add(p1, p2 *P256Point) *P256Point {
224 // Complete addition formula for a = -3 from "Complete addition formulas for
225 // prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
226
227 t0 := (&fiat.P256Element{}).Mul(&p1.x, &p2.x) // t0 := X1 * X2
228 t1 := (&fiat.P256Element{}).Mul(&p1.y, &p2.y) // t1 := Y1 * Y2
229 t2 := (&fiat.P256Element{}).Mul(&p1.z, &p2.z) // t2 := Z1 * Z2
230 t3 := (&fiat.P256Element{}).Add(&p1.x, &p1.y) // t3 := X1 + Y1
231 t4 := (&fiat.P256Element{}).Add(&p2.x, &p2.y) // t4 := X2 + Y2
232 t3.Mul(t3, t4) // t3 := t3 * t4
233 t4.Add(t0, t1) // t4 := t0 + t1
234 t3.Sub(t3, t4) // t3 := t3 - t4
235 t4.Add(&p1.y, &p1.z) // t4 := Y1 + Z1
236 x3 := (&fiat.P256Element{}).Add(&p2.y, &p2.z) // X3 := Y2 + Z2
237 t4.Mul(t4, x3) // t4 := t4 * X3
238 x3.Add(t1, t2) // X3 := t1 + t2
239 t4.Sub(t4, x3) // t4 := t4 - X3
240 x3.Add(&p1.x, &p1.z) // X3 := X1 + Z1
241 y3 := (&fiat.P256Element{}).Add(&p2.x, &p2.z) // Y3 := X2 + Z2
242 x3.Mul(x3, y3) // X3 := X3 * Y3
243 y3.Add(t0, t2) // Y3 := t0 + t2
244 y3.Sub(x3, y3) // Y3 := X3 - Y3
245 z3 := (&fiat.P256Element{}).Mul(p256B(), t2) // Z3 := b * t2
246 x3.Sub(y3, z3) // X3 := Y3 - Z3
247 z3.Add(x3, x3) // Z3 := X3 + X3
248 x3.Add(x3, z3) // X3 := X3 + Z3
249 z3.Sub(t1, x3) // Z3 := t1 - X3
250 x3.Add(t1, x3) // X3 := t1 + X3
251 y3.Mul(p256B(), y3) // Y3 := b * Y3
252 t1.Add(t2, t2) // t1 := t2 + t2
253 t2.Add(t1, t2) // t2 := t1 + t2
254 y3.Sub(y3, t2) // Y3 := Y3 - t2
255 y3.Sub(y3, t0) // Y3 := Y3 - t0
256 t1.Add(y3, y3) // t1 := Y3 + Y3
257 y3.Add(t1, y3) // Y3 := t1 + Y3
258 t1.Add(t0, t0) // t1 := t0 + t0
259 t0.Add(t1, t0) // t0 := t1 + t0
260 t0.Sub(t0, t2) // t0 := t0 - t2
261 t1.Mul(t4, y3) // t1 := t4 * Y3
262 t2.Mul(t0, y3) // t2 := t0 * Y3
263 y3.Mul(x3, z3) // Y3 := X3 * Z3
264 y3.Add(y3, t2) // Y3 := Y3 + t2
265 x3.Mul(t3, x3) // X3 := t3 * X3
266 x3.Sub(x3, t1) // X3 := X3 - t1
267 z3.Mul(t4, z3) // Z3 := t4 * Z3
268 t1.Mul(t3, t0) // t1 := t3 * t0
269 z3.Add(z3, t1) // Z3 := Z3 + t1
270
271 q.x.Set(x3)
272 q.y.Set(y3)
273 q.z.Set(z3)
274 return q
275 }
276
277 // Double sets q = p + p, and returns q. The points may overlap.
278 func (q *P256Point) Double(p *P256Point) *P256Point {
279 // Complete addition formula for a = -3 from "Complete addition formulas for
280 // prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
281
282 t0 := (&fiat.P256Element{}).Square(&p.x) // t0 := X ^ 2
283 t1 := (&fiat.P256Element{}).Square(&p.y) // t1 := Y ^ 2
284 t2 := (&fiat.P256Element{}).Square(&p.z) // t2 := Z ^ 2
285 t3 := (&fiat.P256Element{}).Mul(&p.x, &p.y) // t3 := X * Y
286 t3.Add(t3, t3) // t3 := t3 + t3
287 z3 := (&fiat.P256Element{}).Mul(&p.x, &p.z) // Z3 := X * Z
288 z3.Add(z3, z3) // Z3 := Z3 + Z3
289 y3 := (&fiat.P256Element{}).Mul(p256B(), t2) // Y3 := b * t2
290 y3.Sub(y3, z3) // Y3 := Y3 - Z3
291 x3 := (&fiat.P256Element{}).Add(y3, y3) // X3 := Y3 + Y3
292 y3.Add(x3, y3) // Y3 := X3 + Y3
293 x3.Sub(t1, y3) // X3 := t1 - Y3
294 y3.Add(t1, y3) // Y3 := t1 + Y3
295 y3.Mul(x3, y3) // Y3 := X3 * Y3
296 x3.Mul(x3, t3) // X3 := X3 * t3
297 t3.Add(t2, t2) // t3 := t2 + t2
298 t2.Add(t2, t3) // t2 := t2 + t3
299 z3.Mul(p256B(), z3) // Z3 := b * Z3
300 z3.Sub(z3, t2) // Z3 := Z3 - t2
301 z3.Sub(z3, t0) // Z3 := Z3 - t0
302 t3.Add(z3, z3) // t3 := Z3 + Z3
303 z3.Add(z3, t3) // Z3 := Z3 + t3
304 t3.Add(t0, t0) // t3 := t0 + t0
305 t0.Add(t3, t0) // t0 := t3 + t0
306 t0.Sub(t0, t2) // t0 := t0 - t2
307 t0.Mul(t0, z3) // t0 := t0 * Z3
308 y3.Add(y3, t0) // Y3 := Y3 + t0
309 t0.Mul(&p.y, &p.z) // t0 := Y * Z
310 t0.Add(t0, t0) // t0 := t0 + t0
311 z3.Mul(t0, z3) // Z3 := t0 * Z3
312 x3.Sub(x3, z3) // X3 := X3 - Z3
313 z3.Mul(t0, t1) // Z3 := t0 * t1
314 z3.Add(z3, z3) // Z3 := Z3 + Z3
315 z3.Add(z3, z3) // Z3 := Z3 + Z3
316
317 q.x.Set(x3)
318 q.y.Set(y3)
319 q.z.Set(z3)
320 return q
321 }
322
323 // p256AffinePoint is a point in affine coordinates (x, y). x and y are still
324 // Montgomery domain elements. The point can't be the point at infinity.
325 type p256AffinePoint struct {
326 x, y fiat.P256Element
327 }
328
329 func (p *p256AffinePoint) Projective() *P256Point {
330 pp := &P256Point{x: p.x, y: p.y}
331 pp.z.One()
332 return pp
333 }
334
335 // AddAffine sets q = p1 + p2, if infinity == 0, and to p1 if infinity == 1.
336 // p2 can't be the point at infinity as it can't be represented in affine
337 // coordinates, instead callers can set p2 to an arbitrary point and set
338 // infinity to 1.
339 func (q *P256Point) AddAffine(p1 *P256Point, p2 *p256AffinePoint, infinity int) *P256Point {
340 // Complete mixed addition formula for a = -3 from "Complete addition
341 // formulas for prime order elliptic curves"
342 // (https://eprint.iacr.org/2015/1060), Algorithm 5.
343
344 t0 := (&fiat.P256Element{}).Mul(&p1.x, &p2.x) // t0 ← X1 · X2
345 t1 := (&fiat.P256Element{}).Mul(&p1.y, &p2.y) // t1 ← Y1 · Y2
346 t3 := (&fiat.P256Element{}).Add(&p2.x, &p2.y) // t3 ← X2 + Y2
347 t4 := (&fiat.P256Element{}).Add(&p1.x, &p1.y) // t4 ← X1 + Y1
348 t3.Mul(t3, t4) // t3 ← t3 · t4
349 t4.Add(t0, t1) // t4 ← t0 + t1
350 t3.Sub(t3, t4) // t3 ← t3 − t4
351 t4.Mul(&p2.y, &p1.z) // t4 ← Y2 · Z1
352 t4.Add(t4, &p1.y) // t4 ← t4 + Y1
353 y3 := (&fiat.P256Element{}).Mul(&p2.x, &p1.z) // Y3 ← X2 · Z1
354 y3.Add(y3, &p1.x) // Y3 ← Y3 + X1
355 z3 := (&fiat.P256Element{}).Mul(p256B(), &p1.z) // Z3 ← b · Z1
356 x3 := (&fiat.P256Element{}).Sub(y3, z3) // X3 ← Y3 − Z3
357 z3.Add(x3, x3) // Z3 ← X3 + X3
358 x3.Add(x3, z3) // X3 ← X3 + Z3
359 z3.Sub(t1, x3) // Z3 ← t1 − X3
360 x3.Add(t1, x3) // X3 ← t1 + X3
361 y3.Mul(p256B(), y3) // Y3 ← b · Y3
362 t1.Add(&p1.z, &p1.z) // t1 ← Z1 + Z1
363 t2 := (&fiat.P256Element{}).Add(t1, &p1.z) // t2 ← t1 + Z1
364 y3.Sub(y3, t2) // Y3 ← Y3 − t2
365 y3.Sub(y3, t0) // Y3 ← Y3 − t0
366 t1.Add(y3, y3) // t1 ← Y3 + Y3
367 y3.Add(t1, y3) // Y3 ← t1 + Y3
368 t1.Add(t0, t0) // t1 ← t0 + t0
369 t0.Add(t1, t0) // t0 ← t1 + t0
370 t0.Sub(t0, t2) // t0 ← t0 − t2
371 t1.Mul(t4, y3) // t1 ← t4 · Y3
372 t2.Mul(t0, y3) // t2 ← t0 · Y3
373 y3.Mul(x3, z3) // Y3 ← X3 · Z3
374 y3.Add(y3, t2) // Y3 ← Y3 + t2
375 x3.Mul(t3, x3) // X3 ← t3 · X3
376 x3.Sub(x3, t1) // X3 ← X3 − t1
377 z3.Mul(t4, z3) // Z3 ← t4 · Z3
378 t1.Mul(t3, t0) // t1 ← t3 · t0
379 z3.Add(z3, t1) // Z3 ← Z3 + t1
380
381 q.x.Select(&p1.x, x3, infinity)
382 q.y.Select(&p1.y, y3, infinity)
383 q.z.Select(&p1.z, z3, infinity)
384 return q
385 }
386
387 // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
388 func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point {
389 q.x.Select(&p1.x, &p2.x, cond)
390 q.y.Select(&p1.y, &p2.y, cond)
391 q.z.Select(&p1.z, &p2.z, cond)
392 return q
393 }
394
395 // p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the
396 // Montgomery domain (with R 2²⁵⁶) as four uint64 limbs in little-endian order.
397 type p256OrdElement [4]uint64
398
399 // SetBytes sets s to the big-endian value of x, reducing it as necessary.
400 func (s *p256OrdElement) SetBytes(x []byte) (*p256OrdElement, error) {
401 if len(x) != 32 {
402 return nil, errors.New("invalid scalar length")
403 }
404
405 s[0] = byteorder.BEUint64(x[24:])
406 s[1] = byteorder.BEUint64(x[16:])
407 s[2] = byteorder.BEUint64(x[8:])
408 s[3] = byteorder.BEUint64(x[:])
409
410 // Ensure s is in the range [0, ord(G)-1]. Since 2 * ord(G) > 2²⁵⁶, we can
411 // just conditionally subtract ord(G), keeping the result if it doesn't
412 // underflow.
413 t0, b := bits.Sub64(s[0], 0xf3b9cac2fc632551, 0)
414 t1, b := bits.Sub64(s[1], 0xbce6faada7179e84, b)
415 t2, b := bits.Sub64(s[2], 0xffffffffffffffff, b)
416 t3, b := bits.Sub64(s[3], 0xffffffff00000000, b)
417 tMask := b - 1 // zero if subtraction underflowed
418 s[0] ^= (t0 ^ s[0]) & tMask
419 s[1] ^= (t1 ^ s[1]) & tMask
420 s[2] ^= (t2 ^ s[2]) & tMask
421 s[3] ^= (t3 ^ s[3]) & tMask
422
423 return s, nil
424 }
425
426 func (s *p256OrdElement) Bytes() []byte {
427 var out [32]byte
428 byteorder.BEPutUint64(out[24:], s[0])
429 byteorder.BEPutUint64(out[16:], s[1])
430 byteorder.BEPutUint64(out[8:], s[2])
431 byteorder.BEPutUint64(out[:], s[3])
432 return out[:]
433 }
434
435 // Rsh returns the 64 least significant bits of x >> n. n must be lower
436 // than 256. The value of n leaks through timing side-channels.
437 func (s *p256OrdElement) Rsh(n int) uint64 {
438 i := n / 64
439 n = n % 64
440 res := s[i] >> n
441 // Shift in the more significant limb, if present.
442 if i := i + 1; i < len(s) {
443 res |= s[i] << (64 - n)
444 }
445 return res
446 }
447
448 // p256Table is a table of the first 16 multiples of a point. Points are stored
449 // at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15.
450 // [0]P is the point at infinity and it's not stored.
451 type p256Table [16]P256Point
452
453 // Select selects the n-th multiple of the table base point into p. It works in
454 // constant time. n must be in [0, 16]. If n is 0, p is set to the identity point.
455 func (table *p256Table) Select(p *P256Point, n uint8) {
456 if n > 16 {
457 panic("nistec: internal error: p256Table called with out-of-bounds value")
458 }
459 p.Set(NewP256Point())
460 for i := uint8(1); i <= 16; i++ {
461 cond := subtle.ConstantTimeByteEq(i, n)
462 p.Select(&table[i-1], p, cond)
463 }
464 }
465
466 // Compute populates the table to the first 16 multiples of q.
467 func (table *p256Table) Compute(q *P256Point) *p256Table {
468 table[0].Set(q)
469 for i := 1; i < 16; i += 2 {
470 table[i].Double(&table[i/2])
471 if i+1 < 16 {
472 table[i+1].Add(&table[i], q)
473 }
474 }
475 return table
476 }
477
478 func boothW5(in uint64) (uint8, int) {
479 s := ^((in >> 5) - 1)
480 d := (1 << 6) - in - 1
481 d = (d & s) | (in & (^s))
482 d = (d >> 1) + (d & 1)
483 return uint8(d), int(s & 1)
484 }
485
486 // ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value,
487 // and returns r. If scalar is not 32 bytes long, ScalarMult returns an error
488 // and the receiver is unchanged.
489 func (p *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) {
490 s, err := (&p256OrdElement{}).SetBytes(scalar)
491 if err != nil {
492 return nil, err
493 }
494
495 // Start scanning the window from the most significant bits. We move by
496 // 5 bits at a time and need to finish at -1, so -1 + 5 * 51 = 254.
497 index := 254
498
499 sel, sign := boothW5(s.Rsh(index))
500 // sign is always zero because the boothW5 input here is at
501 // most two bits long, so the top bit is never set.
502 _ = sign
503
504 // Neither Select nor Add have exceptions for the point at infinity /
505 // selector zero, so we don't need to check for it here or in the loop.
506 table := (&p256Table{}).Compute(q)
507 table.Select(p, sel)
508
509 t := NewP256Point()
510 for index >= 4 {
511 index -= 5
512
513 p.Double(p)
514 p.Double(p)
515 p.Double(p)
516 p.Double(p)
517 p.Double(p)
518
519 if index >= 0 {
520 sel, sign = boothW5(s.Rsh(index) & 0b111111)
521 } else {
522 // Booth encoding considers a virtual zero bit at index -1,
523 // so we shift left the least significant limb.
524 wvalue := (s[0] << 1) & 0b111111
525 sel, sign = boothW5(wvalue)
526 }
527
528 table.Select(t, sel)
529 t.Negate(sign)
530 p.Add(p, t)
531 }
532
533 return p, nil
534 }
535
536 // Negate sets p to -p, if cond == 1, and to p if cond == 0.
537 func (p *P256Point) Negate(cond int) *P256Point {
538 negY := &fiat.P256Element{}
539 negY.Sub(negY, &p.y)
540 p.y.Select(negY, &p.y, cond)
541 return p
542 }
543
544 // p256AffineTable is a table of the first 32 multiples of a point. Points are
545 // stored at an index offset of -1 like in p256Table, and [0]P is not stored.
546 type p256AffineTable [32]p256AffinePoint
547
548 // Select selects the n-th multiple of the table base point into p. It works in
549 // constant time. n can be in [0, 32], but (unlike p256Table.Select) if n is 0,
550 // p is set to an undefined value.
551 func (table *p256AffineTable) Select(p *p256AffinePoint, n uint8) {
552 if n > 32 {
553 panic("nistec: internal error: p256AffineTable.Select called with out-of-bounds value")
554 }
555 for i := uint8(1); i <= 32; i++ {
556 cond := subtle.ConstantTimeByteEq(i, n)
557 p.x.Select(&table[i-1].x, &p.x, cond)
558 p.y.Select(&table[i-1].y, &p.y, cond)
559 }
560 }
561
562 // p256GeneratorTables is a series of precomputed multiples of G, the canonical
563 // generator. The first p256AffineTable contains multiples of G. The second one
564 // multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive
565 // table is the previous table doubled six times. Six is the width of the
566 // sliding window used in ScalarBaseMult, and having each table already
567 // pre-doubled lets us avoid the doublings between windows entirely. This table
568 // aliases into p256PrecomputedEmbed.
569 var p256GeneratorTables *[43]p256AffineTable
570
571 func init() {
572 p256GeneratorTablesPtr := unsafe.Pointer(&p256PrecomputedEmbed)
573 if cpu.BigEndian {
574 var newTable [43 * 32 * 2 * 4]uint64
575 for i, x := range (*[43 * 32 * 2 * 4][8]byte)(p256GeneratorTablesPtr) {
576 newTable[i] = byteorder.LEUint64(x[:])
577 }
578 p256GeneratorTablesPtr = unsafe.Pointer(&newTable)
579 }
580 p256GeneratorTables = (*[43]p256AffineTable)(p256GeneratorTablesPtr)
581 }
582
583 func boothW6(in uint64) (uint8, int) {
584 s := ^((in >> 6) - 1)
585 d := (1 << 7) - in - 1
586 d = (d & s) | (in & (^s))
587 d = (d >> 1) + (d & 1)
588 return uint8(d), int(s & 1)
589 }
590
591 // ScalarBaseMult sets p = scalar * generator, where scalar is a 32-byte big
592 // endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult
593 // returns an error and the receiver is unchanged.
594 func (p *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) {
595 // This function works like ScalarMult above, but the table is fixed and
596 // "pre-doubled" for each iteration, so instead of doubling we move to the
597 // next table at each iteration.
598
599 s, err := (&p256OrdElement{}).SetBytes(scalar)
600 if err != nil {
601 return nil, err
602 }
603
604 // Start scanning the window from the most significant bits. We move by
605 // 6 bits at a time and need to finish at -1, so -1 + 6 * 42 = 251.
606 index := 251
607
608 sel, sign := boothW6(s.Rsh(index))
609 // sign is always zero because the boothW6 input here is at
610 // most five bits long, so the top bit is never set.
611 _ = sign
612
613 t := &p256AffinePoint{}
614 table := &p256GeneratorTables[(index+1)/6]
615 table.Select(t, sel)
616
617 // Select's output is undefined if the selector is zero, when it should be
618 // the point at infinity (because infinity can't be represented in affine
619 // coordinates). Here we conditionally set p to the infinity if sel is zero.
620 // In the loop, that's handled by AddAffine.
621 selIsZero := subtle.ConstantTimeByteEq(sel, 0)
622 p.Select(NewP256Point(), t.Projective(), selIsZero)
623
624 for index >= 5 {
625 index -= 6
626
627 if index >= 0 {
628 sel, sign = boothW6(s.Rsh(index) & 0b1111111)
629 } else {
630 // Booth encoding considers a virtual zero bit at index -1,
631 // so we shift left the least significant limb.
632 wvalue := (s[0] << 1) & 0b1111111
633 sel, sign = boothW6(wvalue)
634 }
635
636 table := &p256GeneratorTables[(index+1)/6]
637 table.Select(t, sel)
638 t.Negate(sign)
639 selIsZero := subtle.ConstantTimeByteEq(sel, 0)
640 p.AddAffine(p, t, selIsZero)
641 }
642
643 return p, nil
644 }
645
646 // Negate sets p to -p, if cond == 1, and to p if cond == 0.
647 func (p *p256AffinePoint) Negate(cond int) *p256AffinePoint {
648 negY := &fiat.P256Element{}
649 negY.Sub(negY, &p.y)
650 p.y.Select(negY, &p.y, cond)
651 return p
652 }
653
654 // p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns
655 // false and e is unchanged. e and x can overlap.
656 func p256Sqrt(e, x *fiat.P256Element) (isSquare bool) {
657 t0, t1 := &fiat.P256Element{}, &fiat.P256Element{}
658
659 // Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
660 //
661 // The sequence of 7 multiplications and 253 squarings is derived from the
662 // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
663 //
664 // _10 = 2*1
665 // _11 = 1 + _10
666 // _1100 = _11 << 2
667 // _1111 = _11 + _1100
668 // _11110000 = _1111 << 4
669 // _11111111 = _1111 + _11110000
670 // x16 = _11111111 << 8 + _11111111
671 // x32 = x16 << 16 + x16
672 // return ((x32 << 32 + 1) << 96 + 1) << 94
673 //
674 p256Square(t0, x, 1)
675 t0.Mul(x, t0)
676 p256Square(t1, t0, 2)
677 t0.Mul(t0, t1)
678 p256Square(t1, t0, 4)
679 t0.Mul(t0, t1)
680 p256Square(t1, t0, 8)
681 t0.Mul(t0, t1)
682 p256Square(t1, t0, 16)
683 t0.Mul(t0, t1)
684 p256Square(t0, t0, 32)
685 t0.Mul(x, t0)
686 p256Square(t0, t0, 96)
687 t0.Mul(x, t0)
688 p256Square(t0, t0, 94)
689
690 // Check if the candidate t0 is indeed a square root of x.
691 t1.Square(t0)
692 if t1.Equal(x) != 1 {
693 return false
694 }
695 e.Set(t0)
696 return true
697 }
698
699 // p256Square sets e to the square of x, repeated n times > 1.
700 func p256Square(e, x *fiat.P256Element, n int) {
701 e.Square(x)
702 for i := 1; i < n; i++ {
703 e.Square(e)
704 }
705 }
706