1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly
6 // implementation of P256. The optimizations performed here are described in
7 // detail in:
8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
9 // 256-bit primes"
10 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
11 // https://eprint.iacr.org/2013/816.pdf
12 13 //go:build (amd64 || arm64 || ppc64le || s390x) && !purego
14 15 package nistec
16 17 import (
18 "crypto/internal/fips140deps/byteorder"
19 "errors"
20 "math/bits"
21 "runtime"
22 "unsafe"
23 )
24 25 // p256Element is a P-256 base field element in [0, P-1] in the Montgomery
26 // domain (with R 2²⁵⁶) as four limbs in little-endian order value.
27 type p256Element [4]uint64
28 29 // p256One is one in the Montgomery domain.
30 var p256One = p256Element{0x0000000000000001, 0xffffffff00000000,
31 0xffffffffffffffff, 0x00000000fffffffe}
32 33 var p256Zero = p256Element{}
34 35 // p256P is 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1 in the Montgomery domain.
36 var p256P = p256Element{0xffffffffffffffff, 0x00000000ffffffff,
37 0x0000000000000000, 0xffffffff00000001}
38 39 // P256Point is a P-256 point. The zero value should not be assumed to be valid
40 // (although it is in this implementation).
41 type P256Point struct {
42 // (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point
43 // at infinity can be represented by any set of coordinates with Z = 0.
44 x, y, z p256Element
45 }
46 47 // NewP256Point returns a new P256Point representing the point at infinity.
48 func NewP256Point() *P256Point {
49 return &P256Point{
50 x: p256One, y: p256One, z: p256Zero,
51 }
52 }
53 54 // SetGenerator sets p to the canonical generator and returns p.
55 func (p *P256Point) SetGenerator() *P256Point {
56 p.x = p256Element{0x79e730d418a9143c, 0x75ba95fc5fedb601,
57 0x79fb732b77622510, 0x18905f76a53755c6}
58 p.y = p256Element{0xddf25357ce95560a, 0x8b4ab8e4ba19e45c,
59 0xd2e88688dd21f325, 0x8571ff1825885d85}
60 p.z = p256One
61 return p
62 }
63 64 // Set sets p = q and returns p.
65 func (p *P256Point) Set(q *P256Point) *P256Point {
66 p.x, p.y, p.z = q.x, q.y, q.z
67 return p
68 }
69 70 const p256ElementLength = 32
71 const p256UncompressedLength = 1 + 2*p256ElementLength
72 const p256CompressedLength = 1 + p256ElementLength
73 74 // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
75 // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
76 // the curve, it returns nil and an error, and the receiver is unchanged.
77 // Otherwise, it returns p.
78 func (p *P256Point) SetBytes(b []byte) (*P256Point, error) {
79 // p256Mul operates in the Montgomery domain with R = 2²⁵⁶ mod p. Thus rr
80 // here is R in the Montgomery domain, or R×R mod p. See comment in
81 // P256OrdInverse about how this is used.
82 rr := p256Element{0x0000000000000003, 0xfffffffbffffffff,
83 0xfffffffffffffffe, 0x00000004fffffffd}
84 85 switch {
86 // Point at infinity.
87 case len(b) == 1 && b[0] == 0:
88 return p.Set(NewP256Point()), nil
89 90 // Uncompressed form.
91 case len(b) == p256UncompressedLength && b[0] == 4:
92 var r P256Point
93 p256BigToLittle(&r.x, (*[32]byte)(b[1:33]))
94 p256BigToLittle(&r.y, (*[32]byte)(b[33:65]))
95 if p256LessThanP(&r.x) == 0 || p256LessThanP(&r.y) == 0 {
96 return nil, errors.New("invalid P256 element encoding")
97 }
98 p256Mul(&r.x, &r.x, &rr)
99 p256Mul(&r.y, &r.y, &rr)
100 if err := p256CheckOnCurve(&r.x, &r.y); err != nil {
101 return nil, err
102 }
103 r.z = p256One
104 return p.Set(&r), nil
105 106 // Compressed form.
107 case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3):
108 var r P256Point
109 p256BigToLittle(&r.x, (*[32]byte)(b[1:33]))
110 if p256LessThanP(&r.x) == 0 {
111 return nil, errors.New("invalid P256 element encoding")
112 }
113 p256Mul(&r.x, &r.x, &rr)
114 115 // y² = x³ - 3x + b
116 p256Polynomial(&r.y, &r.x)
117 if !p256Sqrt(&r.y, &r.y) {
118 return nil, errors.New("invalid P256 compressed point encoding")
119 }
120 121 // Select the positive or negative root, as indicated by the least
122 // significant bit, based on the encoding type byte.
123 yy := &p256Element{}
124 p256FromMont(yy, &r.y)
125 cond := int(yy[0]&1) ^ int(b[0]&1)
126 p256NegCond(&r.y, cond)
127 128 r.z = p256One
129 return p.Set(&r), nil
130 131 default:
132 return nil, errors.New("invalid P256 point encoding")
133 }
134 }
135 136 // p256Polynomial sets y2 to x³ - 3x + b, and returns y2.
137 func p256Polynomial(y2, x *p256Element) *p256Element {
138 x3 := &p256Element{}
139 p256Sqr(x3, x, 1)
140 p256Mul(x3, x3, x)
141 142 threeX := &p256Element{}
143 p256Add(threeX, x, x)
144 p256Add(threeX, threeX, x)
145 p256NegCond(threeX, 1)
146 147 p256B := &p256Element{0xd89cdf6229c4bddf, 0xacf005cd78843090,
148 0xe5a220abf7212ed6, 0xdc30061d04874834}
149 150 p256Add(x3, x3, threeX)
151 p256Add(x3, x3, p256B)
152 153 *y2 = *x3
154 return y2
155 }
156 157 func p256CheckOnCurve(x, y *p256Element) error {
158 // y² = x³ - 3x + b
159 rhs := p256Polynomial(&p256Element{}, x)
160 lhs := &p256Element{}
161 p256Sqr(lhs, y, 1)
162 if p256Equal(lhs, rhs) != 1 {
163 return errors.New("P256 point not on curve")
164 }
165 return nil
166 }
167 168 // p256LessThanP returns 1 if x < p, and 0 otherwise. Note that a p256Element is
169 // not allowed to be equal to or greater than p, so if this function returns 0
170 // then x is invalid.
171 func p256LessThanP(x *p256Element) int {
172 var b uint64
173 _, b = bits.Sub64(x[0], p256P[0], b)
174 _, b = bits.Sub64(x[1], p256P[1], b)
175 _, b = bits.Sub64(x[2], p256P[2], b)
176 _, b = bits.Sub64(x[3], p256P[3], b)
177 return int(b)
178 }
179 180 func p256BigToLittle(l *p256Element, b *[32]byte) {
181 bytesToLimbs((*[4]uint64)(l), b)
182 }
183 184 func bytesToLimbs(l *[4]uint64, b *[32]byte) {
185 l[0] = byteorder.BEUint64(b[24:])
186 l[1] = byteorder.BEUint64(b[16:])
187 l[2] = byteorder.BEUint64(b[8:])
188 l[3] = byteorder.BEUint64(b[:])
189 }
190 191 func p256LittleToBig(b *[32]byte, l *p256Element) {
192 limbsToBytes(b, (*[4]uint64)(l))
193 }
194 195 func limbsToBytes(b *[32]byte, l *[4]uint64) {
196 byteorder.BEPutUint64(b[24:], l[0])
197 byteorder.BEPutUint64(b[16:], l[1])
198 byteorder.BEPutUint64(b[8:], l[2])
199 byteorder.BEPutUint64(b[:], l[3])
200 }
201 202 // p256Add sets res = x + y.
203 func p256Add(res, x, y *p256Element) {
204 var c, b uint64
205 t1 := []uint64{:4}
206 t1[0], c = bits.Add64(x[0], y[0], 0)
207 t1[1], c = bits.Add64(x[1], y[1], c)
208 t1[2], c = bits.Add64(x[2], y[2], c)
209 t1[3], c = bits.Add64(x[3], y[3], c)
210 t2 := []uint64{:4}
211 t2[0], b = bits.Sub64(t1[0], p256P[0], 0)
212 t2[1], b = bits.Sub64(t1[1], p256P[1], b)
213 t2[2], b = bits.Sub64(t1[2], p256P[2], b)
214 t2[3], b = bits.Sub64(t1[3], p256P[3], b)
215 // Three options:
216 // - a+b < p
217 // then c is 0, b is 1, and t1 is correct
218 // - p <= a+b < 2^256
219 // then c is 0, b is 0, and t2 is correct
220 // - 2^256 <= a+b
221 // then c is 1, b is 1, and t2 is correct
222 t2Mask := (c ^ b) - 1
223 res[0] = (t1[0] & ^t2Mask) | (t2[0] & t2Mask)
224 res[1] = (t1[1] & ^t2Mask) | (t2[1] & t2Mask)
225 res[2] = (t1[2] & ^t2Mask) | (t2[2] & t2Mask)
226 res[3] = (t1[3] & ^t2Mask) | (t2[3] & t2Mask)
227 }
228 229 // p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns
230 // false and e is unchanged. e and x can overlap.
231 func p256Sqrt(e, x *p256Element) (isSquare bool) {
232 t0, t1 := &p256Element{}, &p256Element{}
233 234 // Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
235 //
236 // The sequence of 7 multiplications and 253 squarings is derived from the
237 // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
238 //
239 // _10 = 2*1
240 // _11 = 1 + _10
241 // _1100 = _11 << 2
242 // _1111 = _11 + _1100
243 // _11110000 = _1111 << 4
244 // _11111111 = _1111 + _11110000
245 // x16 = _11111111 << 8 + _11111111
246 // x32 = x16 << 16 + x16
247 // return ((x32 << 32 + 1) << 96 + 1) << 94
248 //
249 p256Sqr(t0, x, 1)
250 p256Mul(t0, x, t0)
251 p256Sqr(t1, t0, 2)
252 p256Mul(t0, t0, t1)
253 p256Sqr(t1, t0, 4)
254 p256Mul(t0, t0, t1)
255 p256Sqr(t1, t0, 8)
256 p256Mul(t0, t0, t1)
257 p256Sqr(t1, t0, 16)
258 p256Mul(t0, t0, t1)
259 p256Sqr(t0, t0, 32)
260 p256Mul(t0, x, t0)
261 p256Sqr(t0, t0, 96)
262 p256Mul(t0, x, t0)
263 p256Sqr(t0, t0, 94)
264 265 p256Sqr(t1, t0, 1)
266 if p256Equal(t1, x) != 1 {
267 return false
268 }
269 *e = *t0
270 return true
271 }
272 273 // The following assembly functions are implemented in p256_asm_*.s
274 275 // Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p.
276 //
277 //go:noescape
278 func p256Mul(res, in1, in2 *p256Element)
279 280 // Montgomery square, repeated n times (n >= 1).
281 //
282 //go:noescape
283 func p256Sqr(res, in *p256Element, n int)
284 285 // Montgomery multiplication by R⁻¹, or 1 outside the domain.
286 // Sets res = in * R⁻¹, bringing res out of the Montgomery domain.
287 //
288 //go:noescape
289 func p256FromMont(res, in *p256Element)
290 291 // If cond is not 0, sets val = -val mod p.
292 //
293 //go:noescape
294 func p256NegCond(val *p256Element, cond int)
295 296 // If cond is 0, sets res = b, otherwise sets res = a.
297 //
298 //go:noescape
299 func p256MovCond(res, a, b *P256Point, cond int)
300 301 // p256Table is a table of the first 16 multiples of a point. Points are stored
302 // at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15.
303 // [0]P is the point at infinity and it's not stored.
304 type p256Table [16]P256Point
305 306 // p256Select sets res to the point at index idx in the table.
307 // idx must be in [0, 15]. It executes in constant time.
308 //
309 //go:noescape
310 func p256Select(res *P256Point, table *p256Table, idx int)
311 312 // p256AffinePoint is a point in affine coordinates (x, y). x and y are still
313 // Montgomery domain elements. The point can't be the point at infinity.
314 type p256AffinePoint struct {
315 x, y p256Element
316 }
317 318 // p256AffineTable is a table of the first 32 multiples of a point. Points are
319 // stored at an index offset of -1 like in p256Table, and [0]P is not stored.
320 type p256AffineTable [32]p256AffinePoint
321 322 // p256Precomputed is a series of precomputed multiples of G, the canonical
323 // generator. The first p256AffineTable contains multiples of G. The second one
324 // multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive
325 // table is the previous table doubled six times. Six is the width of the
326 // sliding window used in p256ScalarBaseMult, and having each table already
327 // pre-doubled lets us avoid the doublings between windows entirely. This table
328 // aliases into p256PrecomputedEmbed.
329 var p256Precomputed *[43]p256AffineTable
330 331 func init() {
332 p256PrecomputedPtr := unsafe.Pointer(&p256PrecomputedEmbed)
333 if runtime.GOARCH == "s390x" {
334 var newTable [43 * 32 * 2 * 4]uint64
335 for i, x := range (*[43 * 32 * 2 * 4][8]byte)(p256PrecomputedPtr) {
336 newTable[i] = byteorder.LEUint64(x[:])
337 }
338 p256PrecomputedPtr = unsafe.Pointer(&newTable)
339 }
340 p256Precomputed = (*[43]p256AffineTable)(p256PrecomputedPtr)
341 }
342 343 // p256SelectAffine sets res to the point at index idx in the table.
344 // idx must be in [0, 31]. It executes in constant time.
345 //
346 //go:noescape
347 func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
348 349 // Point addition with an affine point and constant time conditions.
350 // If zero is 0, sets res = in2. If sel is 0, sets res = in1.
351 // If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2
352 //
353 //go:noescape
354 func p256PointAddAffineAsm(res, in1 *P256Point, in2 *p256AffinePoint, sign, sel, zero int)
355 356 // Point addition. Sets res = in1 + in2. Returns one if the two input points
357 // were equal and zero otherwise. If in1 or in2 are the point at infinity, res
358 // and the return value are undefined.
359 //
360 //go:noescape
361 func p256PointAddAsm(res, in1, in2 *P256Point) int
362 363 // Point doubling. Sets res = in + in. in can be the point at infinity.
364 //
365 //go:noescape
366 func p256PointDoubleAsm(res, in *P256Point)
367 368 // p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the
369 // Montgomery domain (with R 2²⁵⁶) as four uint64 limbs in little-endian order.
370 type p256OrdElement [4]uint64
371 372 // p256OrdReduce ensures s is in the range [0, ord(G)-1].
373 func p256OrdReduce(s *p256OrdElement) {
374 // Since 2 * ord(G) > 2²⁵⁶, we can just conditionally subtract ord(G),
375 // keeping the result if it doesn't underflow.
376 t0, b := bits.Sub64(s[0], 0xf3b9cac2fc632551, 0)
377 t1, b := bits.Sub64(s[1], 0xbce6faada7179e84, b)
378 t2, b := bits.Sub64(s[2], 0xffffffffffffffff, b)
379 t3, b := bits.Sub64(s[3], 0xffffffff00000000, b)
380 tMask := b - 1 // zero if subtraction underflowed
381 s[0] ^= (t0 ^ s[0]) & tMask
382 s[1] ^= (t1 ^ s[1]) & tMask
383 s[2] ^= (t2 ^ s[2]) & tMask
384 s[3] ^= (t3 ^ s[3]) & tMask
385 }
386 387 func p256OrdLittleToBig(b *[32]byte, l *p256OrdElement) {
388 limbsToBytes(b, (*[4]uint64)(l))
389 }
390 391 func p256OrdBigToLittle(l *p256OrdElement, b *[32]byte) {
392 bytesToLimbs((*[4]uint64)(l), b)
393 }
394 395 // Add sets q = p1 + p2, and returns q. The points may overlap.
396 func (q *P256Point) Add(r1, r2 *P256Point) *P256Point {
397 var sum, double P256Point
398 r1IsInfinity := r1.isInfinity()
399 r2IsInfinity := r2.isInfinity()
400 pointsEqual := p256PointAddAsm(&sum, r1, r2)
401 p256PointDoubleAsm(&double, r1)
402 p256MovCond(&sum, &double, &sum, pointsEqual)
403 p256MovCond(&sum, r1, &sum, r2IsInfinity)
404 p256MovCond(&sum, r2, &sum, r1IsInfinity)
405 return q.Set(&sum)
406 }
407 408 // Double sets q = p + p, and returns q. The points may overlap.
409 func (q *P256Point) Double(p *P256Point) *P256Point {
410 var double P256Point
411 p256PointDoubleAsm(&double, p)
412 return q.Set(&double)
413 }
414 415 // ScalarBaseMult sets r = scalar * generator, where scalar is a 32-byte big
416 // endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult
417 // returns an error and the receiver is unchanged.
418 func (r *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) {
419 if len(scalar) != 32 {
420 return nil, errors.New("invalid scalar length")
421 }
422 scalarReversed := &p256OrdElement{}
423 p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar))
424 p256OrdReduce(scalarReversed)
425 426 r.p256BaseMult(scalarReversed)
427 return r, nil
428 }
429 430 // ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value,
431 // and returns r. If scalar is not 32 bytes long, ScalarBaseMult returns an
432 // error and the receiver is unchanged.
433 func (r *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) {
434 if len(scalar) != 32 {
435 return nil, errors.New("invalid scalar length")
436 }
437 scalarReversed := &p256OrdElement{}
438 p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar))
439 p256OrdReduce(scalarReversed)
440 441 r.Set(q).p256ScalarMult(scalarReversed)
442 return r, nil
443 }
444 445 // uint64IsZero returns 1 if x is zero and zero otherwise.
446 func uint64IsZero(x uint64) int {
447 x = ^x
448 x &= x >> 32
449 x &= x >> 16
450 x &= x >> 8
451 x &= x >> 4
452 x &= x >> 2
453 x &= x >> 1
454 return int(x & 1)
455 }
456 457 // p256Equal returns 1 if a and b are equal and 0 otherwise.
458 func p256Equal(a, b *p256Element) int {
459 var acc uint64
460 for i := range a {
461 acc |= a[i] ^ b[i]
462 }
463 return uint64IsZero(acc)
464 }
465 466 // isInfinity returns 1 if p is the point at infinity and 0 otherwise.
467 func (p *P256Point) isInfinity() int {
468 return p256Equal(&p.z, &p256Zero)
469 }
470 471 // Bytes returns the uncompressed or infinity encoding of p, as specified in
472 // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
473 // infinity is shorter than all other encodings.
474 func (p *P256Point) Bytes() []byte {
475 // This function is outlined to make the allocations inline in the caller
476 // rather than happen on the heap.
477 var out [p256UncompressedLength]byte
478 return p.bytes(&out)
479 }
480 481 func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte {
482 // The proper representation of the point at infinity is a single zero byte.
483 if p.isInfinity() == 1 {
484 return append(out[:0], 0)
485 }
486 487 x, y := &p256Element{}, &p256Element{}
488 p.affineFromMont(x, y)
489 490 out[0] = 4 // Uncompressed form.
491 p256LittleToBig((*[32]byte)(out[1:33]), x)
492 p256LittleToBig((*[32]byte)(out[33:65]), y)
493 494 return out[:]
495 }
496 497 // affineFromMont sets (x, y) to the affine coordinates of p, converted out of the
498 // Montgomery domain.
499 func (p *P256Point) affineFromMont(x, y *p256Element) {
500 p256Inverse(y, &p.z)
501 p256Sqr(x, y, 1)
502 p256Mul(y, y, x)
503 504 p256Mul(x, &p.x, x)
505 p256Mul(y, &p.y, y)
506 507 p256FromMont(x, x)
508 p256FromMont(y, y)
509 }
510 511 // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
512 // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
513 func (p *P256Point) BytesX() ([]byte, error) {
514 // This function is outlined to make the allocations inline in the caller
515 // rather than happen on the heap.
516 var out [p256ElementLength]byte
517 return p.bytesX(&out)
518 }
519 520 func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) {
521 if p.isInfinity() == 1 {
522 return nil, errors.New("P256 point is the point at infinity")
523 }
524 525 x := &p256Element{}
526 p256Inverse(x, &p.z)
527 p256Sqr(x, x, 1)
528 p256Mul(x, &p.x, x)
529 p256FromMont(x, x)
530 p256LittleToBig((*[32]byte)(out[:]), x)
531 532 return out[:], nil
533 }
534 535 // BytesCompressed returns the compressed or infinity encoding of p, as
536 // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
537 // point at infinity is shorter than all other encodings.
538 func (p *P256Point) BytesCompressed() []byte {
539 // This function is outlined to make the allocations inline in the caller
540 // rather than happen on the heap.
541 var out [p256CompressedLength]byte
542 return p.bytesCompressed(&out)
543 }
544 545 func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte {
546 if p.isInfinity() == 1 {
547 return append(out[:0], 0)
548 }
549 550 x, y := &p256Element{}, &p256Element{}
551 p.affineFromMont(x, y)
552 553 out[0] = 2 | byte(y[0]&1)
554 p256LittleToBig((*[32]byte)(out[1:33]), x)
555 556 return out[:]
557 }
558 559 // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
560 func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point {
561 p256MovCond(q, p1, p2, cond)
562 return q
563 }
564 565 // p256Inverse sets out to in⁻¹ mod p. If in is zero, out will be zero.
566 func p256Inverse(out, in *p256Element) {
567 // Inversion is calculated through exponentiation by p - 2, per Fermat's
568 // little theorem.
569 //
570 // The sequence of 12 multiplications and 255 squarings is derived from the
571 // following addition chain generated with github.com/mmcloughlin/addchain
572 // v0.4.0.
573 //
574 // _10 = 2*1
575 // _11 = 1 + _10
576 // _110 = 2*_11
577 // _111 = 1 + _110
578 // _111000 = _111 << 3
579 // _111111 = _111 + _111000
580 // x12 = _111111 << 6 + _111111
581 // x15 = x12 << 3 + _111
582 // x16 = 2*x15 + 1
583 // x32 = x16 << 16 + x16
584 // i53 = x32 << 15
585 // x47 = x15 + i53
586 // i263 = ((i53 << 17 + 1) << 143 + x47) << 47
587 // return (x47 + i263) << 2 + 1
588 //
589 var z = &p256Element{}
590 var t0 = &p256Element{}
591 var t1 = &p256Element{}
592 593 p256Sqr(z, in, 1)
594 p256Mul(z, in, z)
595 p256Sqr(z, z, 1)
596 p256Mul(z, in, z)
597 p256Sqr(t0, z, 3)
598 p256Mul(t0, z, t0)
599 p256Sqr(t1, t0, 6)
600 p256Mul(t0, t0, t1)
601 p256Sqr(t0, t0, 3)
602 p256Mul(z, z, t0)
603 p256Sqr(t0, z, 1)
604 p256Mul(t0, in, t0)
605 p256Sqr(t1, t0, 16)
606 p256Mul(t0, t0, t1)
607 p256Sqr(t0, t0, 15)
608 p256Mul(z, z, t0)
609 p256Sqr(t0, t0, 17)
610 p256Mul(t0, in, t0)
611 p256Sqr(t0, t0, 143)
612 p256Mul(t0, z, t0)
613 p256Sqr(t0, t0, 47)
614 p256Mul(z, z, t0)
615 p256Sqr(z, z, 2)
616 p256Mul(out, in, z)
617 }
618 619 func boothW5(in uint) (int, int) {
620 var s uint = ^((in >> 5) - 1)
621 var d uint = (1 << 6) - in - 1
622 d = (d & s) | (in & (^s))
623 d = (d >> 1) + (d & 1)
624 return int(d), int(s & 1)
625 }
626 627 func boothW6(in uint) (int, int) {
628 var s uint = ^((in >> 6) - 1)
629 var d uint = (1 << 7) - in - 1
630 d = (d & s) | (in & (^s))
631 d = (d >> 1) + (d & 1)
632 return int(d), int(s & 1)
633 }
634 635 func (p *P256Point) p256BaseMult(scalar *p256OrdElement) {
636 var t0 p256AffinePoint
637 638 wvalue := (scalar[0] << 1) & 0x7f
639 sel, sign := boothW6(uint(wvalue))
640 p256SelectAffine(&t0, &p256Precomputed[0], sel)
641 p.x, p.y, p.z = t0.x, t0.y, p256One
642 p256NegCond(&p.y, sign)
643 644 index := uint(5)
645 zero := sel
646 647 for i := 1; i < 43; i++ {
648 if index < 192 {
649 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
650 } else {
651 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
652 }
653 index += 6
654 sel, sign = boothW6(uint(wvalue))
655 p256SelectAffine(&t0, &p256Precomputed[i], sel)
656 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
657 zero |= sel
658 }
659 660 // If the whole scalar was zero, set to the point at infinity.
661 p256MovCond(p, p, NewP256Point(), zero)
662 }
663 664 func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) {
665 // precomp is a table of precomputed points that stores powers of p
666 // from p^1 to p^16.
667 var precomp p256Table
668 var t0, t1, t2, t3 P256Point
669 670 // Prepare the table
671 precomp[0] = *p // 1
672 673 p256PointDoubleAsm(&t0, p)
674 p256PointDoubleAsm(&t1, &t0)
675 p256PointDoubleAsm(&t2, &t1)
676 p256PointDoubleAsm(&t3, &t2)
677 precomp[1] = t0 // 2
678 precomp[3] = t1 // 4
679 precomp[7] = t2 // 8
680 precomp[15] = t3 // 16
681 682 p256PointAddAsm(&t0, &t0, p)
683 p256PointAddAsm(&t1, &t1, p)
684 p256PointAddAsm(&t2, &t2, p)
685 precomp[2] = t0 // 3
686 precomp[4] = t1 // 5
687 precomp[8] = t2 // 9
688 689 p256PointDoubleAsm(&t0, &t0)
690 p256PointDoubleAsm(&t1, &t1)
691 precomp[5] = t0 // 6
692 precomp[9] = t1 // 10
693 694 p256PointAddAsm(&t2, &t0, p)
695 p256PointAddAsm(&t1, &t1, p)
696 precomp[6] = t2 // 7
697 precomp[10] = t1 // 11
698 699 p256PointDoubleAsm(&t0, &t0)
700 p256PointDoubleAsm(&t2, &t2)
701 precomp[11] = t0 // 12
702 precomp[13] = t2 // 14
703 704 p256PointAddAsm(&t0, &t0, p)
705 p256PointAddAsm(&t2, &t2, p)
706 precomp[12] = t0 // 13
707 precomp[14] = t2 // 15
708 709 // Start scanning the window from top bit
710 index := uint(254)
711 var sel, sign int
712 713 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
714 sel, _ = boothW5(uint(wvalue))
715 716 p256Select(p, &precomp, sel)
717 zero := sel
718 719 for index > 4 {
720 index -= 5
721 p256PointDoubleAsm(p, p)
722 p256PointDoubleAsm(p, p)
723 p256PointDoubleAsm(p, p)
724 p256PointDoubleAsm(p, p)
725 p256PointDoubleAsm(p, p)
726 727 if index < 192 {
728 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
729 } else {
730 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
731 }
732 733 sel, sign = boothW5(uint(wvalue))
734 735 p256Select(&t0, &precomp, sel)
736 p256NegCond(&t0.y, sign)
737 p256PointAddAsm(&t1, p, &t0)
738 p256MovCond(&t1, &t1, p, sel)
739 p256MovCond(p, &t1, &t0, zero)
740 zero |= sel
741 }
742 743 p256PointDoubleAsm(p, p)
744 p256PointDoubleAsm(p, p)
745 p256PointDoubleAsm(p, p)
746 p256PointDoubleAsm(p, p)
747 p256PointDoubleAsm(p, p)
748 749 wvalue = (scalar[0] << 1) & 0x3f
750 sel, sign = boothW5(uint(wvalue))
751 752 p256Select(&t0, &precomp, sel)
753 p256NegCond(&t0.y, sign)
754 p256PointAddAsm(&t1, p, &t0)
755 p256MovCond(&t1, &t1, p, sel)
756 p256MovCond(p, &t1, &t0, zero)
757 }
758