p256.mx raw

   1  // Copyright 2022 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build (!amd64 && !arm64 && !ppc64le && !s390x) || purego
   6  
   7  package nistec
   8  
   9  import (
  10  	"crypto/internal/fips140/nistec/fiat"
  11  	"crypto/internal/fips140/subtle"
  12  	"crypto/internal/fips140deps/byteorder"
  13  	"crypto/internal/fips140deps/cpu"
  14  	"errors"
  15  	"math/bits"
  16  	"sync"
  17  	"unsafe"
  18  )
  19  
  20  // P256Point is a P-256 point. The zero value is NOT valid.
  21  type P256Point struct {
  22  	// The point is represented in projective coordinates (X:Y:Z), where x = X/Z
  23  	// and y = Y/Z. Infinity is (0:1:0).
  24  	//
  25  	// fiat.P256Element is a base field element in [0, P-1] in the Montgomery
  26  	// domain (with R 2²⁵⁶ and P 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1) as four limbs in
  27  	// little-endian order value.
  28  	x, y, z fiat.P256Element
  29  }
  30  
  31  // NewP256Point returns a new P256Point representing the point at infinity point.
  32  func NewP256Point() *P256Point {
  33  	p := &P256Point{}
  34  	p.y.One()
  35  	return p
  36  }
  37  
  38  // SetGenerator sets p to the canonical generator and returns p.
  39  func (p *P256Point) SetGenerator() *P256Point {
  40  	p.x.SetBytes([]byte{0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, 0x77, 0x3, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96})
  41  	p.y.SetBytes([]byte{0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0xf, 0x9e, 0x16, 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5})
  42  	p.z.One()
  43  	return p
  44  }
  45  
  46  // Set sets p = q and returns p.
  47  func (p *P256Point) Set(q *P256Point) *P256Point {
  48  	p.x.Set(&q.x)
  49  	p.y.Set(&q.y)
  50  	p.z.Set(&q.z)
  51  	return p
  52  }
  53  
  54  const p256ElementLength = 32
  55  const p256UncompressedLength = 1 + 2*p256ElementLength
  56  const p256CompressedLength = 1 + p256ElementLength
  57  
  58  // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
  59  // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
  60  // the curve, it returns nil and an error, and the receiver is unchanged.
  61  // Otherwise, it returns p.
  62  func (p *P256Point) SetBytes(b []byte) (*P256Point, error) {
  63  	switch {
  64  	// Point at infinity.
  65  	case len(b) == 1 && b[0] == 0:
  66  		return p.Set(NewP256Point()), nil
  67  
  68  	// Uncompressed form.
  69  	case len(b) == p256UncompressedLength && b[0] == 4:
  70  		x, err := (&fiat.P256Element{}).SetBytes(b[1 : 1+p256ElementLength])
  71  		if err != nil {
  72  			return nil, err
  73  		}
  74  		y, err := (&fiat.P256Element{}).SetBytes(b[1+p256ElementLength:])
  75  		if err != nil {
  76  			return nil, err
  77  		}
  78  		if err := p256CheckOnCurve(x, y); err != nil {
  79  			return nil, err
  80  		}
  81  		p.x.Set(x)
  82  		p.y.Set(y)
  83  		p.z.One()
  84  		return p, nil
  85  
  86  	// Compressed form.
  87  	case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3):
  88  		x, err := (&fiat.P256Element{}).SetBytes(b[1:])
  89  		if err != nil {
  90  			return nil, err
  91  		}
  92  
  93  		// y² = x³ - 3x + b
  94  		y := p256Polynomial(&fiat.P256Element{}, x)
  95  		if !p256Sqrt(y, y) {
  96  			return nil, errors.New("invalid P256 compressed point encoding")
  97  		}
  98  
  99  		// Select the positive or negative root, as indicated by the least
 100  		// significant bit, based on the encoding type byte.
 101  		otherRoot := &fiat.P256Element{}
 102  		otherRoot.Sub(otherRoot, y)
 103  		cond := y.Bytes()[p256ElementLength-1]&1 ^ b[0]&1
 104  		y.Select(otherRoot, y, int(cond))
 105  
 106  		p.x.Set(x)
 107  		p.y.Set(y)
 108  		p.z.One()
 109  		return p, nil
 110  
 111  	default:
 112  		return nil, errors.New("invalid P256 point encoding")
 113  	}
 114  }
 115  
 116  var _p256B *fiat.P256Element
 117  var _p256BOnce sync.Once
 118  
 119  func p256B() *fiat.P256Element {
 120  	_p256BOnce.Do(func() {
 121  		_p256B, _ = (&fiat.P256Element{}).SetBytes([]byte{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, 0x65, 0x1d, 0x6, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b})
 122  	})
 123  	return _p256B
 124  }
 125  
 126  // p256Polynomial sets y2 to x³ - 3x + b, and returns y2.
 127  func p256Polynomial(y2, x *fiat.P256Element) *fiat.P256Element {
 128  	y2.Square(x)
 129  	y2.Mul(y2, x)
 130  
 131  	threeX := (&fiat.P256Element{}).Add(x, x)
 132  	threeX.Add(threeX, x)
 133  	y2.Sub(y2, threeX)
 134  
 135  	return y2.Add(y2, p256B())
 136  }
 137  
 138  func p256CheckOnCurve(x, y *fiat.P256Element) error {
 139  	// y² = x³ - 3x + b
 140  	rhs := p256Polynomial(&fiat.P256Element{}, x)
 141  	lhs := (&fiat.P256Element{}).Square(y)
 142  	if rhs.Equal(lhs) != 1 {
 143  		return errors.New("P256 point not on curve")
 144  	}
 145  	return nil
 146  }
 147  
 148  // Bytes returns the uncompressed or infinity encoding of p, as specified in
 149  // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
 150  // infinity is shorter than all other encodings.
 151  func (p *P256Point) Bytes() []byte {
 152  	// This function is outlined to make the allocations inline in the caller
 153  	// rather than happen on the heap.
 154  	var out [p256UncompressedLength]byte
 155  	return p.bytes(&out)
 156  }
 157  
 158  func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte {
 159  	// The SEC 1 representation of the point at infinity is a single zero byte,
 160  	// and only infinity has z = 0.
 161  	if p.z.IsZero() == 1 {
 162  		return append(out[:0], 0)
 163  	}
 164  
 165  	zinv := (&fiat.P256Element{}).Invert(&p.z)
 166  	x := (&fiat.P256Element{}).Mul(&p.x, zinv)
 167  	y := (&fiat.P256Element{}).Mul(&p.y, zinv)
 168  
 169  	buf := append(out[:0], 4)
 170  	buf = append(buf, x.Bytes()...)
 171  	buf = append(buf, y.Bytes()...)
 172  	return buf
 173  }
 174  
 175  // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
 176  // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
 177  func (p *P256Point) BytesX() ([]byte, error) {
 178  	// This function is outlined to make the allocations inline in the caller
 179  	// rather than happen on the heap.
 180  	var out [p256ElementLength]byte
 181  	return p.bytesX(&out)
 182  }
 183  
 184  func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) {
 185  	if p.z.IsZero() == 1 {
 186  		return nil, errors.New("P256 point is the point at infinity")
 187  	}
 188  
 189  	zinv := (&fiat.P256Element{}).Invert(&p.z)
 190  	x := (&fiat.P256Element{}).Mul(&p.x, zinv)
 191  
 192  	return append(out[:0], x.Bytes()...), nil
 193  }
 194  
 195  // BytesCompressed returns the compressed or infinity encoding of p, as
 196  // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
 197  // point at infinity is shorter than all other encodings.
 198  func (p *P256Point) BytesCompressed() []byte {
 199  	// This function is outlined to make the allocations inline in the caller
 200  	// rather than happen on the heap.
 201  	var out [p256CompressedLength]byte
 202  	return p.bytesCompressed(&out)
 203  }
 204  
 205  func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte {
 206  	if p.z.IsZero() == 1 {
 207  		return append(out[:0], 0)
 208  	}
 209  
 210  	zinv := (&fiat.P256Element{}).Invert(&p.z)
 211  	x := (&fiat.P256Element{}).Mul(&p.x, zinv)
 212  	y := (&fiat.P256Element{}).Mul(&p.y, zinv)
 213  
 214  	// Encode the sign of the y coordinate (indicated by the least significant
 215  	// bit) as the encoding type (2 or 3).
 216  	buf := append(out[:0], 2)
 217  	buf[0] |= y.Bytes()[p256ElementLength-1] & 1
 218  	buf = append(buf, x.Bytes()...)
 219  	return buf
 220  }
 221  
 222  // Add sets q = p1 + p2, and returns q. The points may overlap.
 223  func (q *P256Point) Add(p1, p2 *P256Point) *P256Point {
 224  	// Complete addition formula for a = -3 from "Complete addition formulas for
 225  	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
 226  
 227  	t0 := (&fiat.P256Element{}).Mul(&p1.x, &p2.x) // t0 := X1 * X2
 228  	t1 := (&fiat.P256Element{}).Mul(&p1.y, &p2.y) // t1 := Y1 * Y2
 229  	t2 := (&fiat.P256Element{}).Mul(&p1.z, &p2.z) // t2 := Z1 * Z2
 230  	t3 := (&fiat.P256Element{}).Add(&p1.x, &p1.y) // t3 := X1 + Y1
 231  	t4 := (&fiat.P256Element{}).Add(&p2.x, &p2.y) // t4 := X2 + Y2
 232  	t3.Mul(t3, t4)                                // t3 := t3 * t4
 233  	t4.Add(t0, t1)                                // t4 := t0 + t1
 234  	t3.Sub(t3, t4)                                // t3 := t3 - t4
 235  	t4.Add(&p1.y, &p1.z)                          // t4 := Y1 + Z1
 236  	x3 := (&fiat.P256Element{}).Add(&p2.y, &p2.z) // X3 := Y2 + Z2
 237  	t4.Mul(t4, x3)                                // t4 := t4 * X3
 238  	x3.Add(t1, t2)                                // X3 := t1 + t2
 239  	t4.Sub(t4, x3)                                // t4 := t4 - X3
 240  	x3.Add(&p1.x, &p1.z)                          // X3 := X1 + Z1
 241  	y3 := (&fiat.P256Element{}).Add(&p2.x, &p2.z) // Y3 := X2 + Z2
 242  	x3.Mul(x3, y3)                                // X3 := X3 * Y3
 243  	y3.Add(t0, t2)                                // Y3 := t0 + t2
 244  	y3.Sub(x3, y3)                                // Y3 := X3 - Y3
 245  	z3 := (&fiat.P256Element{}).Mul(p256B(), t2)  // Z3 := b * t2
 246  	x3.Sub(y3, z3)                                // X3 := Y3 - Z3
 247  	z3.Add(x3, x3)                                // Z3 := X3 + X3
 248  	x3.Add(x3, z3)                                // X3 := X3 + Z3
 249  	z3.Sub(t1, x3)                                // Z3 := t1 - X3
 250  	x3.Add(t1, x3)                                // X3 := t1 + X3
 251  	y3.Mul(p256B(), y3)                           // Y3 := b * Y3
 252  	t1.Add(t2, t2)                                // t1 := t2 + t2
 253  	t2.Add(t1, t2)                                // t2 := t1 + t2
 254  	y3.Sub(y3, t2)                                // Y3 := Y3 - t2
 255  	y3.Sub(y3, t0)                                // Y3 := Y3 - t0
 256  	t1.Add(y3, y3)                                // t1 := Y3 + Y3
 257  	y3.Add(t1, y3)                                // Y3 := t1 + Y3
 258  	t1.Add(t0, t0)                                // t1 := t0 + t0
 259  	t0.Add(t1, t0)                                // t0 := t1 + t0
 260  	t0.Sub(t0, t2)                                // t0 := t0 - t2
 261  	t1.Mul(t4, y3)                                // t1 := t4 * Y3
 262  	t2.Mul(t0, y3)                                // t2 := t0 * Y3
 263  	y3.Mul(x3, z3)                                // Y3 := X3 * Z3
 264  	y3.Add(y3, t2)                                // Y3 := Y3 + t2
 265  	x3.Mul(t3, x3)                                // X3 := t3 * X3
 266  	x3.Sub(x3, t1)                                // X3 := X3 - t1
 267  	z3.Mul(t4, z3)                                // Z3 := t4 * Z3
 268  	t1.Mul(t3, t0)                                // t1 := t3 * t0
 269  	z3.Add(z3, t1)                                // Z3 := Z3 + t1
 270  
 271  	q.x.Set(x3)
 272  	q.y.Set(y3)
 273  	q.z.Set(z3)
 274  	return q
 275  }
 276  
 277  // Double sets q = p + p, and returns q. The points may overlap.
 278  func (q *P256Point) Double(p *P256Point) *P256Point {
 279  	// Complete addition formula for a = -3 from "Complete addition formulas for
 280  	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
 281  
 282  	t0 := (&fiat.P256Element{}).Square(&p.x)     // t0 := X ^ 2
 283  	t1 := (&fiat.P256Element{}).Square(&p.y)     // t1 := Y ^ 2
 284  	t2 := (&fiat.P256Element{}).Square(&p.z)     // t2 := Z ^ 2
 285  	t3 := (&fiat.P256Element{}).Mul(&p.x, &p.y)  // t3 := X * Y
 286  	t3.Add(t3, t3)                               // t3 := t3 + t3
 287  	z3 := (&fiat.P256Element{}).Mul(&p.x, &p.z)  // Z3 := X * Z
 288  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
 289  	y3 := (&fiat.P256Element{}).Mul(p256B(), t2) // Y3 := b * t2
 290  	y3.Sub(y3, z3)                               // Y3 := Y3 - Z3
 291  	x3 := (&fiat.P256Element{}).Add(y3, y3)      // X3 := Y3 + Y3
 292  	y3.Add(x3, y3)                               // Y3 := X3 + Y3
 293  	x3.Sub(t1, y3)                               // X3 := t1 - Y3
 294  	y3.Add(t1, y3)                               // Y3 := t1 + Y3
 295  	y3.Mul(x3, y3)                               // Y3 := X3 * Y3
 296  	x3.Mul(x3, t3)                               // X3 := X3 * t3
 297  	t3.Add(t2, t2)                               // t3 := t2 + t2
 298  	t2.Add(t2, t3)                               // t2 := t2 + t3
 299  	z3.Mul(p256B(), z3)                          // Z3 := b * Z3
 300  	z3.Sub(z3, t2)                               // Z3 := Z3 - t2
 301  	z3.Sub(z3, t0)                               // Z3 := Z3 - t0
 302  	t3.Add(z3, z3)                               // t3 := Z3 + Z3
 303  	z3.Add(z3, t3)                               // Z3 := Z3 + t3
 304  	t3.Add(t0, t0)                               // t3 := t0 + t0
 305  	t0.Add(t3, t0)                               // t0 := t3 + t0
 306  	t0.Sub(t0, t2)                               // t0 := t0 - t2
 307  	t0.Mul(t0, z3)                               // t0 := t0 * Z3
 308  	y3.Add(y3, t0)                               // Y3 := Y3 + t0
 309  	t0.Mul(&p.y, &p.z)                           // t0 := Y * Z
 310  	t0.Add(t0, t0)                               // t0 := t0 + t0
 311  	z3.Mul(t0, z3)                               // Z3 := t0 * Z3
 312  	x3.Sub(x3, z3)                               // X3 := X3 - Z3
 313  	z3.Mul(t0, t1)                               // Z3 := t0 * t1
 314  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
 315  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
 316  
 317  	q.x.Set(x3)
 318  	q.y.Set(y3)
 319  	q.z.Set(z3)
 320  	return q
 321  }
 322  
 323  // p256AffinePoint is a point in affine coordinates (x, y). x and y are still
 324  // Montgomery domain elements. The point can't be the point at infinity.
 325  type p256AffinePoint struct {
 326  	x, y fiat.P256Element
 327  }
 328  
 329  func (p *p256AffinePoint) Projective() *P256Point {
 330  	pp := &P256Point{x: p.x, y: p.y}
 331  	pp.z.One()
 332  	return pp
 333  }
 334  
 335  // AddAffine sets q = p1 + p2, if infinity == 0, and to p1 if infinity == 1.
 336  // p2 can't be the point at infinity as it can't be represented in affine
 337  // coordinates, instead callers can set p2 to an arbitrary point and set
 338  // infinity to 1.
 339  func (q *P256Point) AddAffine(p1 *P256Point, p2 *p256AffinePoint, infinity int) *P256Point {
 340  	// Complete mixed addition formula for a = -3 from "Complete addition
 341  	// formulas for prime order elliptic curves"
 342  	// (https://eprint.iacr.org/2015/1060), Algorithm 5.
 343  
 344  	t0 := (&fiat.P256Element{}).Mul(&p1.x, &p2.x)   // t0 ← X1 · X2
 345  	t1 := (&fiat.P256Element{}).Mul(&p1.y, &p2.y)   // t1 ← Y1 · Y2
 346  	t3 := (&fiat.P256Element{}).Add(&p2.x, &p2.y)   // t3 ← X2 + Y2
 347  	t4 := (&fiat.P256Element{}).Add(&p1.x, &p1.y)   // t4 ← X1 + Y1
 348  	t3.Mul(t3, t4)                                  // t3 ← t3 · t4
 349  	t4.Add(t0, t1)                                  // t4 ← t0 + t1
 350  	t3.Sub(t3, t4)                                  // t3 ← t3 − t4
 351  	t4.Mul(&p2.y, &p1.z)                            // t4 ← Y2 · Z1
 352  	t4.Add(t4, &p1.y)                               // t4 ← t4 + Y1
 353  	y3 := (&fiat.P256Element{}).Mul(&p2.x, &p1.z)   // Y3 ← X2 · Z1
 354  	y3.Add(y3, &p1.x)                               // Y3 ← Y3 + X1
 355  	z3 := (&fiat.P256Element{}).Mul(p256B(), &p1.z) // Z3 ← b  · Z1
 356  	x3 := (&fiat.P256Element{}).Sub(y3, z3)         // X3 ← Y3 − Z3
 357  	z3.Add(x3, x3)                                  // Z3 ← X3 + X3
 358  	x3.Add(x3, z3)                                  // X3 ← X3 + Z3
 359  	z3.Sub(t1, x3)                                  // Z3 ← t1 − X3
 360  	x3.Add(t1, x3)                                  // X3 ← t1 + X3
 361  	y3.Mul(p256B(), y3)                             // Y3 ← b  · Y3
 362  	t1.Add(&p1.z, &p1.z)                            // t1 ← Z1 + Z1
 363  	t2 := (&fiat.P256Element{}).Add(t1, &p1.z)      // t2 ← t1 + Z1
 364  	y3.Sub(y3, t2)                                  // Y3 ← Y3 − t2
 365  	y3.Sub(y3, t0)                                  // Y3 ← Y3 − t0
 366  	t1.Add(y3, y3)                                  // t1 ← Y3 + Y3
 367  	y3.Add(t1, y3)                                  // Y3 ← t1 + Y3
 368  	t1.Add(t0, t0)                                  // t1 ← t0 + t0
 369  	t0.Add(t1, t0)                                  // t0 ← t1 + t0
 370  	t0.Sub(t0, t2)                                  // t0 ← t0 − t2
 371  	t1.Mul(t4, y3)                                  // t1 ← t4 · Y3
 372  	t2.Mul(t0, y3)                                  // t2 ← t0 · Y3
 373  	y3.Mul(x3, z3)                                  // Y3 ← X3 · Z3
 374  	y3.Add(y3, t2)                                  // Y3 ← Y3 + t2
 375  	x3.Mul(t3, x3)                                  // X3 ← t3 · X3
 376  	x3.Sub(x3, t1)                                  // X3 ← X3 − t1
 377  	z3.Mul(t4, z3)                                  // Z3 ← t4 · Z3
 378  	t1.Mul(t3, t0)                                  // t1 ← t3 · t0
 379  	z3.Add(z3, t1)                                  // Z3 ← Z3 + t1
 380  
 381  	q.x.Select(&p1.x, x3, infinity)
 382  	q.y.Select(&p1.y, y3, infinity)
 383  	q.z.Select(&p1.z, z3, infinity)
 384  	return q
 385  }
 386  
 387  // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
 388  func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point {
 389  	q.x.Select(&p1.x, &p2.x, cond)
 390  	q.y.Select(&p1.y, &p2.y, cond)
 391  	q.z.Select(&p1.z, &p2.z, cond)
 392  	return q
 393  }
 394  
 395  // p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the
 396  // Montgomery domain (with R 2²⁵⁶) as four uint64 limbs in little-endian order.
 397  type p256OrdElement [4]uint64
 398  
 399  // SetBytes sets s to the big-endian value of x, reducing it as necessary.
 400  func (s *p256OrdElement) SetBytes(x []byte) (*p256OrdElement, error) {
 401  	if len(x) != 32 {
 402  		return nil, errors.New("invalid scalar length")
 403  	}
 404  
 405  	s[0] = byteorder.BEUint64(x[24:])
 406  	s[1] = byteorder.BEUint64(x[16:])
 407  	s[2] = byteorder.BEUint64(x[8:])
 408  	s[3] = byteorder.BEUint64(x[:])
 409  
 410  	// Ensure s is in the range [0, ord(G)-1]. Since 2 * ord(G) > 2²⁵⁶, we can
 411  	// just conditionally subtract ord(G), keeping the result if it doesn't
 412  	// underflow.
 413  	t0, b := bits.Sub64(s[0], 0xf3b9cac2fc632551, 0)
 414  	t1, b := bits.Sub64(s[1], 0xbce6faada7179e84, b)
 415  	t2, b := bits.Sub64(s[2], 0xffffffffffffffff, b)
 416  	t3, b := bits.Sub64(s[3], 0xffffffff00000000, b)
 417  	tMask := b - 1 // zero if subtraction underflowed
 418  	s[0] ^= (t0 ^ s[0]) & tMask
 419  	s[1] ^= (t1 ^ s[1]) & tMask
 420  	s[2] ^= (t2 ^ s[2]) & tMask
 421  	s[3] ^= (t3 ^ s[3]) & tMask
 422  
 423  	return s, nil
 424  }
 425  
 426  func (s *p256OrdElement) Bytes() []byte {
 427  	var out [32]byte
 428  	byteorder.BEPutUint64(out[24:], s[0])
 429  	byteorder.BEPutUint64(out[16:], s[1])
 430  	byteorder.BEPutUint64(out[8:], s[2])
 431  	byteorder.BEPutUint64(out[:], s[3])
 432  	return out[:]
 433  }
 434  
 435  // Rsh returns the 64 least significant bits of x >> n. n must be lower
 436  // than 256. The value of n leaks through timing side-channels.
 437  func (s *p256OrdElement) Rsh(n int) uint64 {
 438  	i := n / 64
 439  	n = n % 64
 440  	res := s[i] >> n
 441  	// Shift in the more significant limb, if present.
 442  	if i := i + 1; i < len(s) {
 443  		res |= s[i] << (64 - n)
 444  	}
 445  	return res
 446  }
 447  
 448  // p256Table is a table of the first 16 multiples of a point. Points are stored
 449  // at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15.
 450  // [0]P is the point at infinity and it's not stored.
 451  type p256Table [16]P256Point
 452  
 453  // Select selects the n-th multiple of the table base point into p. It works in
 454  // constant time. n must be in [0, 16]. If n is 0, p is set to the identity point.
 455  func (table *p256Table) Select(p *P256Point, n uint8) {
 456  	if n > 16 {
 457  		panic("nistec: internal error: p256Table called with out-of-bounds value")
 458  	}
 459  	p.Set(NewP256Point())
 460  	for i := uint8(1); i <= 16; i++ {
 461  		cond := subtle.ConstantTimeByteEq(i, n)
 462  		p.Select(&table[i-1], p, cond)
 463  	}
 464  }
 465  
 466  // Compute populates the table to the first 16 multiples of q.
 467  func (table *p256Table) Compute(q *P256Point) *p256Table {
 468  	table[0].Set(q)
 469  	for i := 1; i < 16; i += 2 {
 470  		table[i].Double(&table[i/2])
 471  		if i+1 < 16 {
 472  			table[i+1].Add(&table[i], q)
 473  		}
 474  	}
 475  	return table
 476  }
 477  
 478  func boothW5(in uint64) (uint8, int) {
 479  	s := ^((in >> 5) - 1)
 480  	d := (1 << 6) - in - 1
 481  	d = (d & s) | (in & (^s))
 482  	d = (d >> 1) + (d & 1)
 483  	return uint8(d), int(s & 1)
 484  }
 485  
 486  // ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value,
 487  // and returns r. If scalar is not 32 bytes long, ScalarMult returns an error
 488  // and the receiver is unchanged.
 489  func (p *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) {
 490  	s, err := (&p256OrdElement{}).SetBytes(scalar)
 491  	if err != nil {
 492  		return nil, err
 493  	}
 494  
 495  	// Start scanning the window from the most significant bits. We move by
 496  	// 5 bits at a time and need to finish at -1, so -1 + 5 * 51 = 254.
 497  	index := 254
 498  
 499  	sel, sign := boothW5(s.Rsh(index))
 500  	// sign is always zero because the boothW5 input here is at
 501  	// most two bits long, so the top bit is never set.
 502  	_ = sign
 503  
 504  	// Neither Select nor Add have exceptions for the point at infinity /
 505  	// selector zero, so we don't need to check for it here or in the loop.
 506  	table := (&p256Table{}).Compute(q)
 507  	table.Select(p, sel)
 508  
 509  	t := NewP256Point()
 510  	for index >= 4 {
 511  		index -= 5
 512  
 513  		p.Double(p)
 514  		p.Double(p)
 515  		p.Double(p)
 516  		p.Double(p)
 517  		p.Double(p)
 518  
 519  		if index >= 0 {
 520  			sel, sign = boothW5(s.Rsh(index) & 0b111111)
 521  		} else {
 522  			// Booth encoding considers a virtual zero bit at index -1,
 523  			// so we shift left the least significant limb.
 524  			wvalue := (s[0] << 1) & 0b111111
 525  			sel, sign = boothW5(wvalue)
 526  		}
 527  
 528  		table.Select(t, sel)
 529  		t.Negate(sign)
 530  		p.Add(p, t)
 531  	}
 532  
 533  	return p, nil
 534  }
 535  
 536  // Negate sets p to -p, if cond == 1, and to p if cond == 0.
 537  func (p *P256Point) Negate(cond int) *P256Point {
 538  	negY := &fiat.P256Element{}
 539  	negY.Sub(negY, &p.y)
 540  	p.y.Select(negY, &p.y, cond)
 541  	return p
 542  }
 543  
 544  // p256AffineTable is a table of the first 32 multiples of a point. Points are
 545  // stored at an index offset of -1 like in p256Table, and [0]P is not stored.
 546  type p256AffineTable [32]p256AffinePoint
 547  
 548  // Select selects the n-th multiple of the table base point into p. It works in
 549  // constant time. n can be in [0, 32], but (unlike p256Table.Select) if n is 0,
 550  // p is set to an undefined value.
 551  func (table *p256AffineTable) Select(p *p256AffinePoint, n uint8) {
 552  	if n > 32 {
 553  		panic("nistec: internal error: p256AffineTable.Select called with out-of-bounds value")
 554  	}
 555  	for i := uint8(1); i <= 32; i++ {
 556  		cond := subtle.ConstantTimeByteEq(i, n)
 557  		p.x.Select(&table[i-1].x, &p.x, cond)
 558  		p.y.Select(&table[i-1].y, &p.y, cond)
 559  	}
 560  }
 561  
 562  // p256GeneratorTables is a series of precomputed multiples of G, the canonical
 563  // generator. The first p256AffineTable contains multiples of G. The second one
 564  // multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive
 565  // table is the previous table doubled six times. Six is the width of the
 566  // sliding window used in ScalarBaseMult, and having each table already
 567  // pre-doubled lets us avoid the doublings between windows entirely. This table
 568  // aliases into p256PrecomputedEmbed.
 569  var p256GeneratorTables *[43]p256AffineTable
 570  
 571  func init() {
 572  	p256GeneratorTablesPtr := unsafe.Pointer(&p256PrecomputedEmbed)
 573  	if cpu.BigEndian {
 574  		var newTable [43 * 32 * 2 * 4]uint64
 575  		for i, x := range (*[43 * 32 * 2 * 4][8]byte)(p256GeneratorTablesPtr) {
 576  			newTable[i] = byteorder.LEUint64(x[:])
 577  		}
 578  		p256GeneratorTablesPtr = unsafe.Pointer(&newTable)
 579  	}
 580  	p256GeneratorTables = (*[43]p256AffineTable)(p256GeneratorTablesPtr)
 581  }
 582  
 583  func boothW6(in uint64) (uint8, int) {
 584  	s := ^((in >> 6) - 1)
 585  	d := (1 << 7) - in - 1
 586  	d = (d & s) | (in & (^s))
 587  	d = (d >> 1) + (d & 1)
 588  	return uint8(d), int(s & 1)
 589  }
 590  
 591  // ScalarBaseMult sets p = scalar * generator, where scalar is a 32-byte big
 592  // endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult
 593  // returns an error and the receiver is unchanged.
 594  func (p *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) {
 595  	// This function works like ScalarMult above, but the table is fixed and
 596  	// "pre-doubled" for each iteration, so instead of doubling we move to the
 597  	// next table at each iteration.
 598  
 599  	s, err := (&p256OrdElement{}).SetBytes(scalar)
 600  	if err != nil {
 601  		return nil, err
 602  	}
 603  
 604  	// Start scanning the window from the most significant bits. We move by
 605  	// 6 bits at a time and need to finish at -1, so -1 + 6 * 42 = 251.
 606  	index := 251
 607  
 608  	sel, sign := boothW6(s.Rsh(index))
 609  	// sign is always zero because the boothW6 input here is at
 610  	// most five bits long, so the top bit is never set.
 611  	_ = sign
 612  
 613  	t := &p256AffinePoint{}
 614  	table := &p256GeneratorTables[(index+1)/6]
 615  	table.Select(t, sel)
 616  
 617  	// Select's output is undefined if the selector is zero, when it should be
 618  	// the point at infinity (because infinity can't be represented in affine
 619  	// coordinates). Here we conditionally set p to the infinity if sel is zero.
 620  	// In the loop, that's handled by AddAffine.
 621  	selIsZero := subtle.ConstantTimeByteEq(sel, 0)
 622  	p.Select(NewP256Point(), t.Projective(), selIsZero)
 623  
 624  	for index >= 5 {
 625  		index -= 6
 626  
 627  		if index >= 0 {
 628  			sel, sign = boothW6(s.Rsh(index) & 0b1111111)
 629  		} else {
 630  			// Booth encoding considers a virtual zero bit at index -1,
 631  			// so we shift left the least significant limb.
 632  			wvalue := (s[0] << 1) & 0b1111111
 633  			sel, sign = boothW6(wvalue)
 634  		}
 635  
 636  		table := &p256GeneratorTables[(index+1)/6]
 637  		table.Select(t, sel)
 638  		t.Negate(sign)
 639  		selIsZero := subtle.ConstantTimeByteEq(sel, 0)
 640  		p.AddAffine(p, t, selIsZero)
 641  	}
 642  
 643  	return p, nil
 644  }
 645  
 646  // Negate sets p to -p, if cond == 1, and to p if cond == 0.
 647  func (p *p256AffinePoint) Negate(cond int) *p256AffinePoint {
 648  	negY := &fiat.P256Element{}
 649  	negY.Sub(negY, &p.y)
 650  	p.y.Select(negY, &p.y, cond)
 651  	return p
 652  }
 653  
 654  // p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns
 655  // false and e is unchanged. e and x can overlap.
 656  func p256Sqrt(e, x *fiat.P256Element) (isSquare bool) {
 657  	t0, t1 := &fiat.P256Element{}, &fiat.P256Element{}
 658  
 659  	// Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
 660  	//
 661  	// The sequence of 7 multiplications and 253 squarings is derived from the
 662  	// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
 663  	//
 664  	//	_10       = 2*1
 665  	//	_11       = 1 + _10
 666  	//	_1100     = _11 << 2
 667  	//	_1111     = _11 + _1100
 668  	//	_11110000 = _1111 << 4
 669  	//	_11111111 = _1111 + _11110000
 670  	//	x16       = _11111111 << 8 + _11111111
 671  	//	x32       = x16 << 16 + x16
 672  	//	return      ((x32 << 32 + 1) << 96 + 1) << 94
 673  	//
 674  	p256Square(t0, x, 1)
 675  	t0.Mul(x, t0)
 676  	p256Square(t1, t0, 2)
 677  	t0.Mul(t0, t1)
 678  	p256Square(t1, t0, 4)
 679  	t0.Mul(t0, t1)
 680  	p256Square(t1, t0, 8)
 681  	t0.Mul(t0, t1)
 682  	p256Square(t1, t0, 16)
 683  	t0.Mul(t0, t1)
 684  	p256Square(t0, t0, 32)
 685  	t0.Mul(x, t0)
 686  	p256Square(t0, t0, 96)
 687  	t0.Mul(x, t0)
 688  	p256Square(t0, t0, 94)
 689  
 690  	// Check if the candidate t0 is indeed a square root of x.
 691  	t1.Square(t0)
 692  	if t1.Equal(x) != 1 {
 693  		return false
 694  	}
 695  	e.Set(t0)
 696  	return true
 697  }
 698  
 699  // p256Square sets e to the square of x, repeated n times > 1.
 700  func p256Square(e, x *fiat.P256Element, n int) {
 701  	e.Square(x)
 702  	for i := 1; i < n; i++ {
 703  		e.Square(e)
 704  	}
 705  }
 706