p521.mx raw

   1  // Copyright 2022 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Code generated by generate.go. DO NOT EDIT.
   6  
   7  package nistec
   8  
   9  import (
  10  	"crypto/internal/fips140/nistec/fiat"
  11  	"crypto/internal/fips140/subtle"
  12  	"errors"
  13  	"sync"
  14  )
  15  
  16  // p521ElementLength is the length of an element of the base or scalar field,
  17  // which have the same bytes length for all NIST P curves.
  18  const p521ElementLength = 66
  19  
  20  // P521Point is a P521 point. The zero value is NOT valid.
  21  type P521Point struct {
  22  	// The point is represented in projective coordinates (X:Y:Z),
  23  	// where x = X/Z and y = Y/Z.
  24  	x, y, z *fiat.P521Element
  25  }
  26  
  27  // NewP521Point returns a new P521Point representing the point at infinity point.
  28  func NewP521Point() *P521Point {
  29  	return &P521Point{
  30  		x: &fiat.P521Element{},
  31  		y: (&fiat.P521Element{}).One(),
  32  		z: &fiat.P521Element{},
  33  	}
  34  }
  35  
  36  // SetGenerator sets p to the canonical generator and returns p.
  37  func (p *P521Point) SetGenerator() *P521Point {
  38  	p.x.SetBytes([]byte{0x0, 0xc6, 0x85, 0x8e, 0x6, 0xb7, 0x4, 0x4, 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x5, 0x3f, 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, 0xbd, 0x66})
  39  	p.y.SetBytes([]byte{0x1, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, 0xc0, 0x4, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x1, 0x3f, 0xad, 0x7, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, 0x66, 0x50})
  40  	p.z.One()
  41  	return p
  42  }
  43  
  44  // Set sets p = q and returns p.
  45  func (p *P521Point) Set(q *P521Point) *P521Point {
  46  	p.x.Set(q.x)
  47  	p.y.Set(q.y)
  48  	p.z.Set(q.z)
  49  	return p
  50  }
  51  
  52  // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
  53  // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
  54  // the curve, it returns nil and an error, and the receiver is unchanged.
  55  // Otherwise, it returns p.
  56  func (p *P521Point) SetBytes(b []byte) (*P521Point, error) {
  57  	switch {
  58  	// Point at infinity.
  59  	case len(b) == 1 && b[0] == 0:
  60  		return p.Set(NewP521Point()), nil
  61  
  62  	// Uncompressed form.
  63  	case len(b) == 1+2*p521ElementLength && b[0] == 4:
  64  		x, err := (&fiat.P521Element{}).SetBytes(b[1 : 1+p521ElementLength])
  65  		if err != nil {
  66  			return nil, err
  67  		}
  68  		y, err := (&fiat.P521Element{}).SetBytes(b[1+p521ElementLength:])
  69  		if err != nil {
  70  			return nil, err
  71  		}
  72  		if err := p521CheckOnCurve(x, y); err != nil {
  73  			return nil, err
  74  		}
  75  		p.x.Set(x)
  76  		p.y.Set(y)
  77  		p.z.One()
  78  		return p, nil
  79  
  80  	// Compressed form.
  81  	case len(b) == 1+p521ElementLength && (b[0] == 2 || b[0] == 3):
  82  		x, err := (&fiat.P521Element{}).SetBytes(b[1:])
  83  		if err != nil {
  84  			return nil, err
  85  		}
  86  
  87  		// y² = x³ - 3x + b
  88  		y := p521Polynomial(&fiat.P521Element{}, x)
  89  		if !p521Sqrt(y, y) {
  90  			return nil, errors.New("invalid P521 compressed point encoding")
  91  		}
  92  
  93  		// Select the positive or negative root, as indicated by the least
  94  		// significant bit, based on the encoding type byte.
  95  		otherRoot := &fiat.P521Element{}
  96  		otherRoot.Sub(otherRoot, y)
  97  		cond := y.Bytes()[p521ElementLength-1]&1 ^ b[0]&1
  98  		y.Select(otherRoot, y, int(cond))
  99  
 100  		p.x.Set(x)
 101  		p.y.Set(y)
 102  		p.z.One()
 103  		return p, nil
 104  
 105  	default:
 106  		return nil, errors.New("invalid P521 point encoding")
 107  	}
 108  }
 109  
 110  var _p521B *fiat.P521Element
 111  var _p521BOnce sync.Once
 112  
 113  func p521B() *fiat.P521Element {
 114  	_p521BOnce.Do(func() {
 115  		_p521B, _ = (&fiat.P521Element{}).SetBytes([]byte{0x0, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, 0x9, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, 0xbf, 0x7, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, 0x3f, 0x0})
 116  	})
 117  	return _p521B
 118  }
 119  
 120  // p521Polynomial sets y2 to x³ - 3x + b, and returns y2.
 121  func p521Polynomial(y2, x *fiat.P521Element) *fiat.P521Element {
 122  	y2.Square(x)
 123  	y2.Mul(y2, x)
 124  
 125  	threeX := (&fiat.P521Element{}).Add(x, x)
 126  	threeX.Add(threeX, x)
 127  	y2.Sub(y2, threeX)
 128  
 129  	return y2.Add(y2, p521B())
 130  }
 131  
 132  func p521CheckOnCurve(x, y *fiat.P521Element) error {
 133  	// y² = x³ - 3x + b
 134  	rhs := p521Polynomial(&fiat.P521Element{}, x)
 135  	lhs := (&fiat.P521Element{}).Square(y)
 136  	if rhs.Equal(lhs) != 1 {
 137  		return errors.New("P521 point not on curve")
 138  	}
 139  	return nil
 140  }
 141  
 142  // Bytes returns the uncompressed or infinity encoding of p, as specified in
 143  // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
 144  // infinity is shorter than all other encodings.
 145  func (p *P521Point) Bytes() []byte {
 146  	// This function is outlined to make the allocations inline in the caller
 147  	// rather than happen on the heap.
 148  	var out [1 + 2*p521ElementLength]byte
 149  	return p.bytes(&out)
 150  }
 151  
 152  func (p *P521Point) bytes(out *[1 + 2*p521ElementLength]byte) []byte {
 153  	if p.z.IsZero() == 1 {
 154  		return append(out[:0], 0)
 155  	}
 156  
 157  	zinv := (&fiat.P521Element{}).Invert(p.z)
 158  	x := (&fiat.P521Element{}).Mul(p.x, zinv)
 159  	y := (&fiat.P521Element{}).Mul(p.y, zinv)
 160  
 161  	buf := append(out[:0], 4)
 162  	buf = append(buf, x.Bytes()...)
 163  	buf = append(buf, y.Bytes()...)
 164  	return buf
 165  }
 166  
 167  // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
 168  // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
 169  func (p *P521Point) BytesX() ([]byte, error) {
 170  	// This function is outlined to make the allocations inline in the caller
 171  	// rather than happen on the heap.
 172  	var out [p521ElementLength]byte
 173  	return p.bytesX(&out)
 174  }
 175  
 176  func (p *P521Point) bytesX(out *[p521ElementLength]byte) ([]byte, error) {
 177  	if p.z.IsZero() == 1 {
 178  		return nil, errors.New("P521 point is the point at infinity")
 179  	}
 180  
 181  	zinv := (&fiat.P521Element{}).Invert(p.z)
 182  	x := (&fiat.P521Element{}).Mul(p.x, zinv)
 183  
 184  	return append(out[:0], x.Bytes()...), nil
 185  }
 186  
 187  // BytesCompressed returns the compressed or infinity encoding of p, as
 188  // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
 189  // point at infinity is shorter than all other encodings.
 190  func (p *P521Point) BytesCompressed() []byte {
 191  	// This function is outlined to make the allocations inline in the caller
 192  	// rather than happen on the heap.
 193  	var out [1 + p521ElementLength]byte
 194  	return p.bytesCompressed(&out)
 195  }
 196  
 197  func (p *P521Point) bytesCompressed(out *[1 + p521ElementLength]byte) []byte {
 198  	if p.z.IsZero() == 1 {
 199  		return append(out[:0], 0)
 200  	}
 201  
 202  	zinv := (&fiat.P521Element{}).Invert(p.z)
 203  	x := (&fiat.P521Element{}).Mul(p.x, zinv)
 204  	y := (&fiat.P521Element{}).Mul(p.y, zinv)
 205  
 206  	// Encode the sign of the y coordinate (indicated by the least significant
 207  	// bit) as the encoding type (2 or 3).
 208  	buf := append(out[:0], 2)
 209  	buf[0] |= y.Bytes()[p521ElementLength-1] & 1
 210  	buf = append(buf, x.Bytes()...)
 211  	return buf
 212  }
 213  
 214  // Add sets q = p1 + p2, and returns q. The points may overlap.
 215  func (q *P521Point) Add(p1, p2 *P521Point) *P521Point {
 216  	// Complete addition formula for a = -3 from "Complete addition formulas for
 217  	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
 218  
 219  	t0 := (&fiat.P521Element{}).Mul(p1.x, p2.x)  // t0 := X1 * X2
 220  	t1 := (&fiat.P521Element{}).Mul(p1.y, p2.y)  // t1 := Y1 * Y2
 221  	t2 := (&fiat.P521Element{}).Mul(p1.z, p2.z)  // t2 := Z1 * Z2
 222  	t3 := (&fiat.P521Element{}).Add(p1.x, p1.y)  // t3 := X1 + Y1
 223  	t4 := (&fiat.P521Element{}).Add(p2.x, p2.y)  // t4 := X2 + Y2
 224  	t3.Mul(t3, t4)                               // t3 := t3 * t4
 225  	t4.Add(t0, t1)                               // t4 := t0 + t1
 226  	t3.Sub(t3, t4)                               // t3 := t3 - t4
 227  	t4.Add(p1.y, p1.z)                           // t4 := Y1 + Z1
 228  	x3 := (&fiat.P521Element{}).Add(p2.y, p2.z)  // X3 := Y2 + Z2
 229  	t4.Mul(t4, x3)                               // t4 := t4 * X3
 230  	x3.Add(t1, t2)                               // X3 := t1 + t2
 231  	t4.Sub(t4, x3)                               // t4 := t4 - X3
 232  	x3.Add(p1.x, p1.z)                           // X3 := X1 + Z1
 233  	y3 := (&fiat.P521Element{}).Add(p2.x, p2.z)  // Y3 := X2 + Z2
 234  	x3.Mul(x3, y3)                               // X3 := X3 * Y3
 235  	y3.Add(t0, t2)                               // Y3 := t0 + t2
 236  	y3.Sub(x3, y3)                               // Y3 := X3 - Y3
 237  	z3 := (&fiat.P521Element{}).Mul(p521B(), t2) // Z3 := b * t2
 238  	x3.Sub(y3, z3)                               // X3 := Y3 - Z3
 239  	z3.Add(x3, x3)                               // Z3 := X3 + X3
 240  	x3.Add(x3, z3)                               // X3 := X3 + Z3
 241  	z3.Sub(t1, x3)                               // Z3 := t1 - X3
 242  	x3.Add(t1, x3)                               // X3 := t1 + X3
 243  	y3.Mul(p521B(), y3)                          // Y3 := b * Y3
 244  	t1.Add(t2, t2)                               // t1 := t2 + t2
 245  	t2.Add(t1, t2)                               // t2 := t1 + t2
 246  	y3.Sub(y3, t2)                               // Y3 := Y3 - t2
 247  	y3.Sub(y3, t0)                               // Y3 := Y3 - t0
 248  	t1.Add(y3, y3)                               // t1 := Y3 + Y3
 249  	y3.Add(t1, y3)                               // Y3 := t1 + Y3
 250  	t1.Add(t0, t0)                               // t1 := t0 + t0
 251  	t0.Add(t1, t0)                               // t0 := t1 + t0
 252  	t0.Sub(t0, t2)                               // t0 := t0 - t2
 253  	t1.Mul(t4, y3)                               // t1 := t4 * Y3
 254  	t2.Mul(t0, y3)                               // t2 := t0 * Y3
 255  	y3.Mul(x3, z3)                               // Y3 := X3 * Z3
 256  	y3.Add(y3, t2)                               // Y3 := Y3 + t2
 257  	x3.Mul(t3, x3)                               // X3 := t3 * X3
 258  	x3.Sub(x3, t1)                               // X3 := X3 - t1
 259  	z3.Mul(t4, z3)                               // Z3 := t4 * Z3
 260  	t1.Mul(t3, t0)                               // t1 := t3 * t0
 261  	z3.Add(z3, t1)                               // Z3 := Z3 + t1
 262  
 263  	q.x.Set(x3)
 264  	q.y.Set(y3)
 265  	q.z.Set(z3)
 266  	return q
 267  }
 268  
 269  // Double sets q = p + p, and returns q. The points may overlap.
 270  func (q *P521Point) Double(p *P521Point) *P521Point {
 271  	// Complete addition formula for a = -3 from "Complete addition formulas for
 272  	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
 273  
 274  	t0 := (&fiat.P521Element{}).Square(p.x)      // t0 := X ^ 2
 275  	t1 := (&fiat.P521Element{}).Square(p.y)      // t1 := Y ^ 2
 276  	t2 := (&fiat.P521Element{}).Square(p.z)      // t2 := Z ^ 2
 277  	t3 := (&fiat.P521Element{}).Mul(p.x, p.y)    // t3 := X * Y
 278  	t3.Add(t3, t3)                               // t3 := t3 + t3
 279  	z3 := (&fiat.P521Element{}).Mul(p.x, p.z)    // Z3 := X * Z
 280  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
 281  	y3 := (&fiat.P521Element{}).Mul(p521B(), t2) // Y3 := b * t2
 282  	y3.Sub(y3, z3)                               // Y3 := Y3 - Z3
 283  	x3 := (&fiat.P521Element{}).Add(y3, y3)      // X3 := Y3 + Y3
 284  	y3.Add(x3, y3)                               // Y3 := X3 + Y3
 285  	x3.Sub(t1, y3)                               // X3 := t1 - Y3
 286  	y3.Add(t1, y3)                               // Y3 := t1 + Y3
 287  	y3.Mul(x3, y3)                               // Y3 := X3 * Y3
 288  	x3.Mul(x3, t3)                               // X3 := X3 * t3
 289  	t3.Add(t2, t2)                               // t3 := t2 + t2
 290  	t2.Add(t2, t3)                               // t2 := t2 + t3
 291  	z3.Mul(p521B(), z3)                          // Z3 := b * Z3
 292  	z3.Sub(z3, t2)                               // Z3 := Z3 - t2
 293  	z3.Sub(z3, t0)                               // Z3 := Z3 - t0
 294  	t3.Add(z3, z3)                               // t3 := Z3 + Z3
 295  	z3.Add(z3, t3)                               // Z3 := Z3 + t3
 296  	t3.Add(t0, t0)                               // t3 := t0 + t0
 297  	t0.Add(t3, t0)                               // t0 := t3 + t0
 298  	t0.Sub(t0, t2)                               // t0 := t0 - t2
 299  	t0.Mul(t0, z3)                               // t0 := t0 * Z3
 300  	y3.Add(y3, t0)                               // Y3 := Y3 + t0
 301  	t0.Mul(p.y, p.z)                             // t0 := Y * Z
 302  	t0.Add(t0, t0)                               // t0 := t0 + t0
 303  	z3.Mul(t0, z3)                               // Z3 := t0 * Z3
 304  	x3.Sub(x3, z3)                               // X3 := X3 - Z3
 305  	z3.Mul(t0, t1)                               // Z3 := t0 * t1
 306  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
 307  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
 308  
 309  	q.x.Set(x3)
 310  	q.y.Set(y3)
 311  	q.z.Set(z3)
 312  	return q
 313  }
 314  
 315  // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
 316  func (q *P521Point) Select(p1, p2 *P521Point, cond int) *P521Point {
 317  	q.x.Select(p1.x, p2.x, cond)
 318  	q.y.Select(p1.y, p2.y, cond)
 319  	q.z.Select(p1.z, p2.z, cond)
 320  	return q
 321  }
 322  
 323  // A p521Table holds the first 15 multiples of a point at offset -1, so [1]P
 324  // is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
 325  // point.
 326  type p521Table [15]*P521Point
 327  
 328  // Select selects the n-th multiple of the table base point into p. It works in
 329  // constant time by iterating over every entry of the table. n must be in [0, 15].
 330  func (table *p521Table) Select(p *P521Point, n uint8) {
 331  	if n >= 16 {
 332  		panic("nistec: internal error: p521Table called with out-of-bounds value")
 333  	}
 334  	p.Set(NewP521Point())
 335  	for i := uint8(1); i < 16; i++ {
 336  		cond := subtle.ConstantTimeByteEq(i, n)
 337  		p.Select(table[i-1], p, cond)
 338  	}
 339  }
 340  
 341  // ScalarMult sets p = scalar * q, and returns p.
 342  func (p *P521Point) ScalarMult(q *P521Point, scalar []byte) (*P521Point, error) {
 343  	// Compute a p521Table for the base point q. The explicit NewP521Point
 344  	// calls get inlined, letting the allocations live on the stack.
 345  	var table = p521Table{NewP521Point(), NewP521Point(), NewP521Point(),
 346  		NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point(),
 347  		NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point(),
 348  		NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point()}
 349  	table[0].Set(q)
 350  	for i := 1; i < 15; i += 2 {
 351  		table[i].Double(table[i/2])
 352  		table[i+1].Add(table[i], q)
 353  	}
 354  
 355  	// Instead of doing the classic double-and-add chain, we do it with a
 356  	// four-bit window: we double four times, and then add [0-15]P.
 357  	t := NewP521Point()
 358  	p.Set(NewP521Point())
 359  	for i, byte := range scalar {
 360  		// No need to double on the first iteration, as p is the identity at
 361  		// this point, and [N]∞ = ∞.
 362  		if i != 0 {
 363  			p.Double(p)
 364  			p.Double(p)
 365  			p.Double(p)
 366  			p.Double(p)
 367  		}
 368  
 369  		windowValue := byte >> 4
 370  		table.Select(t, windowValue)
 371  		p.Add(p, t)
 372  
 373  		p.Double(p)
 374  		p.Double(p)
 375  		p.Double(p)
 376  		p.Double(p)
 377  
 378  		windowValue = byte & 0b1111
 379  		table.Select(t, windowValue)
 380  		p.Add(p, t)
 381  	}
 382  
 383  	return p, nil
 384  }
 385  
 386  var p521GeneratorTable *[p521ElementLength * 2]p521Table
 387  var p521GeneratorTableOnce sync.Once
 388  
 389  // generatorTable returns a sequence of p521Tables. The first table contains
 390  // multiples of G. Each successive table is the previous table doubled four
 391  // times.
 392  func (p *P521Point) generatorTable() *[p521ElementLength * 2]p521Table {
 393  	p521GeneratorTableOnce.Do(func() {
 394  		p521GeneratorTable = &[p521ElementLength * 2]p521Table{}
 395  		base := NewP521Point().SetGenerator()
 396  		for i := 0; i < p521ElementLength*2; i++ {
 397  			p521GeneratorTable[i][0] = NewP521Point().Set(base)
 398  			for j := 1; j < 15; j++ {
 399  				p521GeneratorTable[i][j] = NewP521Point().Add(p521GeneratorTable[i][j-1], base)
 400  			}
 401  			base.Double(base)
 402  			base.Double(base)
 403  			base.Double(base)
 404  			base.Double(base)
 405  		}
 406  	})
 407  	return p521GeneratorTable
 408  }
 409  
 410  // ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
 411  // returns p.
 412  func (p *P521Point) ScalarBaseMult(scalar []byte) (*P521Point, error) {
 413  	if len(scalar) != p521ElementLength {
 414  		return nil, errors.New("invalid scalar length")
 415  	}
 416  	tables := p.generatorTable()
 417  
 418  	// This is also a scalar multiplication with a four-bit window like in
 419  	// ScalarMult, but in this case the doublings are precomputed. The value
 420  	// [windowValue]G added at iteration k would normally get doubled
 421  	// (totIterations-k)×4 times, but with a larger precomputation we can
 422  	// instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
 423  	// doublings between iterations.
 424  	t := NewP521Point()
 425  	p.Set(NewP521Point())
 426  	tableIndex := len(tables) - 1
 427  	for _, byte := range scalar {
 428  		windowValue := byte >> 4
 429  		tables[tableIndex].Select(t, windowValue)
 430  		p.Add(p, t)
 431  		tableIndex--
 432  
 433  		windowValue = byte & 0b1111
 434  		tables[tableIndex].Select(t, windowValue)
 435  		p.Add(p, t)
 436  		tableIndex--
 437  	}
 438  
 439  	return p, nil
 440  }
 441  
 442  // p521Sqrt sets e to a square root of x. If x is not a square, p521Sqrt returns
 443  // false and e is unchanged. e and x can overlap.
 444  func p521Sqrt(e, x *fiat.P521Element) (isSquare bool) {
 445  	candidate := &fiat.P521Element{}
 446  	p521SqrtCandidate(candidate, x)
 447  	square := (&fiat.P521Element{}).Square(candidate)
 448  	if square.Equal(x) != 1 {
 449  		return false
 450  	}
 451  	e.Set(candidate)
 452  	return true
 453  }
 454  
 455  // p521SqrtCandidate sets z to a square root candidate for x. z and x must not overlap.
 456  func p521SqrtCandidate(z, x *fiat.P521Element) {
 457  	// Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
 458  	//
 459  	// The sequence of 0 multiplications and 519 squarings is derived from the
 460  	// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
 461  	//
 462  	//	return  1 << 519
 463  	//
 464  
 465  	z.Square(x)
 466  	for s := 1; s < 519; s++ {
 467  		z.Square(z)
 468  	}
 469  }
 470