curve.mx raw

   1  // Copyright (c) 2015-2022 The Decred developers
   2  // Copyright 2013-2014 The btcsuite developers
   3  // Use of this source code is governed by an ISC
   4  // license that can be found in the LICENSE file.
   5  
   6  package secp256k1
   7  
   8  import (
   9  	"math/bits"
  10  
  11  	"smesh.lol/pkg/nostr/hex"
  12  )
  13  
  14  // References:
  15  //   [SECG]: Recommended Elliptic Curve Domain Parameters
  16  //     https://www.secg.org/sec2-v2.pdf
  17  //
  18  //   [GECC]: Guide to Elliptic Curve Cryptography (Hankerson, Menezes, Vanstone)
  19  //
  20  //   [BRID]: On Binary Representations of Integers with Digits -1, 0, 1
  21  //           (Prodinger, Helmut)
  22  //
  23  //   [STWS]: Secure-TWS: Authenticating Node to Multi-user Communication in
  24  //           Shared Sensor Networks (Oliveira, Leonardo B. et al)
  25  
  26  // All group operations are performed using Jacobian coordinates.  For a given
  27  // (x, y) position on the curve, the Jacobian coordinates are (x1, y1, z1)
  28  // where x = x1/z1^2 and y = y1/z1^3.
  29  
  30  // hexToFieldVal converts the passed hex string into a FieldVal and will panic
  31  // if there is an error.  This is only provided for the hard-coded constants so
  32  // errors in the source code can be detected. It will only (and must only) be
  33  // called with hard-coded values.
  34  func hexToFieldVal(s string) *FieldVal {
  35  	b, err := hex.Dec(s)
  36  	if err != nil {
  37  		panic("invalid hex in source file: " + s)
  38  	}
  39  	var f FieldVal
  40  	if overflow := f.SetByteSlice(b); overflow {
  41  		panic("hex in source file overflows mod P: " + s)
  42  	}
  43  	return &f
  44  }
  45  
  46  // hexToModNScalar converts the passed hex string into a ModNScalar and will
  47  // panic if there is an error.  This is only provided for the hard-coded
  48  // constants so errors in the source code can be detected. It will only (and
  49  // must only) be called with hard-coded values.
  50  func hexToModNScalar(s string) *ModNScalar {
  51  	var isNegative bool
  52  	if len(s) > 0 && s[0] == '-' {
  53  		isNegative = true
  54  		s = s[1:]
  55  	}
  56  	if len(s)%2 != 0 {
  57  		s = "0" + s
  58  	}
  59  	b, err := hex.Dec(s)
  60  	if err != nil {
  61  		panic("invalid hex in source file: " + s)
  62  	}
  63  	var scalar ModNScalar
  64  	if overflow := scalar.SetByteSlice(b); overflow {
  65  		panic("hex in source file overflows mod N scalar: " + s)
  66  	}
  67  	if isNegative {
  68  		scalar.Negate()
  69  	}
  70  	return &scalar
  71  }
  72  
  73  var (
  74  	// The following constants are used to accelerate scalar point
  75  	// multiplication through the use of the endomorphism:
  76  	//
  77  	// φ(Q) ⟼ λ*Q = (β*Q.x mod p, Q.y)
  78  	//
  79  	// See the code in the deriveEndomorphismParams function in genprecomps.go
  80  	// for details on their derivation.
  81  	//
  82  	// Additionally, see the scalar multiplication function in this file for
  83  	// details on how they are used.
  84  	endoNegLambda = hexToModNScalar("-5363ad4cc05c30e0a5261c028812645a122e22ea20816678df02967c1b23bd72")
  85  	endoBeta      = hexToFieldVal("7ae96a2b657c07106e64479eac3434e99cf0497512f58995c1396c28719501ee")
  86  	endoNegB1     = hexToModNScalar("e4437ed6010e88286f547fa90abfe4c3")
  87  	endoNegB2     = hexToModNScalar("-3086d221a7d46bcde86c90e49284eb15")
  88  	endoZ1        = hexToModNScalar("3086d221a7d46bcde86c90e49284eb153daa8a1471e8ca7f")
  89  	endoZ2        = hexToModNScalar("e4437ed6010e88286f547fa90abfe4c4221208ac9df506c6")
  90  	// Alternatively, the following parameters are valid as well, however,
  91  	// benchmarks show them to be about 2% slower in practice.
  92  	// endoNegLambda = hexToModNScalar("-ac9c52b33fa3cf1f5ad9e3fd77ed9ba4a880b9fc8ec739c2e0cfc810b51283ce")
  93  	// endoBeta      = hexToFieldVal("851695d49a83f8ef919bb86153cbcb16630fb68aed0a766a3ec693d68e6afa40")
  94  	// endoNegB1     = hexToModNScalar("3086d221a7d46bcde86c90e49284eb15")
  95  	// endoNegB2     = hexToModNScalar("-114ca50f7a8e2f3f657c1108d9d44cfd8")
  96  	// endoZ1        = hexToModNScalar("114ca50f7a8e2f3f657c1108d9d44cfd95fbc92c10fddd145")
  97  	// endoZ2        = hexToModNScalar("3086d221a7d46bcde86c90e49284eb153daa8a1471e8ca7f")
  98  )
  99  
 100  // JacobianPoint is an element of the group formed by the secp256k1 curve in
 101  // Jacobian projective coordinates and thus represents a point on the curve.
 102  type JacobianPoint struct {
 103  	// The X coordinate in Jacobian projective coordinates.  The affine point is
 104  	// X/z^2.
 105  	X FieldVal
 106  	// The Y coordinate in Jacobian projective coordinates.  The affine point is
 107  	// Y/z^3.
 108  	Y FieldVal
 109  	// The Z coordinate in Jacobian projective coordinates.
 110  	Z FieldVal
 111  }
 112  
 113  // MakeJacobianPoint returns a Jacobian point with the provided X, Y, and Z
 114  // coordinates.
 115  func MakeJacobianPoint(x, y, z *FieldVal) JacobianPoint {
 116  	var p JacobianPoint
 117  	p.X.Set(x)
 118  	p.Y.Set(y)
 119  	p.Z.Set(z)
 120  	return p
 121  }
 122  
 123  // Set sets the Jacobian point to the provided point.
 124  func (p *JacobianPoint) Set(other *JacobianPoint) {
 125  	p.X.Set(&other.X)
 126  	p.Y.Set(&other.Y)
 127  	p.Z.Set(&other.Z)
 128  }
 129  
 130  // ToAffine reduces the Z value of the existing point to 1 effectively
 131  // making it an affine coordinate in constant time.  The point will be
 132  // normalized.
 133  func (p *JacobianPoint) ToAffine() {
 134  	// Inversions are expensive and both point addition and point doubling
 135  	// are faster when working with points that have a z value of one.  So,
 136  	// if the point needs to be converted to affine, go ahead and normalize
 137  	// the point itself at the same time as the calculation is the same.
 138  	var zInv, tempZ FieldVal
 139  	zInv.Set(&p.Z).Inverse()  // zInv = Z^-1
 140  	tempZ.SquareVal(&zInv)    // tempZ = Z^-2
 141  	p.X.Mul(&tempZ)           // X = X/Z^2 (mag: 1)
 142  	p.Y.Mul(tempZ.Mul(&zInv)) // Y = Y/Z^3 (mag: 1)
 143  	p.Z.SetInt(1)             // Z = 1 (mag: 1)
 144  	// Normalize the x and y values.
 145  	p.X.Normalize()
 146  	p.Y.Normalize()
 147  }
 148  
 149  // addZ1AndZ2EqualsOne adds two Jacobian points that are already known to have
 150  // z values of 1 and stores the result in the provided result param.  That is to
 151  // say result = p1 + p2.  It performs faster addition than the generic add
 152  // routine since less arithmetic is needed due to the ability to avoid the z
 153  // value multiplications.
 154  //
 155  // NOTE: The points must be normalized for this function to return the correct
 156  // result.  The resulting point will be normalized.
 157  func addZ1AndZ2EqualsOne(p1, p2, result *JacobianPoint) {
 158  	// To compute the point addition efficiently, this implementation splits
 159  	// the equation into intermediate elements which are used to minimize
 160  	// the number of field multiplications using the method shown at:
 161  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-mmadd-2007-bl
 162  	//
 163  	// In particular it performs the calculations using the following:
 164  	// H = X2-X1, HH = H^2, I = 4*HH, J = H*I, r = 2*(Y2-Y1), V = X1*I
 165  	// X3 = r^2-J-2*V, Y3 = r*(V-X3)-2*Y1*J, Z3 = 2*H
 166  	//
 167  	// This results in a cost of 4 field multiplications, 2 field squarings,
 168  	// 6 field additions, and 5 integer multiplications.
 169  	x1, y1 := &p1.X, &p1.Y
 170  	x2, y2 := &p2.X, &p2.Y
 171  	x3, y3, z3 := &result.X, &result.Y, &result.Z
 172  	// When the x coordinates are the same for two points on the curve, the
 173  	// y coordinates either must be the same, in which case it is point
 174  	// doubling, or they are opposite and the result is the point at
 175  	// infinity per the group law for elliptic curve cryptography.
 176  	if x1.Equals(x2) {
 177  		if y1.Equals(y2) {
 178  			// Since x1 == x2 and y1 == y2, point doubling must be
 179  			// done, otherwise the addition would end up dividing
 180  			// by zero.
 181  			DoubleNonConst(p1, result)
 182  			return
 183  		}
 184  		// Since x1 == x2 and y1 == -y2, the sum is the point at
 185  		// infinity per the group law.
 186  		x3.SetInt(0)
 187  		y3.SetInt(0)
 188  		z3.SetInt(0)
 189  		return
 190  	}
 191  	// Calculate X3, Y3, and Z3 according to the intermediate elements
 192  	// breakdown above.
 193  	var h, i, j, r, v FieldVal
 194  	var negJ, neg2V, negX3 FieldVal
 195  	h.Set(x1).Negate(1).Add(x2)                // H = X2-X1 (mag: 3)
 196  	i.SquareVal(&h).MulInt(4)                  // I = 4*H^2 (mag: 4)
 197  	j.Mul2(&h, &i)                             // J = H*I (mag: 1)
 198  	r.Set(y1).Negate(1).Add(y2).MulInt(2)      // r = 2*(Y2-Y1) (mag: 6)
 199  	v.Mul2(x1, &i)                             // V = X1*I (mag: 1)
 200  	negJ.Set(&j).Negate(1)                     // negJ = -J (mag: 2)
 201  	neg2V.Set(&v).MulInt(2).Negate(2)          // neg2V = -(2*V) (mag: 3)
 202  	x3.Set(&r).Square().Add(&negJ).Add(&neg2V) // X3 = r^2-J-2*V (mag: 6)
 203  	negX3.Set(x3).Negate(6)                    // negX3 = -X3 (mag: 7)
 204  	j.Mul(y1).MulInt(2).Negate(2)              // J = -(2*Y1*J) (mag: 3)
 205  	y3.Set(&v).Add(&negX3).Mul(&r).Add(&j)     // Y3 = r*(V-X3)-2*Y1*J (mag: 4)
 206  	z3.Set(&h).MulInt(2)                       // Z3 = 2*H (mag: 6)
 207  	// Normalize the resulting field values as needed.
 208  	x3.Normalize()
 209  	y3.Normalize()
 210  	z3.Normalize()
 211  }
 212  
 213  // addZ1EqualsZ2 adds two Jacobian points that are already known to have the
 214  // same z value and stores the result in the provided result param.  That is to
 215  // say result = p1 + p2.  It performs faster addition than the generic add
 216  // routine since less arithmetic is needed due to the known equivalence.
 217  //
 218  // NOTE: The points must be normalized for this function to return the correct
 219  // result.  The resulting point will be normalized.
 220  func addZ1EqualsZ2(p1, p2, result *JacobianPoint) {
 221  	// To compute the point addition efficiently, this implementation splits
 222  	// the equation into intermediate elements which are used to minimize
 223  	// the number of field multiplications using a slightly modified version
 224  	// of the method shown at:
 225  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-zadd-2007-m
 226  	//
 227  	// In particular it performs the calculations using the following:
 228  	// A = X2-X1, B = A^2, C=Y2-Y1, D = C^2, E = X1*B, ToSliceOfBytes = X2*B
 229  	// X3 = D-E-ToSliceOfBytes, Y3 = C*(E-X3)-Y1*(ToSliceOfBytes-E), Z3 = Z1*A
 230  	//
 231  	// This results in a cost of 5 field multiplications, 2 field squarings,
 232  	// 9 field additions, and 0 integer multiplications.
 233  	x1, y1, z1 := &p1.X, &p1.Y, &p1.Z
 234  	x2, y2 := &p2.X, &p2.Y
 235  	x3, y3, z3 := &result.X, &result.Y, &result.Z
 236  	// When the x coordinates are the same for two points on the curve, the
 237  	// y coordinates either must be the same, in which case it is point
 238  	// doubling, or they are opposite and the result is the point at
 239  	// infinity per the group law for elliptic curve cryptography.
 240  	if x1.Equals(x2) {
 241  		if y1.Equals(y2) {
 242  			// Since x1 == x2 and y1 == y2, point doubling must be
 243  			// done, otherwise the addition would end up dividing
 244  			// by zero.
 245  			DoubleNonConst(p1, result)
 246  			return
 247  		}
 248  		// Since x1 == x2 and y1 == -y2, the sum is the point at
 249  		// infinity per the group law.
 250  		x3.SetInt(0)
 251  		y3.SetInt(0)
 252  		z3.SetInt(0)
 253  		return
 254  	}
 255  
 256  	// Calculate X3, Y3, and Z3 according to the intermediate elements
 257  	// breakdown above.
 258  	var a, b, c, d, err, f FieldVal
 259  	var negX1, negY1, negE, negX3 FieldVal
 260  	negX1.Set(x1).Negate(1)                // negX1 = -X1 (mag: 2)
 261  	negY1.Set(y1).Negate(1)                // negY1 = -Y1 (mag: 2)
 262  	a.Set(&negX1).Add(x2)                  // A = X2-X1 (mag: 3)
 263  	b.SquareVal(&a)                        // B = A^2 (mag: 1)
 264  	c.Set(&negY1).Add(y2)                  // C = Y2-Y1 (mag: 3)
 265  	d.SquareVal(&c)                        // D = C^2 (mag: 1)
 266  	err.Mul2(x1, &b)                       // E = X1*B (mag: 1)
 267  	negE.Set(&err).Negate(1)               // negE = -E (mag: 2)
 268  	f.Mul2(x2, &b)                         // ToSliceOfBytes = X2*B (mag: 1)
 269  	x3.Add2(&err, &f).Negate(2).Add(&d)    // X3 = D-E-ToSliceOfBytes (mag: 4)
 270  	negX3.Set(x3).Negate(4)                // negX3 = -X3 (mag: 5)
 271  	y3.Set(y1).Mul(f.Add(&negE)).Negate(1) // Y3 = -(Y1*(ToSliceOfBytes-E)) (mag: 2)
 272  	y3.Add(err.Add(&negX3).Mul(&c))        // Y3 = C*(E-X3)+Y3 (mag: 3)
 273  	z3.Mul2(z1, &a)                        // Z3 = Z1*A (mag: 1)
 274  	// Normalize the resulting field values as needed.
 275  	x3.Normalize()
 276  	y3.Normalize()
 277  	z3.Normalize()
 278  }
 279  
 280  // addZ2EqualsOne adds two Jacobian points when the second point is already
 281  // known to have a z value of 1 (and the z value for the first point is not 1)
 282  // and stores the result in the provided result param.  That is to say result =
 283  // p1 + p2.  It performs faster addition than the generic add routine since
 284  // less arithmetic is needed due to the ability to avoid multiplications by the
 285  // second point's z value.
 286  //
 287  // NOTE: The points must be normalized for this function to return the correct
 288  // result.  The resulting point will be normalized.
 289  func addZ2EqualsOne(p1, p2, result *JacobianPoint) {
 290  	// To compute the point addition efficiently, this implementation splits
 291  	// the equation into intermediate elements which are used to minimize
 292  	// the number of field multiplications using the method shown at:
 293  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl
 294  	//
 295  	// In particular it performs the calculations using the following:
 296  	// Z1Z1 = Z1^2, U2 = X2*Z1Z1, S2 = Y2*Z1*Z1Z1, H = U2-X1, HH = H^2,
 297  	// I = 4*HH, J = H*I, r = 2*(S2-Y1), V = X1*I
 298  	// X3 = r^2-J-2*V, Y3 = r*(V-X3)-2*Y1*J, Z3 = (Z1+H)^2-Z1Z1-HH
 299  	//
 300  	// This results in a cost of 7 field multiplications, 4 field squarings,
 301  	// 9 field additions, and 4 integer multiplications.
 302  	x1, y1, z1 := &p1.X, &p1.Y, &p1.Z
 303  	x2, y2 := &p2.X, &p2.Y
 304  	x3, y3, z3 := &result.X, &result.Y, &result.Z
 305  	// When the x coordinates are the same for two points on the curve, the
 306  	// y coordinates either must be the same, in which case it is point
 307  	// doubling, or they are opposite and the result is the point at
 308  	// infinity per the group law for elliptic curve cryptography.  Since
 309  	// any number of Jacobian coordinates can represent the same affine
 310  	// point, the x and y values need to be converted to like terms.  Due to
 311  	// the assumption made for this function that the second point has a z
 312  	// value of 1 (z2=1), the first point is already "converted".
 313  	var z1z1, u2, s2 FieldVal
 314  	z1z1.SquareVal(z1)                        // Z1Z1 = Z1^2 (mag: 1)
 315  	u2.Set(x2).Mul(&z1z1).Normalize()         // U2 = X2*Z1Z1 (mag: 1)
 316  	s2.Set(y2).Mul(&z1z1).Mul(z1).Normalize() // S2 = Y2*Z1*Z1Z1 (mag: 1)
 317  	if x1.Equals(&u2) {
 318  		if y1.Equals(&s2) {
 319  			// Since x1 == x2 and y1 == y2, point doubling must be
 320  			// done, otherwise the addition would end up dividing
 321  			// by zero.
 322  			DoubleNonConst(p1, result)
 323  			return
 324  		}
 325  		// Since x1 == x2 and y1 == -y2, the sum is the point at
 326  		// infinity per the group law.
 327  		x3.SetInt(0)
 328  		y3.SetInt(0)
 329  		z3.SetInt(0)
 330  		return
 331  	}
 332  	// Calculate X3, Y3, and Z3 according to the intermediate elements
 333  	// breakdown above.
 334  	var h, hh, i, j, r, rr, v FieldVal
 335  	var negX1, negY1, negX3 FieldVal
 336  	negX1.Set(x1).Negate(1)                // negX1 = -X1 (mag: 2)
 337  	h.Add2(&u2, &negX1)                    // H = U2-X1 (mag: 3)
 338  	hh.SquareVal(&h)                       // HH = H^2 (mag: 1)
 339  	i.Set(&hh).MulInt(4)                   // I = 4 * HH (mag: 4)
 340  	j.Mul2(&h, &i)                         // J = H*I (mag: 1)
 341  	negY1.Set(y1).Negate(1)                // negY1 = -Y1 (mag: 2)
 342  	r.Set(&s2).Add(&negY1).MulInt(2)       // r = 2*(S2-Y1) (mag: 6)
 343  	rr.SquareVal(&r)                       // rr = r^2 (mag: 1)
 344  	v.Mul2(x1, &i)                         // V = X1*I (mag: 1)
 345  	x3.Set(&v).MulInt(2).Add(&j).Negate(3) // X3 = -(J+2*V) (mag: 4)
 346  	x3.Add(&rr)                            // X3 = r^2+X3 (mag: 5)
 347  	negX3.Set(x3).Negate(5)                // negX3 = -X3 (mag: 6)
 348  	y3.Set(y1).Mul(&j).MulInt(2).Negate(2) // Y3 = -(2*Y1*J) (mag: 3)
 349  	y3.Add(v.Add(&negX3).Mul(&r))          // Y3 = r*(V-X3)+Y3 (mag: 4)
 350  	z3.Add2(z1, &h).Square()               // Z3 = (Z1+H)^2 (mag: 1)
 351  	z3.Add(z1z1.Add(&hh).Negate(2))        // Z3 = Z3-(Z1Z1+HH) (mag: 4)
 352  	// Normalize the resulting field values as needed.
 353  	x3.Normalize()
 354  	y3.Normalize()
 355  	z3.Normalize()
 356  }
 357  
 358  // addGeneric adds two Jacobian points without any assumptions about the z
 359  // values of the two points and stores the result in the provided result param.
 360  // That is to say result = p1 + p2.  It is the slowest of the add routines due
 361  // to requiring the most arithmetic.
 362  //
 363  // NOTE: The points must be normalized for this function to return the correct
 364  // result.  The resulting point will be normalized.
 365  func addGeneric(p1, p2, result *JacobianPoint) {
 366  	// To compute the point addition efficiently, this implementation splits
 367  	// the equation into intermediate elements which are used to minimize
 368  	// the number of field multiplications using the method shown at:
 369  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
 370  	//
 371  	// In particular it performs the calculations using the following:
 372  	// Z1Z1 = Z1^2, Z2Z2 = Z2^2, U1 = X1*Z2Z2, U2 = X2*Z1Z1, S1 = Y1*Z2*Z2Z2
 373  	// S2 = Y2*Z1*Z1Z1, H = U2-U1, I = (2*H)^2, J = H*I, r = 2*(S2-S1)
 374  	// V = U1*I
 375  	// X3 = r^2-J-2*V, Y3 = r*(V-X3)-2*S1*J, Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H
 376  	//
 377  	// This results in a cost of 11 field multiplications, 5 field squarings,
 378  	// 9 field additions, and 4 integer multiplications.
 379  	x1, y1, z1 := &p1.X, &p1.Y, &p1.Z
 380  	x2, y2, z2 := &p2.X, &p2.Y, &p2.Z
 381  	x3, y3, z3 := &result.X, &result.Y, &result.Z
 382  	// When the x coordinates are the same for two points on the curve, the
 383  	// y coordinates either must be the same, in which case it is point
 384  	// doubling, or they are opposite and the result is the point at
 385  	// infinity.  Since any number of Jacobian coordinates can represent the
 386  	// same affine point, the x and y values need to be converted to like
 387  	// terms.
 388  	var z1z1, z2z2, u1, u2, s1, s2 FieldVal
 389  	z1z1.SquareVal(z1)                        // Z1Z1 = Z1^2 (mag: 1)
 390  	z2z2.SquareVal(z2)                        // Z2Z2 = Z2^2 (mag: 1)
 391  	u1.Set(x1).Mul(&z2z2).Normalize()         // U1 = X1*Z2Z2 (mag: 1)
 392  	u2.Set(x2).Mul(&z1z1).Normalize()         // U2 = X2*Z1Z1 (mag: 1)
 393  	s1.Set(y1).Mul(&z2z2).Mul(z2).Normalize() // S1 = Y1*Z2*Z2Z2 (mag: 1)
 394  	s2.Set(y2).Mul(&z1z1).Mul(z1).Normalize() // S2 = Y2*Z1*Z1Z1 (mag: 1)
 395  	if u1.Equals(&u2) {
 396  		if s1.Equals(&s2) {
 397  			// Since x1 == x2 and y1 == y2, point doubling must be
 398  			// done, otherwise the addition would end up dividing
 399  			// by zero.
 400  			DoubleNonConst(p1, result)
 401  			return
 402  		}
 403  		// Since x1 == x2 and y1 == -y2, the sum is the point at
 404  		// infinity per the group law.
 405  		x3.SetInt(0)
 406  		y3.SetInt(0)
 407  		z3.SetInt(0)
 408  		return
 409  	}
 410  	// Calculate X3, Y3, and Z3 according to the intermediate elements
 411  	// breakdown above.
 412  	var h, i, j, r, rr, v FieldVal
 413  	var negU1, negS1, negX3 FieldVal
 414  	negU1.Set(&u1).Negate(1)               // negU1 = -U1 (mag: 2)
 415  	h.Add2(&u2, &negU1)                    // H = U2-U1 (mag: 3)
 416  	i.Set(&h).MulInt(2).Square()           // I = (2*H)^2 (mag: 1)
 417  	j.Mul2(&h, &i)                         // J = H*I (mag: 1)
 418  	negS1.Set(&s1).Negate(1)               // negS1 = -S1 (mag: 2)
 419  	r.Set(&s2).Add(&negS1).MulInt(2)       // r = 2*(S2-S1) (mag: 6)
 420  	rr.SquareVal(&r)                       // rr = r^2 (mag: 1)
 421  	v.Mul2(&u1, &i)                        // V = U1*I (mag: 1)
 422  	x3.Set(&v).MulInt(2).Add(&j).Negate(3) // X3 = -(J+2*V) (mag: 4)
 423  	x3.Add(&rr)                            // X3 = r^2+X3 (mag: 5)
 424  	negX3.Set(x3).Negate(5)                // negX3 = -X3 (mag: 6)
 425  	y3.Mul2(&s1, &j).MulInt(2).Negate(2)   // Y3 = -(2*S1*J) (mag: 3)
 426  	y3.Add(v.Add(&negX3).Mul(&r))          // Y3 = r*(V-X3)+Y3 (mag: 4)
 427  	z3.Add2(z1, z2).Square()               // Z3 = (Z1+Z2)^2 (mag: 1)
 428  	z3.Add(z1z1.Add(&z2z2).Negate(2))      // Z3 = Z3-(Z1Z1+Z2Z2) (mag: 4)
 429  	z3.Mul(&h)                             // Z3 = Z3*H (mag: 1)
 430  	// Normalize the resulting field values as needed.
 431  	x3.Normalize()
 432  	y3.Normalize()
 433  	z3.Normalize()
 434  }
 435  
 436  // AddNonConst adds the passed Jacobian points together and stores the result in
 437  // the provided result param in *non-constant* time.
 438  //
 439  // NOTE: The points must be normalized for this function to return the correct
 440  // result.  The resulting point will be normalized.
 441  func AddNonConst(p1, p2, result *JacobianPoint) {
 442  	// The point at infinity is the identity according to the group law for
 443  	// elliptic curve cryptography.  Thus, ∞ + P = P and P + ∞ = P.
 444  	if (p1.X.IsZero() && p1.Y.IsZero()) || p1.Z.IsZero() {
 445  		result.Set(p2)
 446  		return
 447  	}
 448  	if (p2.X.IsZero() && p2.Y.IsZero()) || p2.Z.IsZero() {
 449  		result.Set(p1)
 450  		return
 451  	}
 452  	// Faster point addition can be achieved when certain assumptions are
 453  	// met.  For example, when both points have the same z value, arithmetic
 454  	// on the z values can be avoided.  This section thus checks for these
 455  	// conditions and calls an appropriate add function which is accelerated
 456  	// by using those assumptions.
 457  	isZ1One := p1.Z.IsOne()
 458  	isZ2One := p2.Z.IsOne()
 459  	switch {
 460  	case isZ1One && isZ2One:
 461  		addZ1AndZ2EqualsOne(p1, p2, result)
 462  		return
 463  	case p1.Z.Equals(&p2.Z):
 464  		addZ1EqualsZ2(p1, p2, result)
 465  		return
 466  	case isZ2One:
 467  		addZ2EqualsOne(p1, p2, result)
 468  		return
 469  	}
 470  	// None of the above assumptions are true, so fall back to generic
 471  	// point addition.
 472  	addGeneric(p1, p2, result)
 473  }
 474  
 475  // doubleZ1EqualsOne performs point doubling on the passed Jacobian point when
 476  // the point is already known to have a z value of 1 and stores the result in
 477  // the provided result param.  That is to say result = 2*p.  It performs faster
 478  // point doubling than the generic routine since less arithmetic is needed due
 479  // to the ability to avoid multiplication by the z value.
 480  //
 481  // NOTE: The resulting point will be normalized.
 482  func doubleZ1EqualsOne(p, result *JacobianPoint) {
 483  	// This function uses the assumptions that z1 is 1, thus the point
 484  	// doubling formulas reduce to:
 485  	//
 486  	// X3 = (3*X1^2)^2 - 8*X1*Y1^2
 487  	// Y3 = (3*X1^2)*(4*X1*Y1^2 - X3) - 8*Y1^4
 488  	// Z3 = 2*Y1
 489  	//
 490  	// To compute the above efficiently, this implementation splits the
 491  	// equation into intermediate elements which are used to minimize the
 492  	// number of field multiplications in favor of field squarings which
 493  	// are roughly 35% faster than field multiplications with the current
 494  	// implementation at the time this was written.
 495  	//
 496  	// This uses a slightly modified version of the method shown at:
 497  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-mdbl-2007-bl
 498  	//
 499  	// In particular it performs the calculations using the following:
 500  	// A = X1^2, B = Y1^2, C = B^2, D = 2*((X1+B)^2-A-C)
 501  	// E = 3*A, ToSliceOfBytes = E^2, X3 = ToSliceOfBytes-2*D, Y3 = E*(D-X3)-8*C
 502  	// Z3 = 2*Y1
 503  	//
 504  	// This results in a cost of 1 field multiplication, 5 field squarings,
 505  	// 6 field additions, and 5 integer multiplications.
 506  	x1, y1 := &p.X, &p.Y
 507  	x3, y3, z3 := &result.X, &result.Y, &result.Z
 508  	var a, b, c, d, err, f FieldVal
 509  	z3.Set(y1).MulInt(2)                     // Z3 = 2*Y1 (mag: 2)
 510  	a.SquareVal(x1)                          // A = X1^2 (mag: 1)
 511  	b.SquareVal(y1)                          // B = Y1^2 (mag: 1)
 512  	c.SquareVal(&b)                          // C = B^2 (mag: 1)
 513  	b.Add(x1).Square()                       // B = (X1+B)^2 (mag: 1)
 514  	d.Set(&a).Add(&c).Negate(2)              // D = -(A+C) (mag: 3)
 515  	d.Add(&b).MulInt(2)                      // D = 2*(B+D)(mag: 8)
 516  	err.Set(&a).MulInt(3)                    // E = 3*A (mag: 3)
 517  	f.SquareVal(&err)                        // ToSliceOfBytes = E^2 (mag: 1)
 518  	x3.Set(&d).MulInt(2).Negate(16)          // X3 = -(2*D) (mag: 17)
 519  	x3.Add(&f)                               // X3 = ToSliceOfBytes+X3 (mag: 18)
 520  	f.Set(x3).Negate(18).Add(&d).Normalize() // ToSliceOfBytes = D-X3 (mag: 1)
 521  	y3.Set(&c).MulInt(8).Negate(8)           // Y3 = -(8*C) (mag: 9)
 522  	y3.Add(f.Mul(&err))                      // Y3 = E*ToSliceOfBytes+Y3 (mag: 10)
 523  	// Normalize the resulting field values as needed.
 524  	x3.Normalize()
 525  	y3.Normalize()
 526  	z3.Normalize()
 527  }
 528  
 529  // doubleGeneric performs point doubling on the passed Jacobian point without
 530  // any assumptions about the z value and stores the result in the provided
 531  // result param.  That is to say result = 2*p.  It is the slowest of the point
 532  // doubling routines due to requiring the most arithmetic.
 533  //
 534  // NOTE: The resulting point will be normalized.
 535  func doubleGeneric(p, result *JacobianPoint) {
 536  	// Point doubling formula for Jacobian coordinates for the secp256k1
 537  	// curve:
 538  	//
 539  	// X3 = (3*X1^2)^2 - 8*X1*Y1^2
 540  	// Y3 = (3*X1^2)*(4*X1*Y1^2 - X3) - 8*Y1^4
 541  	// Z3 = 2*Y1*Z1
 542  	//
 543  	// To compute the above efficiently, this implementation splits the
 544  	// equation into intermediate elements which are used to minimize the
 545  	// number of field multiplications in favor of field squarings which
 546  	// are roughly 35% faster than field multiplications with the current
 547  	// implementation at the time this was written.
 548  	//
 549  	// This uses a slightly modified version of the method shown at:
 550  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
 551  	//
 552  	// In particular it performs the calculations using the following:
 553  	// A = X1^2, B = Y1^2, C = B^2, D = 2*((X1+B)^2-A-C)
 554  	// E = 3*A, ToSliceOfBytes = E^2, X3 = ToSliceOfBytes-2*D, Y3 = E*(D-X3)-8*C
 555  	// Z3 = 2*Y1*Z1
 556  	//
 557  	// This results in a cost of 1 field multiplication, 5 field squarings,
 558  	// 6 field additions, and 5 integer multiplications.
 559  	x1, y1, z1 := &p.X, &p.Y, &p.Z
 560  	x3, y3, z3 := &result.X, &result.Y, &result.Z
 561  	var a, b, c, d, err, f FieldVal
 562  	z3.Mul2(y1, z1).MulInt(2)                // Z3 = 2*Y1*Z1 (mag: 2)
 563  	a.SquareVal(x1)                          // A = X1^2 (mag: 1)
 564  	b.SquareVal(y1)                          // B = Y1^2 (mag: 1)
 565  	c.SquareVal(&b)                          // C = B^2 (mag: 1)
 566  	b.Add(x1).Square()                       // B = (X1+B)^2 (mag: 1)
 567  	d.Set(&a).Add(&c).Negate(2)              // D = -(A+C) (mag: 3)
 568  	d.Add(&b).MulInt(2)                      // D = 2*(B+D)(mag: 8)
 569  	err.Set(&a).MulInt(3)                    // E = 3*A (mag: 3)
 570  	f.SquareVal(&err)                        // ToSliceOfBytes = E^2 (mag: 1)
 571  	x3.Set(&d).MulInt(2).Negate(16)          // X3 = -(2*D) (mag: 17)
 572  	x3.Add(&f)                               // X3 = ToSliceOfBytes+X3 (mag: 18)
 573  	f.Set(x3).Negate(18).Add(&d).Normalize() // ToSliceOfBytes = D-X3 (mag: 1)
 574  	y3.Set(&c).MulInt(8).Negate(8)           // Y3 = -(8*C) (mag: 9)
 575  	y3.Add(f.Mul(&err))                      // Y3 = E*ToSliceOfBytes+Y3 (mag: 10)
 576  	// Normalize the resulting field values as needed.
 577  	x3.Normalize()
 578  	y3.Normalize()
 579  	z3.Normalize()
 580  }
 581  
 582  // DoubleNonConst doubles the passed Jacobian point and stores the result in the
 583  // provided result parameter in *non-constant* time.
 584  //
 585  // NOTE: The point must be normalized for this function to return the correct
 586  // result.  The resulting point will be normalized.
 587  func DoubleNonConst(p, result *JacobianPoint) {
 588  	// Doubling the point at infinity is still infinity.
 589  	if p.Y.IsZero() || p.Z.IsZero() {
 590  		result.X.SetInt(0)
 591  		result.Y.SetInt(0)
 592  		result.Z.SetInt(0)
 593  		return
 594  	}
 595  	// Slightly faster point doubling can be achieved when the z value is 1
 596  	// by avoiding the multiplication on the z value.  This section calls
 597  	// a point doubling function which is accelerated by using that
 598  	// assumption when possible.
 599  	if p.Z.IsOne() {
 600  		doubleZ1EqualsOne(p, result)
 601  		return
 602  	}
 603  	// Fall back to generic point doubling which works with arbitrary z
 604  	// values.
 605  	doubleGeneric(p, result)
 606  }
 607  
 608  // mulAdd64 multiplies the two passed base 2^64 digits together, adds the given
 609  // value to the result, and returns the 128-bit result via a (hi, lo) tuple
 610  // where the upper half of the bits are returned in hi and the lower half in lo.
 611  func mulAdd64(digit1, digit2, m uint64) (hi, lo uint64) {
 612  	// Note the carry on the final add is safe to discard because the maximum
 613  	// possible value is:
 614  	//   (2^64 - 1)(2^64 - 1) + (2^64 - 1) = 2^128 - 2^64
 615  	// and:
 616  	//   2^128 - 2^64 < 2^128.
 617  	var c uint64
 618  	hi, lo = bits.Mul64(digit1, digit2)
 619  	lo, c = bits.Add64(lo, m, 0)
 620  	hi, _ = bits.Add64(hi, 0, c)
 621  	return hi, lo
 622  }
 623  
 624  // mulAdd64Carry multiplies the two passed base 2^64 digits together, adds both
 625  // the given value and carry to the result, and returns the 128-bit result via a
 626  // (hi, lo) tuple where the upper half of the bits are returned in hi and the
 627  // lower half in lo.
 628  func mulAdd64Carry(digit1, digit2, m, c uint64) (hi, lo uint64) {
 629  	// Note the carry on the high order add is safe to discard because the
 630  	// maximum possible value is:
 631  	//   (2^64 - 1)(2^64 - 1) + 2*(2^64 - 1) = 2^128 - 1
 632  	// and:
 633  	//   2^128 - 1 < 2^128.
 634  	var c2 uint64
 635  	hi, lo = mulAdd64(digit1, digit2, m)
 636  	lo, c2 = bits.Add64(lo, c, 0)
 637  	hi, _ = bits.Add64(hi, 0, c2)
 638  	return hi, lo
 639  }
 640  
 641  // mul512Rsh320Round computes the full 512-bit product of the two given scalars,
 642  // right shifts the result by 320 bits, rounds to the nearest integer, and
 643  // returns the result in constant time.
 644  //
 645  // Note that despite the inputs and output being mod n scalars, the 512-bit
 646  // product is NOT reduced mod N prior to the right shift.  This is intentional
 647  // because it is used for replacing division with multiplication and thus the
 648  // intermediate results must be done via a field extension to a larger field.
 649  func mul512Rsh320Round(n1, n2 *ModNScalar) ModNScalar {
 650  	// Convert n1 and n2 to base 2^64 digits.
 651  	n1Digit0 := uint64(n1.n[0]) | uint64(n1.n[1])<<32
 652  	n1Digit1 := uint64(n1.n[2]) | uint64(n1.n[3])<<32
 653  	n1Digit2 := uint64(n1.n[4]) | uint64(n1.n[5])<<32
 654  	n1Digit3 := uint64(n1.n[6]) | uint64(n1.n[7])<<32
 655  	n2Digit0 := uint64(n2.n[0]) | uint64(n2.n[1])<<32
 656  	n2Digit1 := uint64(n2.n[2]) | uint64(n2.n[3])<<32
 657  	n2Digit2 := uint64(n2.n[4]) | uint64(n2.n[5])<<32
 658  	n2Digit3 := uint64(n2.n[6]) | uint64(n2.n[7])<<32
 659  	// Compute the full 512-bit product n1*n2.
 660  	var r0, r1, r2, r3, r4, r5, r6, r7, c uint64
 661  	// Terms resulting from the product of the first digit of the second number
 662  	// by all digits of the first number.
 663  	//
 664  	// Note that r0 is ignored because it is not needed to compute the higher
 665  	// terms and it is shifted out below anyway.
 666  	c, _ = bits.Mul64(n2Digit0, n1Digit0)
 667  	c, r1 = mulAdd64(n2Digit0, n1Digit1, c)
 668  	c, r2 = mulAdd64(n2Digit0, n1Digit2, c)
 669  	r4, r3 = mulAdd64(n2Digit0, n1Digit3, c)
 670  	// Terms resulting from the product of the second digit of the second number
 671  	// by all digits of the first number.
 672  	//
 673  	// Note that r1 is ignored because it is no longer needed to compute the
 674  	// higher terms and it is shifted out below anyway.
 675  	c, _ = mulAdd64(n2Digit1, n1Digit0, r1)
 676  	c, r2 = mulAdd64Carry(n2Digit1, n1Digit1, r2, c)
 677  	c, r3 = mulAdd64Carry(n2Digit1, n1Digit2, r3, c)
 678  	r5, r4 = mulAdd64Carry(n2Digit1, n1Digit3, r4, c)
 679  	// Terms resulting from the product of the third digit of the second number
 680  	// by all digits of the first number.
 681  	//
 682  	// Note that r2 is ignored because it is no longer needed to compute the
 683  	// higher terms and it is shifted out below anyway.
 684  	c, _ = mulAdd64(n2Digit2, n1Digit0, r2)
 685  	c, r3 = mulAdd64Carry(n2Digit2, n1Digit1, r3, c)
 686  	c, r4 = mulAdd64Carry(n2Digit2, n1Digit2, r4, c)
 687  	r6, r5 = mulAdd64Carry(n2Digit2, n1Digit3, r5, c)
 688  	// Terms resulting from the product of the fourth digit of the second number
 689  	// by all digits of the first number.
 690  	//
 691  	// Note that r3 is ignored because it is no longer needed to compute the
 692  	// higher terms and it is shifted out below anyway.
 693  	c, _ = mulAdd64(n2Digit3, n1Digit0, r3)
 694  	c, r4 = mulAdd64Carry(n2Digit3, n1Digit1, r4, c)
 695  	c, r5 = mulAdd64Carry(n2Digit3, n1Digit2, r5, c)
 696  	r7, r6 = mulAdd64Carry(n2Digit3, n1Digit3, r6, c)
 697  	// At this point the upper 256 bits of the full 512-bit product n1*n2 are in
 698  	// r4..r7 (recall the low order results were discarded as noted above).
 699  	//
 700  	// Right shift the result 320 bits.  Note that the MSB of r4 determines
 701  	// whether or not to round because it is the final bit that is shifted out.
 702  	//
 703  	// Also, notice that r3..r7 would also ordinarily be set to 0 as well for
 704  	// the full shift, but that is skipped since they are no longer used as
 705  	// their values are known to be zero.
 706  	roundBit := r4 >> 63
 707  	r2, r1, r0 = r7, r6, r5
 708  	// Conditionally add 1 depending on the round bit in constant time.
 709  	r0, c = bits.Add64(r0, roundBit, 0)
 710  	r1, c = bits.Add64(r1, 0, c)
 711  	r2, r3 = bits.Add64(r2, 0, c)
 712  	// Finally, convert the result to a mod n scalar.
 713  	//
 714  	// No modular reduction is needed because the result is guaranteed to be
 715  	// less than the group order given the group order is > 2^255 and the
 716  	// maximum possible value of the result is 2^192.
 717  	var result ModNScalar
 718  	result.n[0] = uint32(r0)
 719  	result.n[1] = uint32(r0 >> 32)
 720  	result.n[2] = uint32(r1)
 721  	result.n[3] = uint32(r1 >> 32)
 722  	result.n[4] = uint32(r2)
 723  	result.n[5] = uint32(r2 >> 32)
 724  	result.n[6] = uint32(r3)
 725  	result.n[7] = uint32(r3 >> 32)
 726  	return result
 727  }
 728  
 729  // splitK returns two scalars (k1 and k2) that are a balanced length-two
 730  // representation of the provided scalar such that k ≡ k1 + k2*λ (mod N), where
 731  // N is the secp256k1 group order.
 732  func splitK(k *ModNScalar) (ModNScalar, ModNScalar) {
 733  	// The ultimate goal is to decompose k into two scalars that are around
 734  	// half the bit length of k such that the following equation is satisfied:
 735  	//
 736  	// k1 + k2*λ ≡ k (mod n)
 737  	//
 738  	// The strategy used here is based on algorithm 3.74 from [GECC] with a few
 739  	// modifications to make use of the more efficient mod n scalar type, avoid
 740  	// some costly long divisions, and minimize the number of calculations.
 741  	//
 742  	// Start by defining a function that takes a vector v = <a,b> ∈ ℤ⨯ℤ:
 743  	//
 744  	// f(v) = a + bλ (mod n)
 745  	//
 746  	// Then, find two vectors, v1 = <a1,b1>, and v2 = <a2,b2> in ℤ⨯ℤ such that:
 747  	// 1) v1 and v2 are linearly independent
 748  	// 2) f(v1) = f(v2) = 0
 749  	// 3) v1 and v2 have small Euclidean norm
 750  	//
 751  	// The vectors that satisfy these properties are found via the Euclidean
 752  	// algorithm and are precomputed since both n and λ are fixed values for the
 753  	// secp256k1 curve.  See genprecomps.go for derivation details.
 754  	//
 755  	// Next, consider k as a vector <k, 0> in ℚ⨯ℚ and by linear algebra write:
 756  	//
 757  	// <k, 0> = g1*v1 + g2*v2, where g1, g2 ∈ ℚ
 758  	//
 759  	// Note that, per above, the components of vector v1 are a1 and b1 while the
 760  	// components of vector v2 are a2 and b2.  Given the vectors v1 and v2 were
 761  	// generated such that a1*b2 - a2*b1 = n, solving the equation for g1 and g2
 762  	// yields:
 763  	//
 764  	// g1 = b2*k / n
 765  	// g2 = -b1*k / n
 766  	//
 767  	// Observe:
 768  	// <k, 0> = g1*v1 + g2*v2
 769  	//        = (b2*k/n)*<a1,b1> + (-b1*k/n)*<a2,b2>              | substitute
 770  	//        = <a1*b2*k/n, b1*b2*k/n> + <-a2*b1*k/n, -b2*b1*k/n> | scalar mul
 771  	//        = <a1*b2*k/n - a2*b1*k/n, b1*b2*k/n - b2*b1*k/n>    | vector add
 772  	//        = <[a1*b2*k - a2*b1*k]/n, 0>                        | simplify
 773  	//        = <k*[a1*b2 - a2*b1]/n, 0>                          | factor out k
 774  	//        = <k*n/n, 0>                                        | substitute
 775  	//        = <k, 0>                                            | simplify
 776  	//
 777  	// Now, consider an integer-valued vector v:
 778  	//
 779  	// v = c1*v1 + c2*v2, where c1, c2 ∈ ℤ (mod n)
 780  	//
 781  	// Since vectors v1 and v2 are linearly independent and were generated such
 782  	// that f(v1) = f(v2) = 0, all possible scalars c1 and c2 also produce a
 783  	// vector v such that f(v) = 0.
 784  	//
 785  	// In other words, c1 and c2 can be any integers and the resulting
 786  	// decomposition will still satisfy the required equation.  However, since
 787  	// the goal is to produce a balanced decomposition that provides a
 788  	// performance advantage by minimizing max(k1, k2), c1 and c2 need to be
 789  	// integers close to g1 and g2, respectively, so the resulting vector v is
 790  	// an integer-valued vector that is close to <k, 0>.
 791  	//
 792  	// Finally, consider the vector u:
 793  	//
 794  	// u  = <k, 0> - v
 795  	//
 796  	// It follows that f(u) = k and thus the two components of vector u satisfy
 797  	// the required equation:
 798  	//
 799  	// k1 + k2*λ ≡ k (mod n)
 800  	//
 801  	// Choosing c1 and c2:
 802  	// -------------------
 803  	//
 804  	// As mentioned above, c1 and c2 need to be integers close to g1 and g2,
 805  	// respectively.  The algorithm in [GECC] chooses the following values:
 806  	//
 807  	// c1 = round(g1) = round(b2*k / n)
 808  	// c2 = round(g2) = round(-b1*k / n)
 809  	//
 810  	// However, as section 3.4.2 of [STWS] notes, the aforementioned approach
 811  	// requires costly long divisions that can be avoided by precomputing
 812  	// rounded estimates as follows:
 813  	//
 814  	// t = bitlen(n) + 1
 815  	// z1 = round(2^t * b2 / n)
 816  	// z2 = round(2^t * -b1 / n)
 817  	//
 818  	// Then, use those precomputed estimates to perform a multiplication by k
 819  	// along with a floored division by 2^t, which is a simple right shift by t:
 820  	//
 821  	// c1 = floor(k * z1 / 2^t) = (k * z1) >> t
 822  	// c2 = floor(k * z2 / 2^t) = (k * z2) >> t
 823  	//
 824  	// Finally, round up if last bit discarded in the right shift by t is set by
 825  	// adding 1.
 826  	//
 827  	// As a further optimization, rather than setting t = bitlen(n) + 1 = 257 as
 828  	// stated by [STWS], this implementation uses a higher precision estimate of
 829  	// t = bitlen(n) + 64 = 320 because it allows simplification of the shifts
 830  	// in the internal calculations that are done via uint64s and also allows
 831  	// the use of floor in the precomputations.
 832  	//
 833  	// Thus, the calculations this implementation uses are:
 834  	//
 835  	// z1 = floor(b2<<320 / n)                                     | precomputed
 836  	// z2 = floor((-b1)<<320) / n)                                 | precomputed
 837  	// c1 = ((k * z1) >> 320) + (((k * z1) >> 319) & 1)
 838  	// c2 = ((k * z2) >> 320) + (((k * z2) >> 319) & 1)
 839  	//
 840  	// Putting it all together:
 841  	// ------------------------
 842  	//
 843  	// Calculate the following vectors using the values discussed above:
 844  	//
 845  	// v = c1*v1 + c2*v2
 846  	// u = <k, 0> - v
 847  	//
 848  	// The two components of the resulting vector v are:
 849  	// va = c1*a1 + c2*a2
 850  	// vb = c1*b1 + c2*b2
 851  	//
 852  	// Thus, the two components of the resulting vector u are:
 853  	// k1 = k - va
 854  	// k2 = 0 - vb = -vb
 855  	//
 856  	// As some final optimizations:
 857  	//
 858  	// 1) Note that k1 + k2*λ ≡ k (mod n) means that k1 ≡ k - k2*λ (mod n).
 859  	//    Therefore, the computation of va can be avoided to save two
 860  	//    field multiplications and a field addition.
 861  	//
 862  	// 2) Since k1 = k - k2*λ = k + k2*(-λ), an additional field negation is
 863  	//    saved by storing and using the negative version of λ.
 864  	//
 865  	// 3) Since k2 = -vb = -(c1*b1 + c2*b2) = c1*(-b1) + c2*(-b2), one more
 866  	//    field negation is saved by storing and using the negative versions of
 867  	//    b1 and b2.
 868  	//
 869  	// k2 = c1*(-b1) + c2*(-b2)
 870  	// k1 = k + k2*(-λ)
 871  	var k1, k2 ModNScalar
 872  	c1 := mul512Rsh320Round(k, endoZ1)
 873  	c2 := mul512Rsh320Round(k, endoZ2)
 874  	k2.Add2(c1.Mul(endoNegB1), c2.Mul(endoNegB2))
 875  	k1.Mul2(&k2, endoNegLambda).Add(k)
 876  	return k1, k2
 877  }
 878  
 879  // nafScalar represents a positive integer up to a maximum value of 2^256 - 1
 880  // encoded in non-adjacent form.
 881  //
 882  // NAF is a signed-digit representation where each digit can be +1, 0, or -1.
 883  //
 884  // In order to efficiently encode that information, this type uses two arrays, a
 885  // "positive" array where set bits represent the +1 signed digits and a
 886  // "negative" array where set bits represent the -1 signed digits.  0 is
 887  // represented by neither array having a bit set in that position.
 888  //
 889  // The Pos and Neg methods return the aforementioned positive and negative
 890  // arrays, respectively.
 891  type nafScalar struct {
 892  	// pos houses the positive portion of the representation.  An additional
 893  	// byte is required for the positive portion because the NAF encoding can be
 894  	// up to 1 bit longer than the normal binary encoding of the value.
 895  	//
 896  	// neg houses the negative portion of the representation.  Even though the
 897  	// additional byte is not required for the negative portion, since it can
 898  	// never exceed the length of the normal binary encoding of the value,
 899  	// keeping the same length for positive and negative portions simplifies
 900  	// working with the representation and allows extra conditional branches to
 901  	// be avoided.
 902  	//
 903  	// start and end specify the starting and ending index to use within the pos
 904  	// and neg arrays, respectively.  This allows fixed size arrays to be used
 905  	// versus needing to dynamically allocate space on the heap.
 906  	//
 907  	// NOTE: The fields are defined in the order that they are to minimize the
 908  	// padding on 32-bit and 64-bit platforms.
 909  	pos        [33]byte
 910  	start, end uint8
 911  	neg        [33]byte
 912  }
 913  
 914  // Pos returns the bytes of the encoded value with bits set in the positions
 915  // that represent a signed digit of +1.
 916  func (s *nafScalar) Pos() []byte { return s.pos[s.start:s.end] }
 917  
 918  // Neg returns the bytes of the encoded value with bits set in the positions
 919  // that represent a signed digit of -1.
 920  func (s *nafScalar) Neg() []byte { return s.neg[s.start:s.end] }
 921  
 922  // naf takes a positive integer up to a maximum value of 2^256 - 1 and returns
 923  // its non-adjacent form (NAF), which is a unique signed-digit representation
 924  // such that no two consecutive digits are nonzero.  See the documentation for
 925  // the returned type for details on how the representation is encoded
 926  // efficiently and how to interpret it
 927  //
 928  // NAF is useful in that it has the fewest nonzero digits of any signed digit
 929  // representation, only 1/3rd of its digits are nonzero on average, and at least
 930  // half of the digits will be 0.
 931  //
 932  // The aforementioned properties are particularly beneficial for optimizing
 933  // elliptic curve point multiplication because they effectively minimize the
 934  // number of required point additions in exchange for needing to perform a mix
 935  // of fewer point additions and subtractions and possibly one additional point
 936  // doubling.  This is an excellent tradeoff because subtraction of points has
 937  // the same computational complexity as addition of points and point doubling is
 938  // faster than both.
 939  func naf(k []byte) nafScalar {
 940  	// Strip leading zero bytes.
 941  	for len(k) > 0 && k[0] == 0x00 {
 942  		k = k[1:]
 943  	}
 944  	// The non-adjacent form (NAF) of a positive integer k is an expression
 945  	// k = ∑_(i=0, l-1) k_i * 2^i where k_i ∈ {0,±1}, k_(l-1) != 0, and no two
 946  	// consecutive digits k_i are nonzero.
 947  	//
 948  	// The traditional method of computing the NAF of a positive integer is
 949  	// given by algorithm 3.30 in [GECC].  It consists of repeatedly dividing k
 950  	// by 2 and choosing the remainder so that the quotient (k−r)/2 is even
 951  	// which ensures the next NAF digit is 0.  This requires log_2(k) steps.
 952  	//
 953  	// However, in [BRID], Prodinger notes that a closed form expression for the
 954  	// NAF representation is the bitwise difference 3k/2 - k/2.  This is more
 955  	// efficient as it can be computed in O(1) versus the O(log(n)) of the
 956  	// traditional approach.
 957  	//
 958  	// The following code makes use of that formula to compute the NAF more
 959  	// efficiently.
 960  	//
 961  	// To understand the logic here, observe that the only way the NAF has a
 962  	// nonzero digit at a given bit is when either 3k/2 or k/2 has a bit set in
 963  	// that position, but not both.  In other words, the result of a bitwise
 964  	// xor.  This can be seen simply by considering that when the bits are the
 965  	// same, the subtraction is either 0-0 or 1-1, both of which are 0.
 966  	//
 967  	// Further, observe that the "+1" digits in the result are contributed by
 968  	// 3k/2 while the "-1" digits are from k/2.  So, they can be determined by
 969  	// taking the bitwise and of each respective value with the result of the
 970  	// xor which identifies which bits are nonzero.
 971  	//
 972  	// Using that information, this loops backwards from the least significant
 973  	// byte to the most significant byte while performing the aforementioned
 974  	// calculations by propagating the potential carry and high order bit from
 975  	// the next word during the right shift.
 976  	kLen := len(k)
 977  	var result nafScalar
 978  	var carry uint8
 979  	for byteNum := kLen - 1; byteNum >= 0; byteNum-- {
 980  		// Calculate k/2.  Notice the carry from the previous word is added and
 981  		// the low order bit from the next word is shifted in accordingly.
 982  		kc := uint16(k[byteNum]) + uint16(carry)
 983  		var nextWord uint8
 984  		if byteNum > 0 {
 985  			nextWord = k[byteNum-1]
 986  		}
 987  		halfK := kc>>1 | uint16(nextWord<<7)
 988  		// Calculate 3k/2 and determine the non-zero digits in the result.
 989  		threeHalfK := kc + halfK
 990  		nonZeroResultDigits := threeHalfK ^ halfK
 991  		// Determine the signed digits {0, ±1}.
 992  		result.pos[byteNum+1] = uint8(threeHalfK & nonZeroResultDigits)
 993  		result.neg[byteNum+1] = uint8(halfK & nonZeroResultDigits)
 994  		// Propagate the potential carry from the 3k/2 calculation.
 995  		carry = uint8(threeHalfK >> 8)
 996  	}
 997  	result.pos[0] = carry
 998  	// Set the starting and ending positions within the fixed size arrays to
 999  	// identify the bytes that are actually used.  This is important since the
1000  	// encoding is big endian and thus trailing zero bytes changes its value.
1001  	result.start = 1 - carry
1002  	result.end = uint8(kLen + 1)
1003  	return result
1004  }
1005  
1006  // ScalarMultNonConst multiplies k*P where k is a scalar modulo the curve order
1007  // and P is a point in Jacobian projective coordinates and stores the result in
1008  // the provided Jacobian point.
1009  //
1010  // NOTE: The point must be normalized for this function to return the correct
1011  // result.  The resulting point will be normalized.
1012  func ScalarMultNonConst(k *ModNScalar, point, result *JacobianPoint) {
1013  	// -------------------------------------------------------------------------
1014  	// This makes use of the following efficiently-computable endomorphism to
1015  	// accelerate the computation:
1016  	//
1017  	// φ(P) ⟼ λ*P = (β*P.x mod p, P.y)
1018  	//
1019  	// In other words, there is a special scalar λ that every point on the
1020  	// elliptic curve can be multiplied by that will result in the same point as
1021  	// performing a single field multiplication of the point's X coordinate by
1022  	// the special value β.
1023  	//
1024  	// This is useful because scalar point multiplication is significantly more
1025  	// expensive than a single field multiplication given the former involves a
1026  	// series of point doublings and additions which themselves consist of a
1027  	// combination of several field multiplications, squarings, and additions.
1028  	//
1029  	// So, the idea behind making use of the endomorphism is thus to decompose
1030  	// the scalar into two scalars that are each about half the bit length of
1031  	// the original scalar such that:
1032  	//
1033  	// k ≡ k1 + k2*λ (mod n)
1034  	//
1035  	// This in turn allows the scalar point multiplication to be performed as a
1036  	// sum of two smaller half-length multiplications as follows:
1037  	//
1038  	// k*P = (k1 + k2*λ)*P
1039  	//     = k1*P + k2*λ*P
1040  	//     = k1*P + k2*φ(P)
1041  	//
1042  	// Thus, a speedup is achieved so long as it's faster to decompose the
1043  	// scalar, compute φ(P), and perform a simultaneous multiply of the
1044  	// half-length point multiplications than it is to compute a full width
1045  	// point multiplication.
1046  	//
1047  	// In practice, benchmarks show the current implementation provides a
1048  	// speedup of around 30-35% versus not using the endomorphism.
1049  	//
1050  	// See section 3.5 in [GECC] for a more rigorous treatment.
1051  	// -------------------------------------------------------------------------
1052  
1053  	// Per above, the main equation here to remember is:
1054  	//   k*P = k1*P + k2*φ(P)
1055  	//
1056  	// p1 below is P in the equation while p2 is φ(P) in the equation.
1057  	//
1058  	// NOTE: φ(x,y) = (β*x,y).  The Jacobian z coordinates are the same, so this
1059  	// math goes through.
1060  	//
1061  	// Also, calculate -p1 and -p2 for use in the NAF optimization.
1062  	p1, p1Neg := &JacobianPoint{}, &JacobianPoint{}
1063  	p1.Set(point)
1064  	p1Neg.Set(p1)
1065  	p1Neg.Y.Negate(1).Normalize()
1066  	p2, p2Neg := &JacobianPoint{}, &JacobianPoint{}
1067  	p2.Set(p1)
1068  	p2.X.Mul(endoBeta).Normalize()
1069  	p2Neg.Set(p2)
1070  	p2Neg.Y.Negate(1).Normalize()
1071  	// Decompose k into k1 and k2 such that k = k1 + k2*λ (mod n) where k1 and
1072  	// k2 are around half the bit length of k in order to halve the number of EC
1073  	// operations.
1074  	//
1075  	// Notice that this also flips the sign of the scalars and points as needed
1076  	// to minimize the bit lengths of the scalars k1 and k2.
1077  	//
1078  	// This is done because the scalars are operating modulo the group order
1079  	// which means that when they would otherwise be a small negative magnitude
1080  	// they will instead be a large positive magnitude.  Since the goal is for
1081  	// the scalars to have a small magnitude to achieve a performance boost, use
1082  	// their negation when they are greater than the half order of the group and
1083  	// flip the positive and negative values of the corresponding point that
1084  	// will be multiplied by to compensate.
1085  	//
1086  	// In other words, transform the calc when k1 is over the half order to:
1087  	//   k1*P = -k1*-P
1088  	//
1089  	// Similarly, transform the calc when k2 is over the half order to:
1090  	//   k2*φ(P) = -k2*-φ(P)
1091  	k1, k2 := splitK(k)
1092  	if k1.IsOverHalfOrder() {
1093  		k1.Negate()
1094  		p1, p1Neg = p1Neg, p1
1095  	}
1096  	if k2.IsOverHalfOrder() {
1097  		k2.Negate()
1098  		p2, p2Neg = p2Neg, p2
1099  	}
1100  	// Convert k1 and k2 into their NAF representations since NAF has a lot more
1101  	// zeros overall on average which minimizes the number of required point
1102  	// additions in exchange for a mix of fewer point additions and subtractions
1103  	// at the cost of one additional point doubling.
1104  	//
1105  	// This is an excellent tradeoff because subtraction of points has the same
1106  	// computational complexity as addition of points and point doubling is
1107  	// faster than both.
1108  	//
1109  	// Concretely, on average, 1/2 of all bits will be non-zero with the normal
1110  	// binary representation whereas only 1/3rd of the bits will be non-zero
1111  	// with NAF.
1112  	//
1113  	// The Pos version of the bytes contain the +1s and the Neg versions contain
1114  	// the -1s.
1115  	k1Bytes, k2Bytes := k1.Bytes(), k2.Bytes()
1116  	k1NAF, k2NAF := naf(k1Bytes[:]), naf(k2Bytes[:])
1117  	k1PosNAF, k1NegNAF := k1NAF.Pos(), k1NAF.Neg()
1118  	k2PosNAF, k2NegNAF := k2NAF.Pos(), k2NAF.Neg()
1119  	k1Len, k2Len := len(k1PosNAF), len(k2PosNAF)
1120  	// Add left-to-right using the NAF optimization.  See algorithm 3.77 from
1121  	// [GECC].
1122  	//
1123  	// Point Q = ∞ (point at infinity).
1124  	var q JacobianPoint
1125  	m := k1Len
1126  	if m < k2Len {
1127  		m = k2Len
1128  	}
1129  	for i := 0; i < m; i++ {
1130  		// Since k1 and k2 are potentially different lengths and the calculation
1131  		// is being done left to right, pad the front of the shorter one with
1132  		// 0s.
1133  		var k1BytePos, k1ByteNeg, k2BytePos, k2ByteNeg byte
1134  		if i >= m-k1Len {
1135  			k1BytePos, k1ByteNeg = k1PosNAF[i-(m-k1Len)], k1NegNAF[i-(m-k1Len)]
1136  		}
1137  		if i >= m-k2Len {
1138  			k2BytePos, k2ByteNeg = k2PosNAF[i-(m-k2Len)], k2NegNAF[i-(m-k2Len)]
1139  		}
1140  		for mask := uint8(1 << 7); mask > 0; mask >>= 1 {
1141  			// Q = 2 * Q
1142  			DoubleNonConst(&q, &q)
1143  			// Add or subtract the first point based on the signed digit of the
1144  			// NAF representation of k1 at this bit position.
1145  			//
1146  			// +1: Q = Q + p1
1147  			// -1: Q = Q - p1
1148  			//  0: Q = Q (no change)
1149  			if k1BytePos&mask == mask {
1150  				AddNonConst(&q, p1, &q)
1151  			} else if k1ByteNeg&mask == mask {
1152  				AddNonConst(&q, p1Neg, &q)
1153  			}
1154  			// Add or subtract the second point based on the signed digit of the
1155  			// NAF representation of k2 at this bit position.
1156  			//
1157  			// +1: Q = Q + p2
1158  			// -1: Q = Q - p2
1159  			//  0: Q = Q (no change)
1160  			if k2BytePos&mask == mask {
1161  				AddNonConst(&q, p2, &q)
1162  			} else if k2ByteNeg&mask == mask {
1163  				AddNonConst(&q, p2Neg, &q)
1164  			}
1165  		}
1166  	}
1167  	result.Set(&q)
1168  }
1169  
1170  // ScalarBaseMultNonConst multiplies k*G where k is a scalar modulo the curve
1171  // order and G is the base point of the group and stores the result in the
1172  // provided Jacobian point.
1173  //
1174  // NOTE: The resulting point will be normalized.
1175  func ScalarBaseMultNonConst(k *ModNScalar, result *JacobianPoint) {
1176  	bytePoints := getBytePointTable()
1177  	// Start with the point at infinity.
1178  	result.X.Zero()
1179  	result.Y.Zero()
1180  	result.Z.Zero()
1181  	// bytePoints has all 256 byte points for each 8-bit window.  The strategy
1182  	// is to add up the byte points.  This is best understood by expressing k in
1183  	// base-256 which it already sort of is.  Each "digit" in the 8-bit window
1184  	// can be looked up using bytePoints and added together.
1185  	kb := k.Bytes()
1186  	for i := 0; i < len(kb); i++ {
1187  		pt := &bytePoints[i][kb[i]]
1188  		AddNonConst(result, pt, result)
1189  	}
1190  }
1191  
1192  // isOnCurve returns whether or not the affine point (x,y) is on the curve.
1193  func isOnCurve(fx, fy *FieldVal) bool {
1194  	// Elliptic curve equation for secp256k1 is: y^2 = x^3 + 7
1195  	y2 := (&FieldVal{}).SquareVal(fy).Normalize()
1196  	result := (&FieldVal{}).SquareVal(fx).Mul(fx).AddInt(7).Normalize()
1197  	return y2.Equals(result)
1198  }
1199  
1200  // DecompressY attempts to calculate the Y coordinate for the given X coordinate
1201  // such that the result pair is a point on the secp256k1 curve.  It adjusts Y
1202  // based on the desired oddness and returns whether or not it was successful
1203  // since not all X coordinates are valid.
1204  //
1205  // The magnitude of the provided X coordinate field val must be a max of 8 for a
1206  // correct result.  The resulting Y field val will have a max magnitude of 2.
1207  func DecompressY(x *FieldVal, odd bool, resultY *FieldVal) bool {
1208  	// The curve equation for secp256k1 is: y^2 = x^3 + 7.  Thus
1209  	// y = +-sqrt(x^3 + 7).
1210  	//
1211  	// The x coordinate must be invalid if there is no square root for the
1212  	// calculated rhs because it means the X coordinate is not for a point on
1213  	// the curve.
1214  	x3PlusB := (&FieldVal{}).SquareVal(x).Mul(x).AddInt(7)
1215  	if hasSqrt := resultY.SquareRootVal(x3PlusB); !hasSqrt {
1216  		return false
1217  	}
1218  	if resultY.Normalize().IsOdd() != odd {
1219  		resultY.Negate(1)
1220  	}
1221  	return true
1222  }
1223