field.go raw

   1  // Copyright (c) 2013-2016 The btcsuite developers
   2  // Copyright (c) 2013-2016 Dave Collins
   3  // Use of this source code is governed by an ISC
   4  // license that can be found in the LICENSE file.
   5  
   6  package ecc
   7  
   8  // References:
   9  //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
  10  //     http://cacr.uwaterloo.ca/hac/
  11  
  12  // All elliptic curve operations for secp256k1 are done in a finite field
  13  // characterized by a 256-bit prime.  Given this precision is larger than the
  14  // biggest available native type, obviously some form of bignum math is needed.
  15  // This package implements specialized fixed-precision field arithmetic rather
  16  // than relying on an arbitrary-precision arithmetic package such as math/big
  17  // for dealing with the field math since the size is known.  As a result, rather
  18  // large performance gains are achieved by taking advantage of many
  19  // optimizations not available to arbitrary-precision arithmetic and generic
  20  // modular arithmetic algorithms.
  21  //
  22  // There are various ways to internally represent each finite field element.
  23  // For example, the most obvious representation would be to use an array of 4
  24  // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
  25  // a couple of issues.  First, there is no native Go type large enough to handle
  26  // the intermediate results while adding or multiplying two 64-bit numbers, and
  27  // second there is no space left for overflows when performing the intermediate
  28  // arithmetic between each array element which would lead to expensive carry
  29  // propagation.
  30  //
  31  // Given the above, this implementation represents the the field elements as
  32  // 10 uint32s with each word (array entry) treated as base 2^26.  This was
  33  // chosen for the following reasons:
  34  // 1) Most systems at the current time are 64-bit (or at least have 64-bit
  35  //    registers available for specialized purposes such as MMX) so the
  36  //    intermediate results can typically be done using a native register (and
  37  //    using uint64s to avoid the need for additional half-word arithmetic)
  38  // 2) In order to allow addition of the internal words without having to
  39  //    propagate the the carry, the max normalized value for each register must
  40  //    be less than the number of bits available in the register
  41  // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
  42  //    reasonable choice for #2
  43  // 4) Given the need for 256-bits of precision and the properties stated in #1,
  44  //    #2, and #3, the representation which best accommodates this is 10 uint32s
  45  //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
  46  //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
  47  //    overflow
  48  //
  49  // Since it is so important that the field arithmetic is extremely fast for
  50  // high performance crypto, this package does not perform any validation where
  51  // it ordinarily would.  For example, some functions only give the correct
  52  // result is the field is normalized and there is no checking to ensure it is.
  53  // While I typically prefer to ensure all state and input is valid for most
  54  // packages, this code is really only used internally and every extra check
  55  // counts.
  56  
  57  import (
  58  	"encoding/hex"
  59  )
  60  
  61  // Constants used to make the code more readable.
  62  const (
  63  	twoBitsMask   = 0x3
  64  	fourBitsMask  = 0xf
  65  	sixBitsMask   = 0x3f
  66  	eightBitsMask = 0xff
  67  )
  68  
  69  // Constants related to the field representation.
  70  const (
  71  	// fieldWords is the number of words used to internally represent the
  72  	// 256-bit value.
  73  	fieldWords = 10
  74  
  75  	// fieldBase is the exponent used to form the numeric base of each word.
  76  	// 2^(fieldBase*i) where i is the word position.
  77  	fieldBase = 26
  78  
  79  	// fieldOverflowBits is the minimum number of "overflow" bits for each
  80  	// word in the field value.
  81  	fieldOverflowBits = 32 - fieldBase
  82  
  83  	// fieldBaseMask is the mask for the bits in each word needed to
  84  	// represent the numeric base of each word (except the most significant
  85  	// word).
  86  	fieldBaseMask = (1 << fieldBase) - 1
  87  
  88  	// fieldMSBBits is the number of bits in the most significant word used
  89  	// to represent the value.
  90  	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
  91  
  92  	// fieldMSBMask is the mask for the bits in the most significant word
  93  	// needed to represent the value.
  94  	fieldMSBMask = (1 << fieldMSBBits) - 1
  95  
  96  	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
  97  	// internal field representation.  It is used during negation.
  98  	fieldPrimeWordZero = 0x3fffc2f
  99  
 100  	// fieldPrimeWordOne is word one of the secp256k1 prime in the
 101  	// internal field representation.  It is used during negation.
 102  	fieldPrimeWordOne = 0x3ffffbf
 103  )
 104  
 105  var (
 106  	// fieldQBytes is the value Q = (P+1)/4 for the secp256k1 prime P. This
 107  	// value is used to efficiently compute the square root of values in the
 108  	// field via exponentiation. The value of Q in hex is:
 109  	//
 110  	//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
 111  	fieldQBytes = []byte{
 112  		0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 113  		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 114  		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 115  		0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x0c,
 116  	}
 117  )
 118  
 119  // fieldVal implements optimized fixed-precision arithmetic over the
 120  // secp256k1 finite field.  This means all arithmetic is performed modulo
 121  // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
 122  // represents each 256-bit value as 10 32-bit integers in base 2^26.  This
 123  // provides 6 bits of overflow in each word (10 bits in the most significant
 124  // word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
 125  // the arithmetic needed for elliptic curve operations.
 126  //
 127  // The following depicts the internal representation:
 128  // 	 -----------------------------------------------------------------
 129  // 	|        n[9]       |        n[8]       | ... |        n[0]       |
 130  // 	| 32 bits available | 32 bits available | ... | 32 bits available |
 131  // 	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
 132  // 	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
 133  // 	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
 134  // 	 -----------------------------------------------------------------
 135  //
 136  // For example, consider the number 2^49 + 1.  It would be represented as:
 137  // 	n[0] = 1
 138  // 	n[1] = 2^23
 139  // 	n[2..9] = 0
 140  //
 141  // The full 256-bit value is then calculated by looping i from 9..0 and
 142  // doing sum(n[i] * 2^(26i)) like so:
 143  // 	n[9] * 2^(26*9) = 0    * 2^234 = 0
 144  // 	n[8] * 2^(26*8) = 0    * 2^208 = 0
 145  // 	...
 146  // 	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
 147  // 	n[0] * 2^(26*0) = 1    * 2^0   = 1
 148  // 	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
 149  type fieldVal struct {
 150  	n [10]uint32
 151  }
 152  
 153  // String returns the field value as a human-readable hex string.
 154  func (f fieldVal) String() string {
 155  	t := new(fieldVal).Set(&f).Normalize()
 156  	return hex.EncodeToString(t.Bytes()[:])
 157  }
 158  
 159  // Zero sets the field value to zero.  A newly created field value is already
 160  // set to zero.  This function can be useful to clear an existing field value
 161  // for reuse.
 162  func (f *fieldVal) Zero() {
 163  	f.n[0] = 0
 164  	f.n[1] = 0
 165  	f.n[2] = 0
 166  	f.n[3] = 0
 167  	f.n[4] = 0
 168  	f.n[5] = 0
 169  	f.n[6] = 0
 170  	f.n[7] = 0
 171  	f.n[8] = 0
 172  	f.n[9] = 0
 173  }
 174  
 175  // Set sets the field value equal to the passed value.
 176  //
 177  // The field value is returned to support chaining.  This enables syntax like:
 178  // f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
 179  // modified.
 180  func (f *fieldVal) Set(val *fieldVal) *fieldVal {
 181  	*f = *val
 182  	return f
 183  }
 184  
 185  // SetInt sets the field value to the passed integer.  This is a convenience
 186  // function since it is fairly common to perform some arithemetic with small
 187  // native integers.
 188  //
 189  // The field value is returned to support chaining.  This enables syntax such
 190  // as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
 191  func (f *fieldVal) SetInt(ui uint) *fieldVal {
 192  	f.Zero()
 193  	f.n[0] = uint32(ui)
 194  	return f
 195  }
 196  
 197  // SetBytes packs the passed 32-byte big-endian value into the internal field
 198  // value representation.
 199  //
 200  // The field value is returned to support chaining.  This enables syntax like:
 201  // f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
 202  func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
 203  	// Pack the 256 total bits across the 10 uint32 words with a max of
 204  	// 26-bits per word.  This could be done with a couple of for loops,
 205  	// but this unrolled version is significantly faster.  Benchmarks show
 206  	// this is about 34 times faster than the variant which uses loops.
 207  	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
 208  		(uint32(b[28])&twoBitsMask)<<24
 209  	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
 210  		(uint32(b[25])&fourBitsMask)<<22
 211  	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
 212  		(uint32(b[22])&sixBitsMask)<<20
 213  	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
 214  		uint32(b[19])<<18
 215  	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
 216  		(uint32(b[15])&twoBitsMask)<<24
 217  	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
 218  		(uint32(b[12])&fourBitsMask)<<22
 219  	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
 220  		(uint32(b[9])&sixBitsMask)<<20
 221  	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
 222  		uint32(b[6])<<18
 223  	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
 224  		(uint32(b[2])&twoBitsMask)<<24
 225  	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
 226  	return f
 227  }
 228  
 229  // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
 230  // integer (meaning it is truncated to the first 32 bytes), packs it into the
 231  // internal field value representation, and returns the updated field value.
 232  //
 233  // Note that since passing a slice with more than 32 bytes is truncated, it is
 234  // possible that the truncated value is less than the field prime.  It is up to
 235  // the caller to decide whether it needs to provide numbers of the appropriate
 236  // size or if it is acceptable to use this function with the described
 237  // truncation behavior.
 238  //
 239  // The field value is returned to support chaining.  This enables syntax like:
 240  // f := new(fieldVal).SetByteSlice(byteSlice)
 241  func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
 242  	var b32 [32]byte
 243  	if len(b) > 32 {
 244  		b = b[:32]
 245  	}
 246  	copy(b32[32-len(b):], b)
 247  	return f.SetBytes(&b32)
 248  }
 249  
 250  // SetHex decodes the passed big-endian hex string into the internal field value
 251  // representation.  Only the first 32-bytes are used.
 252  //
 253  // The field value is returned to support chaining.  This enables syntax like:
 254  // f := new(fieldVal).SetHex("0abc").Add(1) so that f = 0x0abc + 1
 255  func (f *fieldVal) SetHex(hexString string) *fieldVal {
 256  	if len(hexString)%2 != 0 {
 257  		hexString = "0" + hexString
 258  	}
 259  	bytes, _ := hex.DecodeString(hexString)
 260  	return f.SetByteSlice(bytes)
 261  }
 262  
 263  // Normalize normalizes the internal field words into the desired range and
 264  // performs fast modular reduction over the secp256k1 prime by making use of the
 265  // special form of the prime.
 266  func (f *fieldVal) Normalize() *fieldVal {
 267  	// The field representation leaves 6 bits of overflow in each word so
 268  	// intermediate calculations can be performed without needing to
 269  	// propagate the carry to each higher word during the calculations.  In
 270  	// order to normalize, we need to "compact" the full 256-bit value to
 271  	// the right while propagating any carries through to the high order
 272  	// word.
 273  	//
 274  	// Since this field is doing arithmetic modulo the secp256k1 prime, we
 275  	// also need to perform modular reduction over the prime.
 276  	//
 277  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
 278  	// when the modulus is of the special form m = b^t - c, highly efficient
 279  	// reduction can be achieved.
 280  	//
 281  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
 282  	// this criteria.
 283  	//
 284  	// 4294968273 in field representation (base 2^26) is:
 285  	// n[0] = 977
 286  	// n[1] = 64
 287  	// That is to say (2^26 * 64) + 977 = 4294968273
 288  	//
 289  	// The algorithm presented in the referenced section typically repeats
 290  	// until the quotient is zero.  However, due to our field representation
 291  	// we already know to within one reduction how many times we would need
 292  	// to repeat as it's the uppermost bits of the high order word.  Thus we
 293  	// can simply multiply the magnitude by the field representation of the
 294  	// prime and do a single iteration.  After this step there might be an
 295  	// additional carry to bit 256 (bit 22 of the high order word).
 296  	t9 := f.n[9]
 297  	m := t9 >> fieldMSBBits
 298  	t9 = t9 & fieldMSBMask
 299  	t0 := f.n[0] + m*977
 300  	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
 301  	t0 = t0 & fieldBaseMask
 302  	t2 := (t1 >> fieldBase) + f.n[2]
 303  	t1 = t1 & fieldBaseMask
 304  	t3 := (t2 >> fieldBase) + f.n[3]
 305  	t2 = t2 & fieldBaseMask
 306  	t4 := (t3 >> fieldBase) + f.n[4]
 307  	t3 = t3 & fieldBaseMask
 308  	t5 := (t4 >> fieldBase) + f.n[5]
 309  	t4 = t4 & fieldBaseMask
 310  	t6 := (t5 >> fieldBase) + f.n[6]
 311  	t5 = t5 & fieldBaseMask
 312  	t7 := (t6 >> fieldBase) + f.n[7]
 313  	t6 = t6 & fieldBaseMask
 314  	t8 := (t7 >> fieldBase) + f.n[8]
 315  	t7 = t7 & fieldBaseMask
 316  	t9 = (t8 >> fieldBase) + t9
 317  	t8 = t8 & fieldBaseMask
 318  
 319  	// At this point, the magnitude is guaranteed to be one, however, the
 320  	// value could still be greater than the prime if there was either a
 321  	// carry through to bit 256 (bit 22 of the higher order word) or the
 322  	// value is greater than or equal to the field characteristic.  The
 323  	// following determines if either or these conditions are true and does
 324  	// the final reduction in constant time.
 325  	//
 326  	// Note that the if/else statements here intentionally do the bitwise
 327  	// operators even when it won't change the value to ensure constant time
 328  	// between the branches.  Also note that 'm' will be zero when neither
 329  	// of the aforementioned conditions are true and the value will not be
 330  	// changed when 'm' is zero.
 331  	m = 1
 332  	if t9 == fieldMSBMask {
 333  		m &= 1
 334  	} else {
 335  		m &= 0
 336  	}
 337  	if t2&t3&t4&t5&t6&t7&t8 == fieldBaseMask {
 338  		m &= 1
 339  	} else {
 340  		m &= 0
 341  	}
 342  	if ((t0+977)>>fieldBase + t1 + 64) > fieldBaseMask {
 343  		m &= 1
 344  	} else {
 345  		m &= 0
 346  	}
 347  	if t9>>fieldMSBBits != 0 {
 348  		m |= 1
 349  	} else {
 350  		m |= 0
 351  	}
 352  	t0 = t0 + m*977
 353  	t1 = (t0 >> fieldBase) + t1 + (m << 6)
 354  	t0 = t0 & fieldBaseMask
 355  	t2 = (t1 >> fieldBase) + t2
 356  	t1 = t1 & fieldBaseMask
 357  	t3 = (t2 >> fieldBase) + t3
 358  	t2 = t2 & fieldBaseMask
 359  	t4 = (t3 >> fieldBase) + t4
 360  	t3 = t3 & fieldBaseMask
 361  	t5 = (t4 >> fieldBase) + t5
 362  	t4 = t4 & fieldBaseMask
 363  	t6 = (t5 >> fieldBase) + t6
 364  	t5 = t5 & fieldBaseMask
 365  	t7 = (t6 >> fieldBase) + t7
 366  	t6 = t6 & fieldBaseMask
 367  	t8 = (t7 >> fieldBase) + t8
 368  	t7 = t7 & fieldBaseMask
 369  	t9 = (t8 >> fieldBase) + t9
 370  	t8 = t8 & fieldBaseMask
 371  	t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.
 372  
 373  	// Finally, set the normalized and reduced words.
 374  	f.n[0] = t0
 375  	f.n[1] = t1
 376  	f.n[2] = t2
 377  	f.n[3] = t3
 378  	f.n[4] = t4
 379  	f.n[5] = t5
 380  	f.n[6] = t6
 381  	f.n[7] = t7
 382  	f.n[8] = t8
 383  	f.n[9] = t9
 384  	return f
 385  }
 386  
 387  // PutBytes unpacks the field value to a 32-byte big-endian value using the
 388  // passed byte array.  There is a similar function, Bytes, which unpacks the
 389  // field value into a new array and returns that.  This version is provided
 390  // since it can be useful to cut down on the number of allocations by allowing
 391  // the caller to reuse a buffer.
 392  //
 393  // The field value must be normalized for this function to return the correct
 394  // result.
 395  func (f *fieldVal) PutBytes(b *[32]byte) {
 396  	// Unpack the 256 total bits from the 10 uint32 words with a max of
 397  	// 26-bits per word.  This could be done with a couple of for loops,
 398  	// but this unrolled version is a bit faster.  Benchmarks show this is
 399  	// about 10 times faster than the variant which uses loops.
 400  	b[31] = byte(f.n[0] & eightBitsMask)
 401  	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
 402  	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
 403  	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
 404  	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
 405  	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
 406  	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
 407  	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
 408  	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
 409  	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
 410  	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
 411  	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
 412  	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
 413  	b[18] = byte(f.n[4] & eightBitsMask)
 414  	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
 415  	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
 416  	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
 417  	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
 418  	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
 419  	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
 420  	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
 421  	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
 422  	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
 423  	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
 424  	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
 425  	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
 426  	b[5] = byte(f.n[8] & eightBitsMask)
 427  	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
 428  	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
 429  	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
 430  	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
 431  	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
 432  }
 433  
 434  // Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
 435  // for a variant that allows the a buffer to be passed which can be useful to
 436  // to cut down on the number of allocations by allowing the caller to reuse a
 437  // buffer.
 438  //
 439  // The field value must be normalized for this function to return correct
 440  // result.
 441  func (f *fieldVal) Bytes() *[32]byte {
 442  	b := new([32]byte)
 443  	f.PutBytes(b)
 444  	return b
 445  }
 446  
 447  // IsZero returns whether or not the field value is equal to zero.
 448  func (f *fieldVal) IsZero() bool {
 449  	// The value can only be zero if no bits are set in any of the words.
 450  	// This is a constant time implementation.
 451  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
 452  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
 453  
 454  	return bits == 0
 455  }
 456  
 457  // IsOdd returns whether or not the field value is an odd number.
 458  //
 459  // The field value must be normalized for this function to return correct
 460  // result.
 461  func (f *fieldVal) IsOdd() bool {
 462  	// Only odd numbers have the bottom bit set.
 463  	return f.n[0]&1 == 1
 464  }
 465  
 466  // Equals returns whether or not the two field values are the same.  Both
 467  // field values being compared must be normalized for this function to return
 468  // the correct result.
 469  func (f *fieldVal) Equals(val *fieldVal) bool {
 470  	// Xor only sets bits when they are different, so the two field values
 471  	// can only be the same if no bits are set after xoring each word.
 472  	// This is a constant time implementation.
 473  	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
 474  		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
 475  		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
 476  		(f.n[9] ^ val.n[9])
 477  
 478  	return bits == 0
 479  }
 480  
 481  // NegateVal negates the passed value and stores the result in f.  The caller
 482  // must provide the magnitude of the passed value for a correct result.
 483  //
 484  // The field value is returned to support chaining.  This enables syntax like:
 485  // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
 486  func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
 487  	// Negation in the field is just the prime minus the value.  However,
 488  	// in order to allow negation against a field value without having to
 489  	// normalize/reduce it first, multiply by the magnitude (that is how
 490  	// "far" away it is from the normalized value) to adjust.  Also, since
 491  	// negating a value pushes it one more order of magnitude away from the
 492  	// normalized range, add 1 to compensate.
 493  	//
 494  	// For some intuition here, imagine you're performing mod 12 arithmetic
 495  	// (picture a clock) and you are negating the number 7.  So you start at
 496  	// 12 (which is of course 0 under mod 12) and count backwards (left on
 497  	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
 498  	// Now, assume you're starting with 19, which is a number that is
 499  	// already larger than the modulus and congruent to 7 (mod 12).  When a
 500  	// value is already in the desired range, its magnitude is 1.  Since 19
 501  	// is an additional "step", its magnitude (mod 12) is 2.  Since any
 502  	// multiple of the modulus is conguent to zero (mod m), the answer can
 503  	// be shortcut by simply mulplying the magnitude by the modulus and
 504  	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
 505  	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
 506  	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
 507  	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
 508  	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
 509  	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
 510  	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
 511  	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
 512  	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
 513  	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
 514  	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
 515  
 516  	return f
 517  }
 518  
 519  // Negate negates the field value.  The existing field value is modified.  The
 520  // caller must provide the magnitude of the field value for a correct result.
 521  //
 522  // The field value is returned to support chaining.  This enables syntax like:
 523  // f.Negate().AddInt(1) so that f = -f + 1.
 524  func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
 525  	return f.NegateVal(f, magnitude)
 526  }
 527  
 528  // AddInt adds the passed integer to the existing field value and stores the
 529  // result in f.  This is a convenience function since it is fairly common to
 530  // perform some arithemetic with small native integers.
 531  //
 532  // The field value is returned to support chaining.  This enables syntax like:
 533  // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
 534  func (f *fieldVal) AddInt(ui uint) *fieldVal {
 535  	// Since the field representation intentionally provides overflow bits,
 536  	// it's ok to use carryless addition as the carry bit is safely part of
 537  	// the word and will be normalized out.
 538  	f.n[0] += uint32(ui)
 539  
 540  	return f
 541  }
 542  
 543  // Add adds the passed value to the existing field value and stores the result
 544  // in f.
 545  //
 546  // The field value is returned to support chaining.  This enables syntax like:
 547  // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
 548  func (f *fieldVal) Add(val *fieldVal) *fieldVal {
 549  	// Since the field representation intentionally provides overflow bits,
 550  	// it's ok to use carryless addition as the carry bit is safely part of
 551  	// each word and will be normalized out.  This could obviously be done
 552  	// in a loop, but the unrolled version is faster.
 553  	f.n[0] += val.n[0]
 554  	f.n[1] += val.n[1]
 555  	f.n[2] += val.n[2]
 556  	f.n[3] += val.n[3]
 557  	f.n[4] += val.n[4]
 558  	f.n[5] += val.n[5]
 559  	f.n[6] += val.n[6]
 560  	f.n[7] += val.n[7]
 561  	f.n[8] += val.n[8]
 562  	f.n[9] += val.n[9]
 563  
 564  	return f
 565  }
 566  
 567  // Add2 adds the passed two field values together and stores the result in f.
 568  //
 569  // The field value is returned to support chaining.  This enables syntax like:
 570  // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
 571  func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal {
 572  	// Since the field representation intentionally provides overflow bits,
 573  	// it's ok to use carryless addition as the carry bit is safely part of
 574  	// each word and will be normalized out.  This could obviously be done
 575  	// in a loop, but the unrolled version is faster.
 576  	f.n[0] = val.n[0] + val2.n[0]
 577  	f.n[1] = val.n[1] + val2.n[1]
 578  	f.n[2] = val.n[2] + val2.n[2]
 579  	f.n[3] = val.n[3] + val2.n[3]
 580  	f.n[4] = val.n[4] + val2.n[4]
 581  	f.n[5] = val.n[5] + val2.n[5]
 582  	f.n[6] = val.n[6] + val2.n[6]
 583  	f.n[7] = val.n[7] + val2.n[7]
 584  	f.n[8] = val.n[8] + val2.n[8]
 585  	f.n[9] = val.n[9] + val2.n[9]
 586  
 587  	return f
 588  }
 589  
 590  // MulInt multiplies the field value by the passed int and stores the result in
 591  // f.  Note that this function can overflow if multiplying the value by any of
 592  // the individual words exceeds a max uint32.  Therefore it is important that
 593  // the caller ensures no overflows will occur before using this function.
 594  //
 595  // The field value is returned to support chaining.  This enables syntax like:
 596  // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
 597  func (f *fieldVal) MulInt(val uint) *fieldVal {
 598  	// Since each word of the field representation can hold up to
 599  	// fieldOverflowBits extra bits which will be normalized out, it's safe
 600  	// to multiply each word without using a larger type or carry
 601  	// propagation so long as the values won't overflow a uint32.  This
 602  	// could obviously be done in a loop, but the unrolled version is
 603  	// faster.
 604  	ui := uint32(val)
 605  	f.n[0] *= ui
 606  	f.n[1] *= ui
 607  	f.n[2] *= ui
 608  	f.n[3] *= ui
 609  	f.n[4] *= ui
 610  	f.n[5] *= ui
 611  	f.n[6] *= ui
 612  	f.n[7] *= ui
 613  	f.n[8] *= ui
 614  	f.n[9] *= ui
 615  
 616  	return f
 617  }
 618  
 619  // Mul multiplies the passed value to the existing field value and stores the
 620  // result in f.  Note that this function can overflow if multiplying any
 621  // of the individual words exceeds a max uint32.  In practice, this means the
 622  // magnitude of either value involved in the multiplication must be a max of
 623  // 8.
 624  //
 625  // The field value is returned to support chaining.  This enables syntax like:
 626  // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
 627  func (f *fieldVal) Mul(val *fieldVal) *fieldVal {
 628  	return f.Mul2(f, val)
 629  }
 630  
 631  // Mul2 multiplies the passed two field values together and stores the result
 632  // result in f.  Note that this function can overflow if multiplying any of
 633  // the individual words exceeds a max uint32.  In practice, this means the
 634  // magnitude of either value involved in the multiplication must be a max of
 635  // 8.
 636  //
 637  // The field value is returned to support chaining.  This enables syntax like:
 638  // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
 639  func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal {
 640  	// This could be done with a couple of for loops and an array to store
 641  	// the intermediate terms, but this unrolled version is significantly
 642  	// faster.
 643  
 644  	// Terms for 2^(fieldBase*0).
 645  	m := uint64(val.n[0]) * uint64(val2.n[0])
 646  	t0 := m & fieldBaseMask
 647  
 648  	// Terms for 2^(fieldBase*1).
 649  	m = (m >> fieldBase) +
 650  		uint64(val.n[0])*uint64(val2.n[1]) +
 651  		uint64(val.n[1])*uint64(val2.n[0])
 652  	t1 := m & fieldBaseMask
 653  
 654  	// Terms for 2^(fieldBase*2).
 655  	m = (m >> fieldBase) +
 656  		uint64(val.n[0])*uint64(val2.n[2]) +
 657  		uint64(val.n[1])*uint64(val2.n[1]) +
 658  		uint64(val.n[2])*uint64(val2.n[0])
 659  	t2 := m & fieldBaseMask
 660  
 661  	// Terms for 2^(fieldBase*3).
 662  	m = (m >> fieldBase) +
 663  		uint64(val.n[0])*uint64(val2.n[3]) +
 664  		uint64(val.n[1])*uint64(val2.n[2]) +
 665  		uint64(val.n[2])*uint64(val2.n[1]) +
 666  		uint64(val.n[3])*uint64(val2.n[0])
 667  	t3 := m & fieldBaseMask
 668  
 669  	// Terms for 2^(fieldBase*4).
 670  	m = (m >> fieldBase) +
 671  		uint64(val.n[0])*uint64(val2.n[4]) +
 672  		uint64(val.n[1])*uint64(val2.n[3]) +
 673  		uint64(val.n[2])*uint64(val2.n[2]) +
 674  		uint64(val.n[3])*uint64(val2.n[1]) +
 675  		uint64(val.n[4])*uint64(val2.n[0])
 676  	t4 := m & fieldBaseMask
 677  
 678  	// Terms for 2^(fieldBase*5).
 679  	m = (m >> fieldBase) +
 680  		uint64(val.n[0])*uint64(val2.n[5]) +
 681  		uint64(val.n[1])*uint64(val2.n[4]) +
 682  		uint64(val.n[2])*uint64(val2.n[3]) +
 683  		uint64(val.n[3])*uint64(val2.n[2]) +
 684  		uint64(val.n[4])*uint64(val2.n[1]) +
 685  		uint64(val.n[5])*uint64(val2.n[0])
 686  	t5 := m & fieldBaseMask
 687  
 688  	// Terms for 2^(fieldBase*6).
 689  	m = (m >> fieldBase) +
 690  		uint64(val.n[0])*uint64(val2.n[6]) +
 691  		uint64(val.n[1])*uint64(val2.n[5]) +
 692  		uint64(val.n[2])*uint64(val2.n[4]) +
 693  		uint64(val.n[3])*uint64(val2.n[3]) +
 694  		uint64(val.n[4])*uint64(val2.n[2]) +
 695  		uint64(val.n[5])*uint64(val2.n[1]) +
 696  		uint64(val.n[6])*uint64(val2.n[0])
 697  	t6 := m & fieldBaseMask
 698  
 699  	// Terms for 2^(fieldBase*7).
 700  	m = (m >> fieldBase) +
 701  		uint64(val.n[0])*uint64(val2.n[7]) +
 702  		uint64(val.n[1])*uint64(val2.n[6]) +
 703  		uint64(val.n[2])*uint64(val2.n[5]) +
 704  		uint64(val.n[3])*uint64(val2.n[4]) +
 705  		uint64(val.n[4])*uint64(val2.n[3]) +
 706  		uint64(val.n[5])*uint64(val2.n[2]) +
 707  		uint64(val.n[6])*uint64(val2.n[1]) +
 708  		uint64(val.n[7])*uint64(val2.n[0])
 709  	t7 := m & fieldBaseMask
 710  
 711  	// Terms for 2^(fieldBase*8).
 712  	m = (m >> fieldBase) +
 713  		uint64(val.n[0])*uint64(val2.n[8]) +
 714  		uint64(val.n[1])*uint64(val2.n[7]) +
 715  		uint64(val.n[2])*uint64(val2.n[6]) +
 716  		uint64(val.n[3])*uint64(val2.n[5]) +
 717  		uint64(val.n[4])*uint64(val2.n[4]) +
 718  		uint64(val.n[5])*uint64(val2.n[3]) +
 719  		uint64(val.n[6])*uint64(val2.n[2]) +
 720  		uint64(val.n[7])*uint64(val2.n[1]) +
 721  		uint64(val.n[8])*uint64(val2.n[0])
 722  	t8 := m & fieldBaseMask
 723  
 724  	// Terms for 2^(fieldBase*9).
 725  	m = (m >> fieldBase) +
 726  		uint64(val.n[0])*uint64(val2.n[9]) +
 727  		uint64(val.n[1])*uint64(val2.n[8]) +
 728  		uint64(val.n[2])*uint64(val2.n[7]) +
 729  		uint64(val.n[3])*uint64(val2.n[6]) +
 730  		uint64(val.n[4])*uint64(val2.n[5]) +
 731  		uint64(val.n[5])*uint64(val2.n[4]) +
 732  		uint64(val.n[6])*uint64(val2.n[3]) +
 733  		uint64(val.n[7])*uint64(val2.n[2]) +
 734  		uint64(val.n[8])*uint64(val2.n[1]) +
 735  		uint64(val.n[9])*uint64(val2.n[0])
 736  	t9 := m & fieldBaseMask
 737  
 738  	// Terms for 2^(fieldBase*10).
 739  	m = (m >> fieldBase) +
 740  		uint64(val.n[1])*uint64(val2.n[9]) +
 741  		uint64(val.n[2])*uint64(val2.n[8]) +
 742  		uint64(val.n[3])*uint64(val2.n[7]) +
 743  		uint64(val.n[4])*uint64(val2.n[6]) +
 744  		uint64(val.n[5])*uint64(val2.n[5]) +
 745  		uint64(val.n[6])*uint64(val2.n[4]) +
 746  		uint64(val.n[7])*uint64(val2.n[3]) +
 747  		uint64(val.n[8])*uint64(val2.n[2]) +
 748  		uint64(val.n[9])*uint64(val2.n[1])
 749  	t10 := m & fieldBaseMask
 750  
 751  	// Terms for 2^(fieldBase*11).
 752  	m = (m >> fieldBase) +
 753  		uint64(val.n[2])*uint64(val2.n[9]) +
 754  		uint64(val.n[3])*uint64(val2.n[8]) +
 755  		uint64(val.n[4])*uint64(val2.n[7]) +
 756  		uint64(val.n[5])*uint64(val2.n[6]) +
 757  		uint64(val.n[6])*uint64(val2.n[5]) +
 758  		uint64(val.n[7])*uint64(val2.n[4]) +
 759  		uint64(val.n[8])*uint64(val2.n[3]) +
 760  		uint64(val.n[9])*uint64(val2.n[2])
 761  	t11 := m & fieldBaseMask
 762  
 763  	// Terms for 2^(fieldBase*12).
 764  	m = (m >> fieldBase) +
 765  		uint64(val.n[3])*uint64(val2.n[9]) +
 766  		uint64(val.n[4])*uint64(val2.n[8]) +
 767  		uint64(val.n[5])*uint64(val2.n[7]) +
 768  		uint64(val.n[6])*uint64(val2.n[6]) +
 769  		uint64(val.n[7])*uint64(val2.n[5]) +
 770  		uint64(val.n[8])*uint64(val2.n[4]) +
 771  		uint64(val.n[9])*uint64(val2.n[3])
 772  	t12 := m & fieldBaseMask
 773  
 774  	// Terms for 2^(fieldBase*13).
 775  	m = (m >> fieldBase) +
 776  		uint64(val.n[4])*uint64(val2.n[9]) +
 777  		uint64(val.n[5])*uint64(val2.n[8]) +
 778  		uint64(val.n[6])*uint64(val2.n[7]) +
 779  		uint64(val.n[7])*uint64(val2.n[6]) +
 780  		uint64(val.n[8])*uint64(val2.n[5]) +
 781  		uint64(val.n[9])*uint64(val2.n[4])
 782  	t13 := m & fieldBaseMask
 783  
 784  	// Terms for 2^(fieldBase*14).
 785  	m = (m >> fieldBase) +
 786  		uint64(val.n[5])*uint64(val2.n[9]) +
 787  		uint64(val.n[6])*uint64(val2.n[8]) +
 788  		uint64(val.n[7])*uint64(val2.n[7]) +
 789  		uint64(val.n[8])*uint64(val2.n[6]) +
 790  		uint64(val.n[9])*uint64(val2.n[5])
 791  	t14 := m & fieldBaseMask
 792  
 793  	// Terms for 2^(fieldBase*15).
 794  	m = (m >> fieldBase) +
 795  		uint64(val.n[6])*uint64(val2.n[9]) +
 796  		uint64(val.n[7])*uint64(val2.n[8]) +
 797  		uint64(val.n[8])*uint64(val2.n[7]) +
 798  		uint64(val.n[9])*uint64(val2.n[6])
 799  	t15 := m & fieldBaseMask
 800  
 801  	// Terms for 2^(fieldBase*16).
 802  	m = (m >> fieldBase) +
 803  		uint64(val.n[7])*uint64(val2.n[9]) +
 804  		uint64(val.n[8])*uint64(val2.n[8]) +
 805  		uint64(val.n[9])*uint64(val2.n[7])
 806  	t16 := m & fieldBaseMask
 807  
 808  	// Terms for 2^(fieldBase*17).
 809  	m = (m >> fieldBase) +
 810  		uint64(val.n[8])*uint64(val2.n[9]) +
 811  		uint64(val.n[9])*uint64(val2.n[8])
 812  	t17 := m & fieldBaseMask
 813  
 814  	// Terms for 2^(fieldBase*18).
 815  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
 816  	t18 := m & fieldBaseMask
 817  
 818  	// What's left is for 2^(fieldBase*19).
 819  	t19 := m >> fieldBase
 820  
 821  	// At this point, all of the terms are grouped into their respective
 822  	// base.
 823  	//
 824  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
 825  	// when the modulus is of the special form m = b^t - c, highly efficient
 826  	// reduction can be achieved per the provided algorithm.
 827  	//
 828  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
 829  	// this criteria.
 830  	//
 831  	// 4294968273 in field representation (base 2^26) is:
 832  	// n[0] = 977
 833  	// n[1] = 64
 834  	// That is to say (2^26 * 64) + 977 = 4294968273
 835  	//
 836  	// Since each word is in base 26, the upper terms (t10 and up) start
 837  	// at 260 bits (versus the final desired range of 256 bits), so the
 838  	// field representation of 'c' from above needs to be adjusted for the
 839  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
 840  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
 841  	// n[0] = 977 * 16 = 15632
 842  	// n[1] = 64 * 16 = 1024
 843  	// That is to say (2^26 * 1024) + 15632 = 68719492368
 844  	//
 845  	// To reduce the final term, t19, the entire 'c' value is needed instead
 846  	// of only n[0] because there are no more terms left to handle n[1].
 847  	// This means there might be some magnitude left in the upper bits that
 848  	// is handled below.
 849  	m = t0 + t10*15632
 850  	t0 = m & fieldBaseMask
 851  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
 852  	t1 = m & fieldBaseMask
 853  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
 854  	t2 = m & fieldBaseMask
 855  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
 856  	t3 = m & fieldBaseMask
 857  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
 858  	t4 = m & fieldBaseMask
 859  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
 860  	t5 = m & fieldBaseMask
 861  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
 862  	t6 = m & fieldBaseMask
 863  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
 864  	t7 = m & fieldBaseMask
 865  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
 866  	t8 = m & fieldBaseMask
 867  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
 868  	t9 = m & fieldMSBMask
 869  	m = m >> fieldMSBBits
 870  
 871  	// At this point, if the magnitude is greater than 0, the overall value
 872  	// is greater than the max possible 256-bit value.  In particular, it is
 873  	// "how many times larger" than the max value it is.
 874  	//
 875  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
 876  	// quotient is zero.  However, due to the above, we already know at
 877  	// least how many times we would need to repeat as it's the value
 878  	// currently in m.  Thus we can simply multiply the magnitude by the
 879  	// field representation of the prime and do a single iteration.  Notice
 880  	// that nothing will be changed when the magnitude is zero, so we could
 881  	// skip this in that case, however always running regardless allows it
 882  	// to run in constant time.  The final result will be in the range
 883  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
 884  	// magnitude of 1, but it is denormalized.
 885  	d := t0 + m*977
 886  	f.n[0] = uint32(d & fieldBaseMask)
 887  	d = (d >> fieldBase) + t1 + m*64
 888  	f.n[1] = uint32(d & fieldBaseMask)
 889  	f.n[2] = uint32((d >> fieldBase) + t2)
 890  	f.n[3] = uint32(t3)
 891  	f.n[4] = uint32(t4)
 892  	f.n[5] = uint32(t5)
 893  	f.n[6] = uint32(t6)
 894  	f.n[7] = uint32(t7)
 895  	f.n[8] = uint32(t8)
 896  	f.n[9] = uint32(t9)
 897  
 898  	return f
 899  }
 900  
 901  // Square squares the field value.  The existing field value is modified.  Note
 902  // that this function can overflow if multiplying any of the individual words
 903  // exceeds a max uint32.  In practice, this means the magnitude of the field
 904  // must be a max of 8 to prevent overflow.
 905  //
 906  // The field value is returned to support chaining.  This enables syntax like:
 907  // f.Square().Mul(f2) so that f = f^2 * f2.
 908  func (f *fieldVal) Square() *fieldVal {
 909  	return f.SquareVal(f)
 910  }
 911  
 912  // SquareVal squares the passed value and stores the result in f.  Note that
 913  // this function can overflow if multiplying any of the individual words
 914  // exceeds a max uint32.  In practice, this means the magnitude of the field
 915  // being squred must be a max of 8 to prevent overflow.
 916  //
 917  // The field value is returned to support chaining.  This enables syntax like:
 918  // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
 919  func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal {
 920  	// This could be done with a couple of for loops and an array to store
 921  	// the intermediate terms, but this unrolled version is significantly
 922  	// faster.
 923  
 924  	// Terms for 2^(fieldBase*0).
 925  	m := uint64(val.n[0]) * uint64(val.n[0])
 926  	t0 := m & fieldBaseMask
 927  
 928  	// Terms for 2^(fieldBase*1).
 929  	m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
 930  	t1 := m & fieldBaseMask
 931  
 932  	// Terms for 2^(fieldBase*2).
 933  	m = (m >> fieldBase) +
 934  		2*uint64(val.n[0])*uint64(val.n[2]) +
 935  		uint64(val.n[1])*uint64(val.n[1])
 936  	t2 := m & fieldBaseMask
 937  
 938  	// Terms for 2^(fieldBase*3).
 939  	m = (m >> fieldBase) +
 940  		2*uint64(val.n[0])*uint64(val.n[3]) +
 941  		2*uint64(val.n[1])*uint64(val.n[2])
 942  	t3 := m & fieldBaseMask
 943  
 944  	// Terms for 2^(fieldBase*4).
 945  	m = (m >> fieldBase) +
 946  		2*uint64(val.n[0])*uint64(val.n[4]) +
 947  		2*uint64(val.n[1])*uint64(val.n[3]) +
 948  		uint64(val.n[2])*uint64(val.n[2])
 949  	t4 := m & fieldBaseMask
 950  
 951  	// Terms for 2^(fieldBase*5).
 952  	m = (m >> fieldBase) +
 953  		2*uint64(val.n[0])*uint64(val.n[5]) +
 954  		2*uint64(val.n[1])*uint64(val.n[4]) +
 955  		2*uint64(val.n[2])*uint64(val.n[3])
 956  	t5 := m & fieldBaseMask
 957  
 958  	// Terms for 2^(fieldBase*6).
 959  	m = (m >> fieldBase) +
 960  		2*uint64(val.n[0])*uint64(val.n[6]) +
 961  		2*uint64(val.n[1])*uint64(val.n[5]) +
 962  		2*uint64(val.n[2])*uint64(val.n[4]) +
 963  		uint64(val.n[3])*uint64(val.n[3])
 964  	t6 := m & fieldBaseMask
 965  
 966  	// Terms for 2^(fieldBase*7).
 967  	m = (m >> fieldBase) +
 968  		2*uint64(val.n[0])*uint64(val.n[7]) +
 969  		2*uint64(val.n[1])*uint64(val.n[6]) +
 970  		2*uint64(val.n[2])*uint64(val.n[5]) +
 971  		2*uint64(val.n[3])*uint64(val.n[4])
 972  	t7 := m & fieldBaseMask
 973  
 974  	// Terms for 2^(fieldBase*8).
 975  	m = (m >> fieldBase) +
 976  		2*uint64(val.n[0])*uint64(val.n[8]) +
 977  		2*uint64(val.n[1])*uint64(val.n[7]) +
 978  		2*uint64(val.n[2])*uint64(val.n[6]) +
 979  		2*uint64(val.n[3])*uint64(val.n[5]) +
 980  		uint64(val.n[4])*uint64(val.n[4])
 981  	t8 := m & fieldBaseMask
 982  
 983  	// Terms for 2^(fieldBase*9).
 984  	m = (m >> fieldBase) +
 985  		2*uint64(val.n[0])*uint64(val.n[9]) +
 986  		2*uint64(val.n[1])*uint64(val.n[8]) +
 987  		2*uint64(val.n[2])*uint64(val.n[7]) +
 988  		2*uint64(val.n[3])*uint64(val.n[6]) +
 989  		2*uint64(val.n[4])*uint64(val.n[5])
 990  	t9 := m & fieldBaseMask
 991  
 992  	// Terms for 2^(fieldBase*10).
 993  	m = (m >> fieldBase) +
 994  		2*uint64(val.n[1])*uint64(val.n[9]) +
 995  		2*uint64(val.n[2])*uint64(val.n[8]) +
 996  		2*uint64(val.n[3])*uint64(val.n[7]) +
 997  		2*uint64(val.n[4])*uint64(val.n[6]) +
 998  		uint64(val.n[5])*uint64(val.n[5])
 999  	t10 := m & fieldBaseMask
1000  
1001  	// Terms for 2^(fieldBase*11).
1002  	m = (m >> fieldBase) +
1003  		2*uint64(val.n[2])*uint64(val.n[9]) +
1004  		2*uint64(val.n[3])*uint64(val.n[8]) +
1005  		2*uint64(val.n[4])*uint64(val.n[7]) +
1006  		2*uint64(val.n[5])*uint64(val.n[6])
1007  	t11 := m & fieldBaseMask
1008  
1009  	// Terms for 2^(fieldBase*12).
1010  	m = (m >> fieldBase) +
1011  		2*uint64(val.n[3])*uint64(val.n[9]) +
1012  		2*uint64(val.n[4])*uint64(val.n[8]) +
1013  		2*uint64(val.n[5])*uint64(val.n[7]) +
1014  		uint64(val.n[6])*uint64(val.n[6])
1015  	t12 := m & fieldBaseMask
1016  
1017  	// Terms for 2^(fieldBase*13).
1018  	m = (m >> fieldBase) +
1019  		2*uint64(val.n[4])*uint64(val.n[9]) +
1020  		2*uint64(val.n[5])*uint64(val.n[8]) +
1021  		2*uint64(val.n[6])*uint64(val.n[7])
1022  	t13 := m & fieldBaseMask
1023  
1024  	// Terms for 2^(fieldBase*14).
1025  	m = (m >> fieldBase) +
1026  		2*uint64(val.n[5])*uint64(val.n[9]) +
1027  		2*uint64(val.n[6])*uint64(val.n[8]) +
1028  		uint64(val.n[7])*uint64(val.n[7])
1029  	t14 := m & fieldBaseMask
1030  
1031  	// Terms for 2^(fieldBase*15).
1032  	m = (m >> fieldBase) +
1033  		2*uint64(val.n[6])*uint64(val.n[9]) +
1034  		2*uint64(val.n[7])*uint64(val.n[8])
1035  	t15 := m & fieldBaseMask
1036  
1037  	// Terms for 2^(fieldBase*16).
1038  	m = (m >> fieldBase) +
1039  		2*uint64(val.n[7])*uint64(val.n[9]) +
1040  		uint64(val.n[8])*uint64(val.n[8])
1041  	t16 := m & fieldBaseMask
1042  
1043  	// Terms for 2^(fieldBase*17).
1044  	m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
1045  	t17 := m & fieldBaseMask
1046  
1047  	// Terms for 2^(fieldBase*18).
1048  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
1049  	t18 := m & fieldBaseMask
1050  
1051  	// What's left is for 2^(fieldBase*19).
1052  	t19 := m >> fieldBase
1053  
1054  	// At this point, all of the terms are grouped into their respective
1055  	// base.
1056  	//
1057  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
1058  	// when the modulus is of the special form m = b^t - c, highly efficient
1059  	// reduction can be achieved per the provided algorithm.
1060  	//
1061  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
1062  	// this criteria.
1063  	//
1064  	// 4294968273 in field representation (base 2^26) is:
1065  	// n[0] = 977
1066  	// n[1] = 64
1067  	// That is to say (2^26 * 64) + 977 = 4294968273
1068  	//
1069  	// Since each word is in base 26, the upper terms (t10 and up) start
1070  	// at 260 bits (versus the final desired range of 256 bits), so the
1071  	// field representation of 'c' from above needs to be adjusted for the
1072  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
1073  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
1074  	// n[0] = 977 * 16 = 15632
1075  	// n[1] = 64 * 16 = 1024
1076  	// That is to say (2^26 * 1024) + 15632 = 68719492368
1077  	//
1078  	// To reduce the final term, t19, the entire 'c' value is needed instead
1079  	// of only n[0] because there are no more terms left to handle n[1].
1080  	// This means there might be some magnitude left in the upper bits that
1081  	// is handled below.
1082  	m = t0 + t10*15632
1083  	t0 = m & fieldBaseMask
1084  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1085  	t1 = m & fieldBaseMask
1086  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1087  	t2 = m & fieldBaseMask
1088  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1089  	t3 = m & fieldBaseMask
1090  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1091  	t4 = m & fieldBaseMask
1092  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1093  	t5 = m & fieldBaseMask
1094  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1095  	t6 = m & fieldBaseMask
1096  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1097  	t7 = m & fieldBaseMask
1098  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1099  	t8 = m & fieldBaseMask
1100  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1101  	t9 = m & fieldMSBMask
1102  	m = m >> fieldMSBBits
1103  
1104  	// At this point, if the magnitude is greater than 0, the overall value
1105  	// is greater than the max possible 256-bit value.  In particular, it is
1106  	// "how many times larger" than the max value it is.
1107  	//
1108  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
1109  	// quotient is zero.  However, due to the above, we already know at
1110  	// least how many times we would need to repeat as it's the value
1111  	// currently in m.  Thus we can simply multiply the magnitude by the
1112  	// field representation of the prime and do a single iteration.  Notice
1113  	// that nothing will be changed when the magnitude is zero, so we could
1114  	// skip this in that case, however always running regardless allows it
1115  	// to run in constant time.  The final result will be in the range
1116  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1117  	// magnitude of 1, but it is denormalized.
1118  	n := t0 + m*977
1119  	f.n[0] = uint32(n & fieldBaseMask)
1120  	n = (n >> fieldBase) + t1 + m*64
1121  	f.n[1] = uint32(n & fieldBaseMask)
1122  	f.n[2] = uint32((n >> fieldBase) + t2)
1123  	f.n[3] = uint32(t3)
1124  	f.n[4] = uint32(t4)
1125  	f.n[5] = uint32(t5)
1126  	f.n[6] = uint32(t6)
1127  	f.n[7] = uint32(t7)
1128  	f.n[8] = uint32(t8)
1129  	f.n[9] = uint32(t9)
1130  
1131  	return f
1132  }
1133  
1134  // Inverse finds the modular multiplicative inverse of the field value.  The
1135  // existing field value is modified.
1136  //
1137  // The field value is returned to support chaining.  This enables syntax like:
1138  // f.Inverse().Mul(f2) so that f = f^-1 * f2.
1139  func (f *fieldVal) Inverse() *fieldVal {
1140  	// Fermat's little theorem states that for a nonzero number a and prime
1141  	// prime p, a^(p-1) = 1 (mod p).  Since the multipliciative inverse is
1142  	// a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p).
1143  	// Thus, a^(p-2) is the multiplicative inverse.
1144  	//
1145  	// In order to efficiently compute a^(p-2), p-2 needs to be split into
1146  	// a sequence of squares and multipications that minimizes the number of
1147  	// multiplications needed (since they are more costly than squarings).
1148  	// Intermediate results are saved and reused as well.
1149  	//
1150  	// The secp256k1 prime - 2 is 2^256 - 4294968275.
1151  	//
1152  	// This has a cost of 258 field squarings and 33 field multiplications.
1153  	var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal
1154  	a2.SquareVal(f)
1155  	a3.Mul2(&a2, f)
1156  	a4.SquareVal(&a2)
1157  	a10.SquareVal(&a4).Mul(&a2)
1158  	a11.Mul2(&a10, f)
1159  	a21.Mul2(&a10, &a11)
1160  	a42.SquareVal(&a21)
1161  	a45.Mul2(&a42, &a3)
1162  	a63.Mul2(&a42, &a21)
1163  	a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
1164  	a1023.Mul2(&a1019, &a4)
1165  	f.Set(&a63)                                    // f = a^(2^6 - 1)
1166  	f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
1167  	f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
1168  	f.Mul(&a1023)                                  // f = a^(2^16 - 1)
1169  	f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
1170  	f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
1171  	f.Mul(&a1023)                                  // f = a^(2^26 - 1)
1172  	f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
1173  	f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
1174  	f.Mul(&a1023)                                  // f = a^(2^36 - 1)
1175  	f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
1176  	f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
1177  	f.Mul(&a1023)                                  // f = a^(2^46 - 1)
1178  	f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
1179  	f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
1180  	f.Mul(&a1023)                                  // f = a^(2^56 - 1)
1181  	f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
1182  	f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
1183  	f.Mul(&a1023)                                  // f = a^(2^66 - 1)
1184  	f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
1185  	f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
1186  	f.Mul(&a1023)                                  // f = a^(2^76 - 1)
1187  	f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
1188  	f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
1189  	f.Mul(&a1023)                                  // f = a^(2^86 - 1)
1190  	f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
1191  	f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
1192  	f.Mul(&a1023)                                  // f = a^(2^96 - 1)
1193  	f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
1194  	f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
1195  	f.Mul(&a1023)                                  // f = a^(2^106 - 1)
1196  	f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
1197  	f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
1198  	f.Mul(&a1023)                                  // f = a^(2^116 - 1)
1199  	f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
1200  	f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
1201  	f.Mul(&a1023)                                  // f = a^(2^126 - 1)
1202  	f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
1203  	f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
1204  	f.Mul(&a1023)                                  // f = a^(2^136 - 1)
1205  	f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
1206  	f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
1207  	f.Mul(&a1023)                                  // f = a^(2^146 - 1)
1208  	f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
1209  	f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
1210  	f.Mul(&a1023)                                  // f = a^(2^156 - 1)
1211  	f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
1212  	f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
1213  	f.Mul(&a1023)                                  // f = a^(2^166 - 1)
1214  	f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
1215  	f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
1216  	f.Mul(&a1023)                                  // f = a^(2^176 - 1)
1217  	f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
1218  	f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
1219  	f.Mul(&a1023)                                  // f = a^(2^186 - 1)
1220  	f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
1221  	f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
1222  	f.Mul(&a1023)                                  // f = a^(2^196 - 1)
1223  	f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
1224  	f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
1225  	f.Mul(&a1023)                                  // f = a^(2^206 - 1)
1226  	f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
1227  	f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
1228  	f.Mul(&a1023)                                  // f = a^(2^216 - 1)
1229  	f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
1230  	f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
1231  	f.Mul(&a1019)                                  // f = a^(2^226 - 5)
1232  	f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
1233  	f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
1234  	f.Mul(&a1023)                                  // f = a^(2^236 - 4097)
1235  	f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
1236  	f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
1237  	f.Mul(&a1023)                                  // f = a^(2^246 - 4194305)
1238  	f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
1239  	f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
1240  	return f.Mul(&a45)                             // f = a^(2^256 - 4294968275) = a^(p-2)
1241  }
1242  
1243  // SqrtVal computes the square root of x modulo the curve's prime, and stores
1244  // the result in f. The square root is computed via exponentiation of x by the
1245  // value Q = (P+1)/4 using the curve's precomputed big-endian representation of
1246  // the Q.  This method uses a modified version of square-and-multiply
1247  // exponentiation over secp256k1 fieldVals to operate on bytes instead of bits,
1248  // which offers better performance over both big.Int exponentiation and bit-wise
1249  // square-and-multiply.
1250  //
1251  // NOTE: This method only works when P is intended to be the secp256k1 prime and
1252  // is not constant time. The returned value is of magnitude 1, but is
1253  // denormalized.
1254  func (f *fieldVal) SqrtVal(x *fieldVal) *fieldVal {
1255  	// The following computation iteratively computes x^((P+1)/4) = x^Q
1256  	// using the recursive, piece-wise definition:
1257  	//
1258  	//   x^n = (x^2)^(n/2) mod P       if n is even
1259  	//   x^n = x(x^2)^(n-1/2) mod P    if n is odd
1260  	//
1261  	// Given n in its big-endian representation b_k, ..., b_0, x^n can be
1262  	// computed by defining the sequence r_k+1, ..., r_0, where:
1263  	//
1264  	//   r_k+1 = 1
1265  	//   r_i   = (r_i+1)^2 * x^b_i    for i = k, ..., 0
1266  	//
1267  	// The final value r_0 = x^n.
1268  	//
1269  	// See https://en.wikipedia.org/wiki/Exponentiation_by_squaring for more
1270  	// details.
1271  	//
1272  	// This can be further optimized, by observing that the value of Q in
1273  	// secp256k1 has the value:
1274  	//
1275  	//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
1276  	//
1277  	// We can unroll the typical bit-wise interpretation of the
1278  	// exponentiation algorithm above to instead operate on bytes.
1279  	// This reduces the number of comparisons by an order of magnitude,
1280  	// reducing the overhead of failed branch predictions and additional
1281  	// comparisons in this method.
1282  	//
1283  	// Since there there are only 4 unique bytes of Q, this keeps the jump
1284  	// table small without the need to handle all possible 8-bit values.
1285  	// Further, we observe that 29 of the 32 bytes are 0xff; making the
1286  	// first case handle 0xff therefore optimizes the hot path.
1287  	f.SetInt(1)
1288  	for _, b := range fieldQBytes {
1289  		switch b {
1290  
1291  		// Most common case, where all 8 bits are set.
1292  		case 0xff:
1293  			f.Square().Mul(x)
1294  			f.Square().Mul(x)
1295  			f.Square().Mul(x)
1296  			f.Square().Mul(x)
1297  			f.Square().Mul(x)
1298  			f.Square().Mul(x)
1299  			f.Square().Mul(x)
1300  			f.Square().Mul(x)
1301  
1302  		// First byte of Q (0x3f), where all but the top two bits are
1303  		// set. Note that this case only applies six operations, since
1304  		// the highest bit of Q resides in bit six of the first byte. We
1305  		// ignore the first two bits, since squaring for these bits will
1306  		// result in an invalid result. We forgo squaring f before the
1307  		// first multiply, since 1^2 = 1.
1308  		case 0x3f:
1309  			f.Mul(x)
1310  			f.Square().Mul(x)
1311  			f.Square().Mul(x)
1312  			f.Square().Mul(x)
1313  			f.Square().Mul(x)
1314  			f.Square().Mul(x)
1315  
1316  		// Byte 28 of Q (0xbf), where only bit 7 is unset.
1317  		case 0xbf:
1318  			f.Square().Mul(x)
1319  			f.Square()
1320  			f.Square().Mul(x)
1321  			f.Square().Mul(x)
1322  			f.Square().Mul(x)
1323  			f.Square().Mul(x)
1324  			f.Square().Mul(x)
1325  			f.Square().Mul(x)
1326  
1327  		// Byte 31 of Q (0x0c), where only bits 3 and 4 are set.
1328  		default:
1329  			f.Square()
1330  			f.Square()
1331  			f.Square()
1332  			f.Square()
1333  			f.Square().Mul(x)
1334  			f.Square().Mul(x)
1335  			f.Square()
1336  			f.Square()
1337  		}
1338  	}
1339  
1340  	return f
1341  }
1342  
1343  // Sqrt computes the square root of f modulo the curve's prime, and stores the
1344  // result in f. The square root is computed via exponentiation of x by the value
1345  // Q = (P+1)/4 using the curve's precomputed big-endian representation of the Q.
1346  // This method uses a modified version of square-and-multiply exponentiation
1347  // over secp256k1 fieldVals to operate on bytes instead of bits, which offers
1348  // better performance over both big.Int exponentiation and bit-wise
1349  // square-and-multiply.
1350  //
1351  // NOTE: This method only works when P is intended to be the secp256k1 prime and
1352  // is not constant time. The returned value is of magnitude 1, but is
1353  // denormalized.
1354  func (f *fieldVal) Sqrt() *fieldVal {
1355  	return f.SqrtVal(f)
1356  }
1357