field.go raw

   1  // Copyright (c) 2013-2014 The btcsuite developers
   2  // Copyright (c) 2015-2024 The Decred developers
   3  // Copyright (c) 2013-2024 Dave Collins
   4  // Use of this source code is governed by an ISC
   5  // license that can be found in the LICENSE file.
   6  
   7  package secp256k1
   8  
   9  // References:
  10  //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
  11  //     http://cacr.uwaterloo.ca/hac/
  12  
  13  // All elliptic curve operations for secp256k1 are done in a finite field
  14  // characterized by a 256-bit prime.  Given this precision is larger than the
  15  // biggest available native type, obviously some form of bignum math is needed.
  16  // This package implements specialized fixed-precision field arithmetic rather
  17  // than relying on an arbitrary-precision arithmetic package such as math/big
  18  // for dealing with the field math since the size is known.  As a result, rather
  19  // large performance gains are achieved by taking advantage of many
  20  // optimizations not available to arbitrary-precision arithmetic and generic
  21  // modular arithmetic algorithms.
  22  //
  23  // There are various ways to internally represent each finite field element.
  24  // For example, the most obvious representation would be to use an array of 4
  25  // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
  26  // a couple of issues.  First, there is no native Go type large enough to handle
  27  // the intermediate results while adding or multiplying two 64-bit numbers, and
  28  // second there is no space left for overflows when performing the intermediate
  29  // arithmetic between each array element which would lead to expensive carry
  30  // propagation.
  31  //
  32  // Given the above, this implementation represents the field elements as
  33  // 10 uint32s with each word (array entry) treated as base 2^26.  This was
  34  // chosen for the following reasons:
  35  // 1) Most systems at the current time are 64-bit (or at least have 64-bit
  36  //    registers available for specialized purposes such as MMX) so the
  37  //    intermediate results can typically be done using a native register (and
  38  //    using uint64s to avoid the need for additional half-word arithmetic)
  39  // 2) In order to allow addition of the internal words without having to
  40  //    propagate the carry, the max normalized value for each register must
  41  //    be less than the number of bits available in the register
  42  // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
  43  //    reasonable choice for #2
  44  // 4) Given the need for 256-bits of precision and the properties stated in #1,
  45  //    #2, and #3, the representation which best accommodates this is 10 uint32s
  46  //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
  47  //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
  48  //    overflow
  49  //
  50  // Since it is so important that the field arithmetic is extremely fast for high
  51  // performance crypto, this type does not perform any validation where it
  52  // ordinarily would.  See the documentation for FieldVal for more details.
  53  
  54  import (
  55  	"encoding/hex"
  56  )
  57  
  58  // Constants used to make the code more readable.
  59  const (
  60  	twoBitsMask   = 0x3
  61  	fourBitsMask  = 0xf
  62  	sixBitsMask   = 0x3f
  63  	eightBitsMask = 0xff
  64  )
  65  
  66  // Constants related to the field representation.
  67  const (
  68  	// fieldWords is the number of words used to internally represent the
  69  	// 256-bit value.
  70  	fieldWords = 10
  71  
  72  	// fieldBase is the exponent used to form the numeric base of each word.
  73  	// 2^(fieldBase*i) where i is the word position.
  74  	fieldBase = 26
  75  
  76  	// fieldBaseMask is the mask for the bits in each word needed to
  77  	// represent the numeric base of each word (except the most significant
  78  	// word).
  79  	fieldBaseMask = (1 << fieldBase) - 1
  80  
  81  	// fieldMSBBits is the number of bits in the most significant word used
  82  	// to represent the value.
  83  	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
  84  
  85  	// fieldMSBMask is the mask for the bits in the most significant word
  86  	// needed to represent the value.
  87  	fieldMSBMask = (1 << fieldMSBBits) - 1
  88  
  89  	// These fields provide convenient access to each of the words of the
  90  	// secp256k1 prime in the internal field representation to improve code
  91  	// readability.
  92  	fieldPrimeWordZero  = 0x03fffc2f
  93  	fieldPrimeWordOne   = 0x03ffffbf
  94  	fieldPrimeWordTwo   = 0x03ffffff
  95  	fieldPrimeWordThree = 0x03ffffff
  96  	fieldPrimeWordFour  = 0x03ffffff
  97  	fieldPrimeWordFive  = 0x03ffffff
  98  	fieldPrimeWordSix   = 0x03ffffff
  99  	fieldPrimeWordSeven = 0x03ffffff
 100  	fieldPrimeWordEight = 0x03ffffff
 101  	fieldPrimeWordNine  = 0x003fffff
 102  )
 103  
 104  // FieldVal implements optimized fixed-precision arithmetic over the
 105  // secp256k1 finite field.  This means all arithmetic is performed modulo
 106  //
 107  //	0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.
 108  //
 109  // WARNING: Since it is so important for the field arithmetic to be extremely
 110  // fast for high performance crypto, this type does not perform any validation
 111  // of documented preconditions where it ordinarily would.  As a result, it is
 112  // IMPERATIVE for callers to understand some key concepts that are described
 113  // below and ensure the methods are called with the necessary preconditions that
 114  // each method is documented with.  For example, some methods only give the
 115  // correct result if the field value is normalized and others require the field
 116  // values involved to have a maximum magnitude and THERE ARE NO EXPLICIT CHECKS
 117  // TO ENSURE THOSE PRECONDITIONS ARE SATISFIED.  This does, unfortunately, make
 118  // the type more difficult to use correctly and while I typically prefer to
 119  // ensure all state and input is valid for most code, this is a bit of an
 120  // exception because those extra checks really add up in what ends up being
 121  // critical hot paths.
 122  //
 123  // The first key concept when working with this type is normalization.  In order
 124  // to avoid the need to propagate a ton of carries, the internal representation
 125  // provides additional overflow bits for each word of the overall 256-bit value.
 126  // This means that there are multiple internal representations for the same
 127  // value and, as a result, any methods that rely on comparison of the value,
 128  // such as equality and oddness determination, require the caller to provide a
 129  // normalized value.
 130  //
 131  // The second key concept when working with this type is magnitude.  As
 132  // previously mentioned, the internal representation provides additional
 133  // overflow bits which means that the more math operations that are performed on
 134  // the field value between normalizations, the more those overflow bits
 135  // accumulate.  The magnitude is effectively that maximum possible number of
 136  // those overflow bits that could possibly be required as a result of a given
 137  // operation.  Since there are only a limited number of overflow bits available,
 138  // this implies that the max possible magnitude MUST be tracked by the caller
 139  // and the caller MUST normalize the field value if a given operation would
 140  // cause the magnitude of the result to exceed the max allowed value.
 141  //
 142  // IMPORTANT: The max allowed magnitude of a field value is 32.
 143  type FieldVal struct {
 144  	// Each 256-bit value is represented as 10 32-bit integers in base 2^26.
 145  	// This provides 6 bits of overflow in each word (10 bits in the most
 146  	// significant word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It
 147  	// only implements the arithmetic needed for elliptic curve operations.
 148  	//
 149  	// The following depicts the internal representation:
 150  	// 	 -----------------------------------------------------------------
 151  	// 	|        n[9]       |        n[8]       | ... |        n[0]       |
 152  	// 	| 32 bits available | 32 bits available | ... | 32 bits available |
 153  	// 	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
 154  	// 	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
 155  	// 	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
 156  	// 	 -----------------------------------------------------------------
 157  	//
 158  	// For example, consider the number 2^49 + 1.  It would be represented as:
 159  	// 	n[0] = 1
 160  	// 	n[1] = 2^23
 161  	// 	n[2..9] = 0
 162  	//
 163  	// The full 256-bit value is then calculated by looping i from 9..0 and
 164  	// doing sum(n[i] * 2^(26i)) like so:
 165  	// 	n[9] * 2^(26*9) = 0    * 2^234 = 0
 166  	// 	n[8] * 2^(26*8) = 0    * 2^208 = 0
 167  	// 	...
 168  	// 	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
 169  	// 	n[0] * 2^(26*0) = 1    * 2^0   = 1
 170  	// 	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
 171  	n [10]uint32
 172  }
 173  
 174  // String returns the field value as a normalized human-readable hex string.
 175  //
 176  //	Preconditions: None
 177  //	Output Normalized: Field is not modified -- same as input value
 178  //	Output Max Magnitude: Field is not modified -- same as input value
 179  func (f FieldVal) String() string {
 180  	// f is a copy, so it's safe to normalize it without mutating the original.
 181  	f.Normalize()
 182  	return hex.EncodeToString(f.Bytes()[:])
 183  }
 184  
 185  // Zero sets the field value to zero in constant time.  A newly created field
 186  // value is already set to zero.  This function can be useful to clear an
 187  // existing field value for reuse.
 188  //
 189  //	Preconditions: None
 190  //	Output Normalized: Yes
 191  //	Output Max Magnitude: 1
 192  func (f *FieldVal) Zero() {
 193  	f.n[0] = 0
 194  	f.n[1] = 0
 195  	f.n[2] = 0
 196  	f.n[3] = 0
 197  	f.n[4] = 0
 198  	f.n[5] = 0
 199  	f.n[6] = 0
 200  	f.n[7] = 0
 201  	f.n[8] = 0
 202  	f.n[9] = 0
 203  }
 204  
 205  // Set sets the field value equal to the passed value in constant time.  The
 206  // normalization and magnitude of the two fields will be identical.
 207  //
 208  // The field value is returned to support chaining.  This enables syntax like:
 209  // f := new(FieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
 210  // modified.
 211  //
 212  //	Preconditions: None
 213  //	Output Normalized: Same as input value
 214  //	Output Max Magnitude: Same as input value
 215  func (f *FieldVal) Set(val *FieldVal) *FieldVal {
 216  	*f = *val
 217  	return f
 218  }
 219  
 220  // SetInt sets the field value to the passed integer in constant time.  This is
 221  // a convenience function since it is fairly common to perform some arithmetic
 222  // with small native integers.
 223  //
 224  // The field value is returned to support chaining.  This enables syntax such
 225  // as f := new(FieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
 226  //
 227  //	Preconditions: None
 228  //	Output Normalized: Yes
 229  //	Output Max Magnitude: 1
 230  func (f *FieldVal) SetInt(ui uint16) *FieldVal {
 231  	f.Zero()
 232  	f.n[0] = uint32(ui)
 233  	return f
 234  }
 235  
 236  // SetBytes packs the passed 32-byte big-endian value into the internal field
 237  // value representation in constant time.  SetBytes interprets the provided
 238  // array as a 256-bit big-endian unsigned integer, packs it into the internal
 239  // field value representation, and returns either 1 if it is greater than or
 240  // equal to the field prime (aka it overflowed) or 0 otherwise in constant time.
 241  //
 242  // Note that a bool is not used here because it is not possible in Go to convert
 243  // from a bool to numeric value in constant time and many constant-time
 244  // operations require a numeric value.
 245  //
 246  //	Preconditions: None
 247  //	Output Normalized: Yes if no overflow, no otherwise
 248  //	Output Max Magnitude: 1
 249  func (f *FieldVal) SetBytes(b *[32]byte) uint32 {
 250  	// Pack the 256 total bits across the 10 uint32 words with a max of
 251  	// 26-bits per word.  This could be done with a couple of for loops,
 252  	// but this unrolled version is significantly faster.  Benchmarks show
 253  	// this is about 34 times faster than the variant which uses loops.
 254  	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
 255  		(uint32(b[28])&twoBitsMask)<<24
 256  	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
 257  		(uint32(b[25])&fourBitsMask)<<22
 258  	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
 259  		(uint32(b[22])&sixBitsMask)<<20
 260  	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
 261  		uint32(b[19])<<18
 262  	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
 263  		(uint32(b[15])&twoBitsMask)<<24
 264  	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
 265  		(uint32(b[12])&fourBitsMask)<<22
 266  	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
 267  		(uint32(b[9])&sixBitsMask)<<20
 268  	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
 269  		uint32(b[6])<<18
 270  	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
 271  		(uint32(b[2])&twoBitsMask)<<24
 272  	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
 273  
 274  	// The intuition here is that the field value is greater than the prime if
 275  	// one of the higher individual words is greater than corresponding word of
 276  	// the prime and all higher words in the field value are equal to their
 277  	// corresponding word of the prime.  Since this type is modulo the prime,
 278  	// being equal is also an overflow back to 0.
 279  	//
 280  	// Note that because the input is 32 bytes and it was just packed into the
 281  	// field representation, the only words that can possibly be greater are
 282  	// zero and one, because ceil(log_2(2^256 - 1 - P)) = 33 bits max and the
 283  	// internal field representation encodes 26 bits with each word.
 284  	//
 285  	// Thus, there is no need to test if the upper words of the field value
 286  	// exceeds them, hence, only equality is checked for them.
 287  	highWordsEq := constantTimeEq(f.n[9], fieldPrimeWordNine)
 288  	highWordsEq &= constantTimeEq(f.n[8], fieldPrimeWordEight)
 289  	highWordsEq &= constantTimeEq(f.n[7], fieldPrimeWordSeven)
 290  	highWordsEq &= constantTimeEq(f.n[6], fieldPrimeWordSix)
 291  	highWordsEq &= constantTimeEq(f.n[5], fieldPrimeWordFive)
 292  	highWordsEq &= constantTimeEq(f.n[4], fieldPrimeWordFour)
 293  	highWordsEq &= constantTimeEq(f.n[3], fieldPrimeWordThree)
 294  	highWordsEq &= constantTimeEq(f.n[2], fieldPrimeWordTwo)
 295  	overflow := highWordsEq & constantTimeGreater(f.n[1], fieldPrimeWordOne)
 296  	highWordsEq &= constantTimeEq(f.n[1], fieldPrimeWordOne)
 297  	overflow |= highWordsEq & constantTimeGreaterOrEq(f.n[0], fieldPrimeWordZero)
 298  
 299  	return overflow
 300  }
 301  
 302  // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
 303  // integer (meaning it is truncated to the first 32 bytes), packs it into the
 304  // internal field value representation, and returns whether or not the resulting
 305  // truncated 256-bit integer is greater than or equal to the field prime (aka it
 306  // overflowed) in constant time.
 307  //
 308  // Note that since passing a slice with more than 32 bytes is truncated, it is
 309  // possible that the truncated value is less than the field prime and hence it
 310  // will not be reported as having overflowed in that case.  It is up to the
 311  // caller to decide whether it needs to provide numbers of the appropriate size
 312  // or it if is acceptable to use this function with the described truncation and
 313  // overflow behavior.
 314  //
 315  //	Preconditions: None
 316  //	Output Normalized: Yes if no overflow, no otherwise
 317  //	Output Max Magnitude: 1
 318  func (f *FieldVal) SetByteSlice(b []byte) bool {
 319  	var b32 [32]byte
 320  	b = b[:constantTimeMin(uint32(len(b)), 32)]
 321  	copy(b32[:], b32[:32-len(b)])
 322  	copy(b32[32-len(b):], b)
 323  	result := f.SetBytes(&b32)
 324  	zeroArray32(&b32)
 325  	return result != 0
 326  }
 327  
 328  // Normalize normalizes the internal field words into the desired range and
 329  // performs fast modular reduction over the secp256k1 prime by making use of the
 330  // special form of the prime in constant time.
 331  //
 332  //	Preconditions: None
 333  //	Output Normalized: Yes
 334  //	Output Max Magnitude: 1
 335  func (f *FieldVal) Normalize() *FieldVal {
 336  	// The field representation leaves 6 bits of overflow in each word so
 337  	// intermediate calculations can be performed without needing to
 338  	// propagate the carry to each higher word during the calculations.  In
 339  	// order to normalize, we need to "compact" the full 256-bit value to
 340  	// the right while propagating any carries through to the high order
 341  	// word.
 342  	//
 343  	// Since this field is doing arithmetic modulo the secp256k1 prime, we
 344  	// also need to perform modular reduction over the prime.
 345  	//
 346  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
 347  	// when the modulus is of the special form m = b^t - c, highly efficient
 348  	// reduction can be achieved.
 349  	//
 350  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
 351  	// this criteria.
 352  	//
 353  	// 4294968273 in field representation (base 2^26) is:
 354  	// n[0] = 977
 355  	// n[1] = 64
 356  	// That is to say (2^26 * 64) + 977 = 4294968273
 357  	//
 358  	// The algorithm presented in the referenced section typically repeats
 359  	// until the quotient is zero.  However, due to our field representation
 360  	// we already know to within one reduction how many times we would need
 361  	// to repeat as it's the uppermost bits of the high order word.  Thus we
 362  	// can simply multiply the magnitude by the field representation of the
 363  	// prime and do a single iteration.  After this step there might be an
 364  	// additional carry to bit 256 (bit 22 of the high order word).
 365  	t9 := f.n[9]
 366  	m := t9 >> fieldMSBBits
 367  	t9 &= fieldMSBMask
 368  	t0 := f.n[0] + m*977
 369  	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
 370  	t0 &= fieldBaseMask
 371  	t2 := (t1 >> fieldBase) + f.n[2]
 372  	t1 &= fieldBaseMask
 373  	t3 := (t2 >> fieldBase) + f.n[3]
 374  	t2 &= fieldBaseMask
 375  	t4 := (t3 >> fieldBase) + f.n[4]
 376  	t3 &= fieldBaseMask
 377  	t5 := (t4 >> fieldBase) + f.n[5]
 378  	t4 &= fieldBaseMask
 379  	t6 := (t5 >> fieldBase) + f.n[6]
 380  	t5 &= fieldBaseMask
 381  	t7 := (t6 >> fieldBase) + f.n[7]
 382  	t6 &= fieldBaseMask
 383  	t8 := (t7 >> fieldBase) + f.n[8]
 384  	t7 &= fieldBaseMask
 385  	t9 = (t8 >> fieldBase) + t9
 386  	t8 &= fieldBaseMask
 387  
 388  	// At this point, the magnitude is guaranteed to be one, however, the
 389  	// value could still be greater than the prime if there was either a
 390  	// carry through to bit 256 (bit 22 of the higher order word) or the
 391  	// value is greater than or equal to the field characteristic.  The
 392  	// following determines if either or these conditions are true and does
 393  	// the final reduction in constant time.
 394  	//
 395  	// Also note that 'm' will be zero when neither of the aforementioned
 396  	// conditions are true and the value will not be changed when 'm' is zero.
 397  	m = constantTimeEq(t9, fieldMSBMask)
 398  	m &= constantTimeEq(t8&t7&t6&t5&t4&t3&t2, fieldBaseMask)
 399  	m &= constantTimeGreater(t1+64+((t0+977)>>fieldBase), fieldBaseMask)
 400  	m |= t9 >> fieldMSBBits
 401  	t0 += m * 977
 402  	t1 = (t0 >> fieldBase) + t1 + (m << 6)
 403  	t0 &= fieldBaseMask
 404  	t2 = (t1 >> fieldBase) + t2
 405  	t1 &= fieldBaseMask
 406  	t3 = (t2 >> fieldBase) + t3
 407  	t2 &= fieldBaseMask
 408  	t4 = (t3 >> fieldBase) + t4
 409  	t3 &= fieldBaseMask
 410  	t5 = (t4 >> fieldBase) + t5
 411  	t4 &= fieldBaseMask
 412  	t6 = (t5 >> fieldBase) + t6
 413  	t5 &= fieldBaseMask
 414  	t7 = (t6 >> fieldBase) + t7
 415  	t6 &= fieldBaseMask
 416  	t8 = (t7 >> fieldBase) + t8
 417  	t7 &= fieldBaseMask
 418  	t9 = (t8 >> fieldBase) + t9
 419  	t8 &= fieldBaseMask
 420  	t9 &= fieldMSBMask // Remove potential multiple of 2^256.
 421  
 422  	// Finally, set the normalized and reduced words.
 423  	f.n[0] = t0
 424  	f.n[1] = t1
 425  	f.n[2] = t2
 426  	f.n[3] = t3
 427  	f.n[4] = t4
 428  	f.n[5] = t5
 429  	f.n[6] = t6
 430  	f.n[7] = t7
 431  	f.n[8] = t8
 432  	f.n[9] = t9
 433  	return f
 434  }
 435  
 436  // PutBytesUnchecked unpacks the field value to a 32-byte big-endian value
 437  // directly into the passed byte slice in constant time.  The target slice must
 438  // have at least 32 bytes available or it will panic.
 439  //
 440  // There is a similar function, PutBytes, which unpacks the field value into a
 441  // 32-byte array directly.  This version is provided since it can be useful
 442  // to write directly into part of a larger buffer without needing a separate
 443  // allocation.
 444  //
 445  //	Preconditions:
 446  //	  - The field value MUST be normalized
 447  //	  - The target slice MUST have at least 32 bytes available
 448  func (f *FieldVal) PutBytesUnchecked(b []byte) {
 449  	// Unpack the 256 total bits from the 10 uint32 words with a max of
 450  	// 26-bits per word.  This could be done with a couple of for loops,
 451  	// but this unrolled version is a bit faster.  Benchmarks show this is
 452  	// about 10 times faster than the variant which uses loops.
 453  	b[31] = byte(f.n[0] & eightBitsMask)
 454  	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
 455  	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
 456  	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
 457  	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
 458  	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
 459  	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
 460  	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
 461  	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
 462  	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
 463  	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
 464  	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
 465  	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
 466  	b[18] = byte(f.n[4] & eightBitsMask)
 467  	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
 468  	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
 469  	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
 470  	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
 471  	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
 472  	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
 473  	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
 474  	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
 475  	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
 476  	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
 477  	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
 478  	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
 479  	b[5] = byte(f.n[8] & eightBitsMask)
 480  	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
 481  	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
 482  	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
 483  	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
 484  	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
 485  }
 486  
 487  // PutBytes unpacks the field value to a 32-byte big-endian value using the
 488  // passed byte array in constant time.
 489  //
 490  // There is a similar function, PutBytesUnchecked, which unpacks the field value
 491  // into a slice that must have at least 32 bytes available.  This version is
 492  // provided since it can be useful to write directly into an array that is type
 493  // checked.
 494  //
 495  // Alternatively, there is also Bytes, which unpacks the field value into a new
 496  // array and returns that which can sometimes be more ergonomic in applications
 497  // that aren't concerned about an additional copy.
 498  //
 499  //	Preconditions:
 500  //	  - The field value MUST be normalized
 501  func (f *FieldVal) PutBytes(b *[32]byte) {
 502  	f.PutBytesUnchecked(b[:])
 503  }
 504  
 505  // Bytes unpacks the field value to a 32-byte big-endian value in constant time.
 506  //
 507  // See PutBytes and PutBytesUnchecked for variants that allow an array or slice
 508  // to be passed which can be useful to cut down on the number of allocations by
 509  // allowing the caller to reuse a buffer or write directly into part of a larger
 510  // buffer.
 511  //
 512  //	Preconditions:
 513  //	  - The field value MUST be normalized
 514  func (f *FieldVal) Bytes() *[32]byte {
 515  	b := new([32]byte)
 516  	f.PutBytesUnchecked(b[:])
 517  	return b
 518  }
 519  
 520  // IsZeroBit returns 1 when the field value is equal to zero or 0 otherwise in
 521  // constant time.
 522  //
 523  // Note that a bool is not used here because it is not possible in Go to convert
 524  // from a bool to numeric value in constant time and many constant-time
 525  // operations require a numeric value.  See IsZero for the version that returns
 526  // a bool.
 527  //
 528  //	Preconditions:
 529  //	  - The field value MUST be normalized
 530  func (f *FieldVal) IsZeroBit() uint32 {
 531  	// The value can only be zero if no bits are set in any of the words.
 532  	// This is a constant time implementation.
 533  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
 534  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
 535  
 536  	return constantTimeEq(bits, 0)
 537  }
 538  
 539  // IsZero returns whether or not the field value is equal to zero in constant
 540  // time.
 541  //
 542  //	Preconditions:
 543  //	  - The field value MUST be normalized
 544  func (f *FieldVal) IsZero() bool {
 545  	// The value can only be zero if no bits are set in any of the words.
 546  	// This is a constant time implementation.
 547  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
 548  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
 549  
 550  	return bits == 0
 551  }
 552  
 553  // IsOneBit returns 1 when the field value is equal to one or 0 otherwise in
 554  // constant time.
 555  //
 556  // Note that a bool is not used here because it is not possible in Go to convert
 557  // from a bool to numeric value in constant time and many constant-time
 558  // operations require a numeric value.  See IsOne for the version that returns a
 559  // bool.
 560  //
 561  //	Preconditions:
 562  //	   - The field value MUST be normalized
 563  func (f *FieldVal) IsOneBit() uint32 {
 564  	// The value can only be one if the single lowest significant bit is set in
 565  	// the first word and no other bits are set in any of the other words.
 566  	// This is a constant time implementation.
 567  	bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
 568  		f.n[6] | f.n[7] | f.n[8] | f.n[9]
 569  
 570  	return constantTimeEq(bits, 0)
 571  }
 572  
 573  // IsOne returns whether or not the field value is equal to one in constant
 574  // time.
 575  //
 576  //	Preconditions:
 577  //	  - The field value MUST be normalized
 578  func (f *FieldVal) IsOne() bool {
 579  	// The value can only be one if the single lowest significant bit is set in
 580  	// the first word and no other bits are set in any of the other words.
 581  	// This is a constant time implementation.
 582  	bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
 583  		f.n[6] | f.n[7] | f.n[8] | f.n[9]
 584  
 585  	return bits == 0
 586  }
 587  
 588  // IsOddBit returns 1 when the field value is an odd number or 0 otherwise in
 589  // constant time.
 590  //
 591  // Note that a bool is not used here because it is not possible in Go to convert
 592  // from a bool to numeric value in constant time and many constant-time
 593  // operations require a numeric value.  See IsOdd for the version that returns a
 594  // bool.
 595  //
 596  //	Preconditions:
 597  //	  - The field value MUST be normalized
 598  func (f *FieldVal) IsOddBit() uint32 {
 599  	// Only odd numbers have the bottom bit set.
 600  	return f.n[0] & 1
 601  }
 602  
 603  // IsOdd returns whether or not the field value is an odd number in constant
 604  // time.
 605  //
 606  //	Preconditions:
 607  //	  - The field value MUST be normalized
 608  func (f *FieldVal) IsOdd() bool {
 609  	// Only odd numbers have the bottom bit set.
 610  	return f.n[0]&1 == 1
 611  }
 612  
 613  // Equals returns whether or not the two field values are the same in constant
 614  // time.
 615  //
 616  //	Preconditions:
 617  //	  - Both field values being compared MUST be normalized
 618  func (f *FieldVal) Equals(val *FieldVal) bool {
 619  	// Xor only sets bits when they are different, so the two field values
 620  	// can only be the same if no bits are set after xoring each word.
 621  	// This is a constant time implementation.
 622  	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
 623  		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
 624  		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
 625  		(f.n[9] ^ val.n[9])
 626  
 627  	return bits == 0
 628  }
 629  
 630  // NegateVal negates the passed value and stores the result in f in constant
 631  // time.  The caller must provide the maximum magnitude of the passed value for
 632  // a correct result.
 633  //
 634  // The field value is returned to support chaining.  This enables syntax like:
 635  // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
 636  //
 637  //	Preconditions:
 638  //	  - The max magnitude MUST be 31
 639  //	Output Normalized: No
 640  //	Output Max Magnitude: Input magnitude + 1
 641  func (f *FieldVal) NegateVal(val *FieldVal, magnitude uint32) *FieldVal {
 642  	// Negation in the field is just the prime minus the value.  However,
 643  	// in order to allow negation against a field value without having to
 644  	// normalize/reduce it first, multiply by the magnitude (that is how
 645  	// "far" away it is from the normalized value) to adjust.  Also, since
 646  	// negating a value pushes it one more order of magnitude away from the
 647  	// normalized range, add 1 to compensate.
 648  	//
 649  	// For some intuition here, imagine you're performing mod 12 arithmetic
 650  	// (picture a clock) and you are negating the number 7.  So you start at
 651  	// 12 (which is of course 0 under mod 12) and count backwards (left on
 652  	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
 653  	// Now, assume you're starting with 19, which is a number that is
 654  	// already larger than the modulus and congruent to 7 (mod 12).  When a
 655  	// value is already in the desired range, its magnitude is 1.  Since 19
 656  	// is an additional "step", its magnitude (mod 12) is 2.  Since any
 657  	// multiple of the modulus is congruent to zero (mod m), the answer can
 658  	// be shortcut by simply multiplying the magnitude by the modulus and
 659  	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
 660  	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
 661  	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
 662  	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
 663  	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
 664  	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
 665  	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
 666  	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
 667  	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
 668  	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
 669  	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
 670  
 671  	return f
 672  }
 673  
 674  // Negate negates the field value in constant time.  The existing field value is
 675  // modified.  The caller must provide the maximum magnitude of the field value
 676  // for a correct result.
 677  //
 678  // The field value is returned to support chaining.  This enables syntax like:
 679  // f.Negate().AddInt(1) so that f = -f + 1.
 680  //
 681  //	Preconditions:
 682  //	  - The max magnitude MUST be 31
 683  //	Output Normalized: No
 684  //	Output Max Magnitude: Input magnitude + 1
 685  func (f *FieldVal) Negate(magnitude uint32) *FieldVal {
 686  	return f.NegateVal(f, magnitude)
 687  }
 688  
 689  // AddInt adds the passed integer to the existing field value and stores the
 690  // result in f in constant time.  This is a convenience function since it is
 691  // fairly common to perform some arithmetic with small native integers.
 692  //
 693  // The field value is returned to support chaining.  This enables syntax like:
 694  // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
 695  //
 696  //	Preconditions:
 697  //	  - The field value MUST have a max magnitude of 31
 698  //	  - The integer MUST be a max of 32767
 699  //	Output Normalized: No
 700  //	Output Max Magnitude: Existing field magnitude + 1
 701  func (f *FieldVal) AddInt(ui uint16) *FieldVal {
 702  	// Since the field representation intentionally provides overflow bits,
 703  	// it's ok to use carryless addition as the carry bit is safely part of
 704  	// the word and will be normalized out.
 705  	f.n[0] += uint32(ui)
 706  
 707  	return f
 708  }
 709  
 710  // Add adds the passed value to the existing field value and stores the result
 711  // in f in constant time.
 712  //
 713  // The field value is returned to support chaining.  This enables syntax like:
 714  // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
 715  //
 716  //	Preconditions:
 717  //	  - The sum of the magnitudes of the two field values MUST be a max of 32
 718  //	Output Normalized: No
 719  //	Output Max Magnitude: Sum of the magnitude of the two individual field values
 720  func (f *FieldVal) Add(val *FieldVal) *FieldVal {
 721  	// Since the field representation intentionally provides overflow bits,
 722  	// it's ok to use carryless addition as the carry bit is safely part of
 723  	// each word and will be normalized out.  This could obviously be done
 724  	// in a loop, but the unrolled version is faster.
 725  	f.n[0] += val.n[0]
 726  	f.n[1] += val.n[1]
 727  	f.n[2] += val.n[2]
 728  	f.n[3] += val.n[3]
 729  	f.n[4] += val.n[4]
 730  	f.n[5] += val.n[5]
 731  	f.n[6] += val.n[6]
 732  	f.n[7] += val.n[7]
 733  	f.n[8] += val.n[8]
 734  	f.n[9] += val.n[9]
 735  
 736  	return f
 737  }
 738  
 739  // Add2 adds the passed two field values together and stores the result in f in
 740  // constant time.
 741  //
 742  // The field value is returned to support chaining.  This enables syntax like:
 743  // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
 744  //
 745  //	Preconditions:
 746  //	  - The sum of the magnitudes of the two field values MUST be a max of 32
 747  //	Output Normalized: No
 748  //	Output Max Magnitude: Sum of the magnitude of the two field values
 749  func (f *FieldVal) Add2(val *FieldVal, val2 *FieldVal) *FieldVal {
 750  	// Since the field representation intentionally provides overflow bits,
 751  	// it's ok to use carryless addition as the carry bit is safely part of
 752  	// each word and will be normalized out.  This could obviously be done
 753  	// in a loop, but the unrolled version is faster.
 754  	f.n[0] = val.n[0] + val2.n[0]
 755  	f.n[1] = val.n[1] + val2.n[1]
 756  	f.n[2] = val.n[2] + val2.n[2]
 757  	f.n[3] = val.n[3] + val2.n[3]
 758  	f.n[4] = val.n[4] + val2.n[4]
 759  	f.n[5] = val.n[5] + val2.n[5]
 760  	f.n[6] = val.n[6] + val2.n[6]
 761  	f.n[7] = val.n[7] + val2.n[7]
 762  	f.n[8] = val.n[8] + val2.n[8]
 763  	f.n[9] = val.n[9] + val2.n[9]
 764  
 765  	return f
 766  }
 767  
 768  // MulInt multiplies the field value by the passed int and stores the result in
 769  // f in constant time.  Note that this function can overflow if multiplying the
 770  // value by any of the individual words exceeds a max uint32.  Therefore it is
 771  // important that the caller ensures no overflows will occur before using this
 772  // function.
 773  //
 774  // The field value is returned to support chaining.  This enables syntax like:
 775  // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
 776  //
 777  //	Preconditions:
 778  //	  - The field value magnitude multiplied by given val MUST be a max of 32
 779  //	Output Normalized: No
 780  //	Output Max Magnitude: Existing field magnitude times the provided integer val
 781  func (f *FieldVal) MulInt(val uint8) *FieldVal {
 782  	// Since each word of the field representation can hold up to
 783  	// 32 - fieldBase extra bits which will be normalized out, it's safe
 784  	// to multiply each word without using a larger type or carry
 785  	// propagation so long as the values won't overflow a uint32.  This
 786  	// could obviously be done in a loop, but the unrolled version is
 787  	// faster.
 788  	ui := uint32(val)
 789  	f.n[0] *= ui
 790  	f.n[1] *= ui
 791  	f.n[2] *= ui
 792  	f.n[3] *= ui
 793  	f.n[4] *= ui
 794  	f.n[5] *= ui
 795  	f.n[6] *= ui
 796  	f.n[7] *= ui
 797  	f.n[8] *= ui
 798  	f.n[9] *= ui
 799  
 800  	return f
 801  }
 802  
 803  // Mul multiplies the passed value to the existing field value and stores the
 804  // result in f in constant time.  Note that this function can overflow if
 805  // multiplying any of the individual words exceeds a max uint32.  In practice,
 806  // this means the magnitude of either value involved in the multiplication must
 807  // be a max of 8.
 808  //
 809  // The field value is returned to support chaining.  This enables syntax like:
 810  // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
 811  //
 812  //	Preconditions:
 813  //	  - Both field values MUST have a max magnitude of 8
 814  //	Output Normalized: No
 815  //	Output Max Magnitude: 1
 816  func (f *FieldVal) Mul(val *FieldVal) *FieldVal {
 817  	return f.Mul2(f, val)
 818  }
 819  
 820  // Mul2 multiplies the passed two field values together and stores the result in
 821  // f in constant time.  Note that this function can overflow if multiplying any
 822  // of the individual words exceeds a max uint32.  In practice, this means the
 823  // magnitude of either value involved in the multiplication must be a max of 8.
 824  //
 825  // The field value is returned to support chaining.  This enables syntax like:
 826  // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
 827  //
 828  //	Preconditions:
 829  //	  - Both input field values MUST have a max magnitude of 8
 830  //	Output Normalized: No
 831  //	Output Max Magnitude: 1
 832  func (f *FieldVal) Mul2(val *FieldVal, val2 *FieldVal) *FieldVal {
 833  	// This could be done with a couple of for loops and an array to store
 834  	// the intermediate terms, but this unrolled version is significantly
 835  	// faster.
 836  
 837  	// Terms for 2^(fieldBase*0).
 838  	m := uint64(val.n[0]) * uint64(val2.n[0])
 839  	t0 := m & fieldBaseMask
 840  
 841  	// Terms for 2^(fieldBase*1).
 842  	m = (m >> fieldBase) +
 843  		uint64(val.n[0])*uint64(val2.n[1]) +
 844  		uint64(val.n[1])*uint64(val2.n[0])
 845  	t1 := m & fieldBaseMask
 846  
 847  	// Terms for 2^(fieldBase*2).
 848  	m = (m >> fieldBase) +
 849  		uint64(val.n[0])*uint64(val2.n[2]) +
 850  		uint64(val.n[1])*uint64(val2.n[1]) +
 851  		uint64(val.n[2])*uint64(val2.n[0])
 852  	t2 := m & fieldBaseMask
 853  
 854  	// Terms for 2^(fieldBase*3).
 855  	m = (m >> fieldBase) +
 856  		uint64(val.n[0])*uint64(val2.n[3]) +
 857  		uint64(val.n[1])*uint64(val2.n[2]) +
 858  		uint64(val.n[2])*uint64(val2.n[1]) +
 859  		uint64(val.n[3])*uint64(val2.n[0])
 860  	t3 := m & fieldBaseMask
 861  
 862  	// Terms for 2^(fieldBase*4).
 863  	m = (m >> fieldBase) +
 864  		uint64(val.n[0])*uint64(val2.n[4]) +
 865  		uint64(val.n[1])*uint64(val2.n[3]) +
 866  		uint64(val.n[2])*uint64(val2.n[2]) +
 867  		uint64(val.n[3])*uint64(val2.n[1]) +
 868  		uint64(val.n[4])*uint64(val2.n[0])
 869  	t4 := m & fieldBaseMask
 870  
 871  	// Terms for 2^(fieldBase*5).
 872  	m = (m >> fieldBase) +
 873  		uint64(val.n[0])*uint64(val2.n[5]) +
 874  		uint64(val.n[1])*uint64(val2.n[4]) +
 875  		uint64(val.n[2])*uint64(val2.n[3]) +
 876  		uint64(val.n[3])*uint64(val2.n[2]) +
 877  		uint64(val.n[4])*uint64(val2.n[1]) +
 878  		uint64(val.n[5])*uint64(val2.n[0])
 879  	t5 := m & fieldBaseMask
 880  
 881  	// Terms for 2^(fieldBase*6).
 882  	m = (m >> fieldBase) +
 883  		uint64(val.n[0])*uint64(val2.n[6]) +
 884  		uint64(val.n[1])*uint64(val2.n[5]) +
 885  		uint64(val.n[2])*uint64(val2.n[4]) +
 886  		uint64(val.n[3])*uint64(val2.n[3]) +
 887  		uint64(val.n[4])*uint64(val2.n[2]) +
 888  		uint64(val.n[5])*uint64(val2.n[1]) +
 889  		uint64(val.n[6])*uint64(val2.n[0])
 890  	t6 := m & fieldBaseMask
 891  
 892  	// Terms for 2^(fieldBase*7).
 893  	m = (m >> fieldBase) +
 894  		uint64(val.n[0])*uint64(val2.n[7]) +
 895  		uint64(val.n[1])*uint64(val2.n[6]) +
 896  		uint64(val.n[2])*uint64(val2.n[5]) +
 897  		uint64(val.n[3])*uint64(val2.n[4]) +
 898  		uint64(val.n[4])*uint64(val2.n[3]) +
 899  		uint64(val.n[5])*uint64(val2.n[2]) +
 900  		uint64(val.n[6])*uint64(val2.n[1]) +
 901  		uint64(val.n[7])*uint64(val2.n[0])
 902  	t7 := m & fieldBaseMask
 903  
 904  	// Terms for 2^(fieldBase*8).
 905  	m = (m >> fieldBase) +
 906  		uint64(val.n[0])*uint64(val2.n[8]) +
 907  		uint64(val.n[1])*uint64(val2.n[7]) +
 908  		uint64(val.n[2])*uint64(val2.n[6]) +
 909  		uint64(val.n[3])*uint64(val2.n[5]) +
 910  		uint64(val.n[4])*uint64(val2.n[4]) +
 911  		uint64(val.n[5])*uint64(val2.n[3]) +
 912  		uint64(val.n[6])*uint64(val2.n[2]) +
 913  		uint64(val.n[7])*uint64(val2.n[1]) +
 914  		uint64(val.n[8])*uint64(val2.n[0])
 915  	t8 := m & fieldBaseMask
 916  
 917  	// Terms for 2^(fieldBase*9).
 918  	m = (m >> fieldBase) +
 919  		uint64(val.n[0])*uint64(val2.n[9]) +
 920  		uint64(val.n[1])*uint64(val2.n[8]) +
 921  		uint64(val.n[2])*uint64(val2.n[7]) +
 922  		uint64(val.n[3])*uint64(val2.n[6]) +
 923  		uint64(val.n[4])*uint64(val2.n[5]) +
 924  		uint64(val.n[5])*uint64(val2.n[4]) +
 925  		uint64(val.n[6])*uint64(val2.n[3]) +
 926  		uint64(val.n[7])*uint64(val2.n[2]) +
 927  		uint64(val.n[8])*uint64(val2.n[1]) +
 928  		uint64(val.n[9])*uint64(val2.n[0])
 929  	t9 := m & fieldBaseMask
 930  
 931  	// Terms for 2^(fieldBase*10).
 932  	m = (m >> fieldBase) +
 933  		uint64(val.n[1])*uint64(val2.n[9]) +
 934  		uint64(val.n[2])*uint64(val2.n[8]) +
 935  		uint64(val.n[3])*uint64(val2.n[7]) +
 936  		uint64(val.n[4])*uint64(val2.n[6]) +
 937  		uint64(val.n[5])*uint64(val2.n[5]) +
 938  		uint64(val.n[6])*uint64(val2.n[4]) +
 939  		uint64(val.n[7])*uint64(val2.n[3]) +
 940  		uint64(val.n[8])*uint64(val2.n[2]) +
 941  		uint64(val.n[9])*uint64(val2.n[1])
 942  	t10 := m & fieldBaseMask
 943  
 944  	// Terms for 2^(fieldBase*11).
 945  	m = (m >> fieldBase) +
 946  		uint64(val.n[2])*uint64(val2.n[9]) +
 947  		uint64(val.n[3])*uint64(val2.n[8]) +
 948  		uint64(val.n[4])*uint64(val2.n[7]) +
 949  		uint64(val.n[5])*uint64(val2.n[6]) +
 950  		uint64(val.n[6])*uint64(val2.n[5]) +
 951  		uint64(val.n[7])*uint64(val2.n[4]) +
 952  		uint64(val.n[8])*uint64(val2.n[3]) +
 953  		uint64(val.n[9])*uint64(val2.n[2])
 954  	t11 := m & fieldBaseMask
 955  
 956  	// Terms for 2^(fieldBase*12).
 957  	m = (m >> fieldBase) +
 958  		uint64(val.n[3])*uint64(val2.n[9]) +
 959  		uint64(val.n[4])*uint64(val2.n[8]) +
 960  		uint64(val.n[5])*uint64(val2.n[7]) +
 961  		uint64(val.n[6])*uint64(val2.n[6]) +
 962  		uint64(val.n[7])*uint64(val2.n[5]) +
 963  		uint64(val.n[8])*uint64(val2.n[4]) +
 964  		uint64(val.n[9])*uint64(val2.n[3])
 965  	t12 := m & fieldBaseMask
 966  
 967  	// Terms for 2^(fieldBase*13).
 968  	m = (m >> fieldBase) +
 969  		uint64(val.n[4])*uint64(val2.n[9]) +
 970  		uint64(val.n[5])*uint64(val2.n[8]) +
 971  		uint64(val.n[6])*uint64(val2.n[7]) +
 972  		uint64(val.n[7])*uint64(val2.n[6]) +
 973  		uint64(val.n[8])*uint64(val2.n[5]) +
 974  		uint64(val.n[9])*uint64(val2.n[4])
 975  	t13 := m & fieldBaseMask
 976  
 977  	// Terms for 2^(fieldBase*14).
 978  	m = (m >> fieldBase) +
 979  		uint64(val.n[5])*uint64(val2.n[9]) +
 980  		uint64(val.n[6])*uint64(val2.n[8]) +
 981  		uint64(val.n[7])*uint64(val2.n[7]) +
 982  		uint64(val.n[8])*uint64(val2.n[6]) +
 983  		uint64(val.n[9])*uint64(val2.n[5])
 984  	t14 := m & fieldBaseMask
 985  
 986  	// Terms for 2^(fieldBase*15).
 987  	m = (m >> fieldBase) +
 988  		uint64(val.n[6])*uint64(val2.n[9]) +
 989  		uint64(val.n[7])*uint64(val2.n[8]) +
 990  		uint64(val.n[8])*uint64(val2.n[7]) +
 991  		uint64(val.n[9])*uint64(val2.n[6])
 992  	t15 := m & fieldBaseMask
 993  
 994  	// Terms for 2^(fieldBase*16).
 995  	m = (m >> fieldBase) +
 996  		uint64(val.n[7])*uint64(val2.n[9]) +
 997  		uint64(val.n[8])*uint64(val2.n[8]) +
 998  		uint64(val.n[9])*uint64(val2.n[7])
 999  	t16 := m & fieldBaseMask
1000  
1001  	// Terms for 2^(fieldBase*17).
1002  	m = (m >> fieldBase) +
1003  		uint64(val.n[8])*uint64(val2.n[9]) +
1004  		uint64(val.n[9])*uint64(val2.n[8])
1005  	t17 := m & fieldBaseMask
1006  
1007  	// Terms for 2^(fieldBase*18).
1008  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
1009  	t18 := m & fieldBaseMask
1010  
1011  	// What's left is for 2^(fieldBase*19).
1012  	t19 := m >> fieldBase
1013  
1014  	// At this point, all of the terms are grouped into their respective
1015  	// base.
1016  	//
1017  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
1018  	// when the modulus is of the special form m = b^t - c, highly efficient
1019  	// reduction can be achieved per the provided algorithm.
1020  	//
1021  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
1022  	// this criteria.
1023  	//
1024  	// 4294968273 in field representation (base 2^26) is:
1025  	// n[0] = 977
1026  	// n[1] = 64
1027  	// That is to say (2^26 * 64) + 977 = 4294968273
1028  	//
1029  	// Since each word is in base 26, the upper terms (t10 and up) start
1030  	// at 260 bits (versus the final desired range of 256 bits), so the
1031  	// field representation of 'c' from above needs to be adjusted for the
1032  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
1033  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
1034  	// n[0] = 977 * 16 = 15632
1035  	// n[1] = 64 * 16 = 1024
1036  	// That is to say (2^26 * 1024) + 15632 = 68719492368
1037  	//
1038  	// To reduce the final term, t19, the entire 'c' value is needed instead
1039  	// of only n[0] because there are no more terms left to handle n[1].
1040  	// This means there might be some magnitude left in the upper bits that
1041  	// is handled below.
1042  	m = t0 + t10*15632
1043  	t0 = m & fieldBaseMask
1044  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1045  	t1 = m & fieldBaseMask
1046  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1047  	t2 = m & fieldBaseMask
1048  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1049  	t3 = m & fieldBaseMask
1050  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1051  	t4 = m & fieldBaseMask
1052  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1053  	t5 = m & fieldBaseMask
1054  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1055  	t6 = m & fieldBaseMask
1056  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1057  	t7 = m & fieldBaseMask
1058  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1059  	t8 = m & fieldBaseMask
1060  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1061  	t9 = m & fieldMSBMask
1062  	m >>= fieldMSBBits
1063  
1064  	// At this point, if the magnitude is greater than 0, the overall value
1065  	// is greater than the max possible 256-bit value.  In particular, it is
1066  	// "how many times larger" than the max value it is.
1067  	//
1068  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
1069  	// quotient is zero.  However, due to the above, we already know at
1070  	// least how many times we would need to repeat as it's the value
1071  	// currently in m.  Thus we can simply multiply the magnitude by the
1072  	// field representation of the prime and do a single iteration.  Notice
1073  	// that nothing will be changed when the magnitude is zero, so we could
1074  	// skip this in that case, however always running regardless allows it
1075  	// to run in constant time.  The final result will be in the range
1076  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1077  	// magnitude of 1, but it is denormalized.
1078  	d := t0 + m*977
1079  	f.n[0] = uint32(d & fieldBaseMask)
1080  	d = (d >> fieldBase) + t1 + m*64
1081  	f.n[1] = uint32(d & fieldBaseMask)
1082  	f.n[2] = uint32((d >> fieldBase) + t2)
1083  	f.n[3] = uint32(t3)
1084  	f.n[4] = uint32(t4)
1085  	f.n[5] = uint32(t5)
1086  	f.n[6] = uint32(t6)
1087  	f.n[7] = uint32(t7)
1088  	f.n[8] = uint32(t8)
1089  	f.n[9] = uint32(t9)
1090  
1091  	return f
1092  }
1093  
1094  // SquareRootVal either calculates the square root of the passed value when it
1095  // exists or the square root of the negation of the value when it does not exist
1096  // and stores the result in f in constant time.  The return flag is true when
1097  // the calculated square root is for the passed value itself and false when it
1098  // is for its negation.
1099  //
1100  // Note that this function can overflow if multiplying any of the individual
1101  // words exceeds a max uint32.  In practice, this means the magnitude of the
1102  // field must be a max of 8 to prevent overflow.  The magnitude of the result
1103  // will be 1.
1104  //
1105  //	Preconditions:
1106  //	  - The input field value MUST have a max magnitude of 8
1107  //	Output Normalized: No
1108  //	Output Max Magnitude: 1
1109  func (f *FieldVal) SquareRootVal(val *FieldVal) bool {
1110  	// This uses the Tonelli-Shanks method for calculating the square root of
1111  	// the value when it exists.  The key principles of the method follow.
1112  	//
1113  	// Fermat's little theorem states that for a nonzero number 'a' and prime
1114  	// 'p', a^(p-1) ≡ 1 (mod p).
1115  	//
1116  	// Further, Euler's criterion states that an integer 'a' has a square root
1117  	// (aka is a quadratic residue) modulo a prime if a^((p-1)/2) ≡ 1 (mod p)
1118  	// and, conversely, when it does NOT have a square root (aka 'a' is a
1119  	// non-residue) a^((p-1)/2) ≡ -1 (mod p).
1120  	//
1121  	// This can be seen by considering that Fermat's little theorem can be
1122  	// written as (a^((p-1)/2) - 1)(a^((p-1)/2) + 1) ≡ 0 (mod p).  Therefore,
1123  	// one of the two factors must be 0.  Then, when a ≡ x^2 (aka 'a' is a
1124  	// quadratic residue), (x^2)^((p-1)/2) ≡ x^(p-1) ≡ 1 (mod p) which implies
1125  	// the first factor must be zero.  Finally, per Lagrange's theorem, the
1126  	// non-residues are the only remaining possible solutions and thus must make
1127  	// the second factor zero to satisfy Fermat's little theorem implying that
1128  	// a^((p-1)/2) ≡ -1 (mod p) for that case.
1129  	//
1130  	// The Tonelli-Shanks method uses these facts along with factoring out
1131  	// powers of two to solve a congruence that results in either the solution
1132  	// when the square root exists or the square root of the negation of the
1133  	// value when it does not.  In the case of primes that are ≡ 3 (mod 4), the
1134  	// possible solutions are r = ±a^((p+1)/4) (mod p).  Therefore, either r^2 ≡
1135  	// a (mod p) is true in which case ±r are the two solutions, or r^2 ≡ -a
1136  	// (mod p) in which case 'a' is a non-residue and there are no solutions.
1137  	//
1138  	// The secp256k1 prime is ≡ 3 (mod 4), so this result applies.
1139  	//
1140  	// In other words, calculate a^((p+1)/4) and then square it and check it
1141  	// against the original value to determine if it is actually the square
1142  	// root.
1143  	//
1144  	// In order to efficiently compute a^((p+1)/4), (p+1)/4 needs to be split
1145  	// into a sequence of squares and multiplications that minimizes the number
1146  	// of multiplications needed (since they are more costly than squarings).
1147  	//
1148  	// The secp256k1 prime + 1 / 4 is 2^254 - 2^30 - 244.  In binary, that is:
1149  	//
1150  	// 00111111 11111111 11111111 11111111
1151  	// 11111111 11111111 11111111 11111111
1152  	// 11111111 11111111 11111111 11111111
1153  	// 11111111 11111111 11111111 11111111
1154  	// 11111111 11111111 11111111 11111111
1155  	// 11111111 11111111 11111111 11111111
1156  	// 11111111 11111111 11111111 11111111
1157  	// 10111111 11111111 11111111 00001100
1158  	//
1159  	// Notice that can be broken up into three windows of consecutive 1s (in
1160  	// order of least to most significant) as:
1161  	//
1162  	//   6-bit window with two bits set (bits 4, 5, 6, 7 unset)
1163  	//   23-bit window with 22 bits set (bit 30 unset)
1164  	//   223-bit window with all 223 bits set
1165  	//
1166  	// Thus, the groups of 1 bits in each window forms the set:
1167  	// S = {2, 22, 223}.
1168  	//
1169  	// The strategy is to calculate a^(2^n - 1) for each grouping via an
1170  	// addition chain with a sliding window.
1171  	//
1172  	// The addition chain used is (credits to Peter Dettman):
1173  	// (0,0),(1,0),(2,2),(3,2),(4,1),(5,5),(6,6),(7,7),(8,8),(9,7),(10,2)
1174  	// => 2^1 2^[2] 2^3 2^6 2^9 2^11 2^[22] 2^44 2^88 2^176 2^220 2^[223]
1175  	//
1176  	// This has a cost of 254 field squarings and 13 field multiplications.
1177  	var a, a2, a3, a6, a9, a11, a22, a44, a88, a176, a220, a223 FieldVal
1178  	a.Set(val)
1179  	a2.SquareVal(&a).Mul(&a)                                  // a2 = a^(2^2 - 1)
1180  	a3.SquareVal(&a2).Mul(&a)                                 // a3 = a^(2^3 - 1)
1181  	a6.SquareVal(&a3).Square().Square()                       // a6 = a^(2^6 - 2^3)
1182  	a6.Mul(&a3)                                               // a6 = a^(2^6 - 1)
1183  	a9.SquareVal(&a6).Square().Square()                       // a9 = a^(2^9 - 2^3)
1184  	a9.Mul(&a3)                                               // a9 = a^(2^9 - 1)
1185  	a11.SquareVal(&a9).Square()                               // a11 = a^(2^11 - 2^2)
1186  	a11.Mul(&a2)                                              // a11 = a^(2^11 - 1)
1187  	a22.SquareVal(&a11).Square().Square().Square().Square()   // a22 = a^(2^16 - 2^5)
1188  	a22.Square().Square().Square().Square().Square()          // a22 = a^(2^21 - 2^10)
1189  	a22.Square()                                              // a22 = a^(2^22 - 2^11)
1190  	a22.Mul(&a11)                                             // a22 = a^(2^22 - 1)
1191  	a44.SquareVal(&a22).Square().Square().Square().Square()   // a44 = a^(2^27 - 2^5)
1192  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^32 - 2^10)
1193  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^37 - 2^15)
1194  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^42 - 2^20)
1195  	a44.Square().Square()                                     // a44 = a^(2^44 - 2^22)
1196  	a44.Mul(&a22)                                             // a44 = a^(2^44 - 1)
1197  	a88.SquareVal(&a44).Square().Square().Square().Square()   // a88 = a^(2^49 - 2^5)
1198  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^54 - 2^10)
1199  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^59 - 2^15)
1200  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^64 - 2^20)
1201  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^69 - 2^25)
1202  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^74 - 2^30)
1203  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^79 - 2^35)
1204  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^84 - 2^40)
1205  	a88.Square().Square().Square().Square()                   // a88 = a^(2^88 - 2^44)
1206  	a88.Mul(&a44)                                             // a88 = a^(2^88 - 1)
1207  	a176.SquareVal(&a88).Square().Square().Square().Square()  // a176 = a^(2^93 - 2^5)
1208  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^98 - 2^10)
1209  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^103 - 2^15)
1210  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^108 - 2^20)
1211  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^113 - 2^25)
1212  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^118 - 2^30)
1213  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^123 - 2^35)
1214  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^128 - 2^40)
1215  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^133 - 2^45)
1216  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^138 - 2^50)
1217  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^143 - 2^55)
1218  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^148 - 2^60)
1219  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^153 - 2^65)
1220  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^158 - 2^70)
1221  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^163 - 2^75)
1222  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^168 - 2^80)
1223  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^173 - 2^85)
1224  	a176.Square().Square().Square()                           // a176 = a^(2^176 - 2^88)
1225  	a176.Mul(&a88)                                            // a176 = a^(2^176 - 1)
1226  	a220.SquareVal(&a176).Square().Square().Square().Square() // a220 = a^(2^181 - 2^5)
1227  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^186 - 2^10)
1228  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^191 - 2^15)
1229  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^196 - 2^20)
1230  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^201 - 2^25)
1231  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^206 - 2^30)
1232  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^211 - 2^35)
1233  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^216 - 2^40)
1234  	a220.Square().Square().Square().Square()                  // a220 = a^(2^220 - 2^44)
1235  	a220.Mul(&a44)                                            // a220 = a^(2^220 - 1)
1236  	a223.SquareVal(&a220).Square().Square()                   // a223 = a^(2^223 - 2^3)
1237  	a223.Mul(&a3)                                             // a223 = a^(2^223 - 1)
1238  
1239  	f.SquareVal(&a223).Square().Square().Square().Square() // f = a^(2^228 - 2^5)
1240  	f.Square().Square().Square().Square().Square()         // f = a^(2^233 - 2^10)
1241  	f.Square().Square().Square().Square().Square()         // f = a^(2^238 - 2^15)
1242  	f.Square().Square().Square().Square().Square()         // f = a^(2^243 - 2^20)
1243  	f.Square().Square().Square()                           // f = a^(2^246 - 2^23)
1244  	f.Mul(&a22)                                            // f = a^(2^246 - 2^22 - 1)
1245  	f.Square().Square().Square().Square().Square()         // f = a^(2^251 - 2^27 - 2^5)
1246  	f.Square()                                             // f = a^(2^252 - 2^28 - 2^6)
1247  	f.Mul(&a2)                                             // f = a^(2^252 - 2^28 - 2^6 - 2^1 - 1)
1248  	f.Square().Square()                                    // f = a^(2^254 - 2^30 - 2^8 - 2^3 - 2^2)
1249  	//                                                     //   = a^(2^254 - 2^30 - 244)
1250  	//                                                     //   = a^((p+1)/4)
1251  
1252  	// Ensure the calculated result is actually the square root by squaring it
1253  	// and checking against the original value.
1254  	var sqr FieldVal
1255  	return sqr.SquareVal(f).Normalize().Equals(val.Normalize())
1256  }
1257  
1258  // Square squares the field value in constant time.  The existing field value is
1259  // modified.  Note that this function can overflow if multiplying any of the
1260  // individual words exceeds a max uint32.  In practice, this means the magnitude
1261  // of the field must be a max of 8 to prevent overflow.
1262  //
1263  // The field value is returned to support chaining.  This enables syntax like:
1264  // f.Square().Mul(f2) so that f = f^2 * f2.
1265  //
1266  //	Preconditions:
1267  //	  - The field value MUST have a max magnitude of 8
1268  //	Output Normalized: No
1269  //	Output Max Magnitude: 1
1270  func (f *FieldVal) Square() *FieldVal {
1271  	return f.SquareVal(f)
1272  }
1273  
1274  // SquareVal squares the passed value and stores the result in f in constant
1275  // time.  Note that this function can overflow if multiplying any of the
1276  // individual words exceeds a max uint32.  In practice, this means the magnitude
1277  // of the field being squared must be a max of 8 to prevent overflow.
1278  //
1279  // The field value is returned to support chaining.  This enables syntax like:
1280  // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
1281  //
1282  //	Preconditions:
1283  //	  - The input field value MUST have a max magnitude of 8
1284  //	Output Normalized: No
1285  //	Output Max Magnitude: 1
1286  func (f *FieldVal) SquareVal(val *FieldVal) *FieldVal {
1287  	// This could be done with a couple of for loops and an array to store
1288  	// the intermediate terms, but this unrolled version is significantly
1289  	// faster.
1290  
1291  	// Terms for 2^(fieldBase*0).
1292  	m := uint64(val.n[0]) * uint64(val.n[0])
1293  	t0 := m & fieldBaseMask
1294  
1295  	// Terms for 2^(fieldBase*1).
1296  	m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
1297  	t1 := m & fieldBaseMask
1298  
1299  	// Terms for 2^(fieldBase*2).
1300  	m = (m >> fieldBase) +
1301  		2*uint64(val.n[0])*uint64(val.n[2]) +
1302  		uint64(val.n[1])*uint64(val.n[1])
1303  	t2 := m & fieldBaseMask
1304  
1305  	// Terms for 2^(fieldBase*3).
1306  	m = (m >> fieldBase) +
1307  		2*uint64(val.n[0])*uint64(val.n[3]) +
1308  		2*uint64(val.n[1])*uint64(val.n[2])
1309  	t3 := m & fieldBaseMask
1310  
1311  	// Terms for 2^(fieldBase*4).
1312  	m = (m >> fieldBase) +
1313  		2*uint64(val.n[0])*uint64(val.n[4]) +
1314  		2*uint64(val.n[1])*uint64(val.n[3]) +
1315  		uint64(val.n[2])*uint64(val.n[2])
1316  	t4 := m & fieldBaseMask
1317  
1318  	// Terms for 2^(fieldBase*5).
1319  	m = (m >> fieldBase) +
1320  		2*uint64(val.n[0])*uint64(val.n[5]) +
1321  		2*uint64(val.n[1])*uint64(val.n[4]) +
1322  		2*uint64(val.n[2])*uint64(val.n[3])
1323  	t5 := m & fieldBaseMask
1324  
1325  	// Terms for 2^(fieldBase*6).
1326  	m = (m >> fieldBase) +
1327  		2*uint64(val.n[0])*uint64(val.n[6]) +
1328  		2*uint64(val.n[1])*uint64(val.n[5]) +
1329  		2*uint64(val.n[2])*uint64(val.n[4]) +
1330  		uint64(val.n[3])*uint64(val.n[3])
1331  	t6 := m & fieldBaseMask
1332  
1333  	// Terms for 2^(fieldBase*7).
1334  	m = (m >> fieldBase) +
1335  		2*uint64(val.n[0])*uint64(val.n[7]) +
1336  		2*uint64(val.n[1])*uint64(val.n[6]) +
1337  		2*uint64(val.n[2])*uint64(val.n[5]) +
1338  		2*uint64(val.n[3])*uint64(val.n[4])
1339  	t7 := m & fieldBaseMask
1340  
1341  	// Terms for 2^(fieldBase*8).
1342  	m = (m >> fieldBase) +
1343  		2*uint64(val.n[0])*uint64(val.n[8]) +
1344  		2*uint64(val.n[1])*uint64(val.n[7]) +
1345  		2*uint64(val.n[2])*uint64(val.n[6]) +
1346  		2*uint64(val.n[3])*uint64(val.n[5]) +
1347  		uint64(val.n[4])*uint64(val.n[4])
1348  	t8 := m & fieldBaseMask
1349  
1350  	// Terms for 2^(fieldBase*9).
1351  	m = (m >> fieldBase) +
1352  		2*uint64(val.n[0])*uint64(val.n[9]) +
1353  		2*uint64(val.n[1])*uint64(val.n[8]) +
1354  		2*uint64(val.n[2])*uint64(val.n[7]) +
1355  		2*uint64(val.n[3])*uint64(val.n[6]) +
1356  		2*uint64(val.n[4])*uint64(val.n[5])
1357  	t9 := m & fieldBaseMask
1358  
1359  	// Terms for 2^(fieldBase*10).
1360  	m = (m >> fieldBase) +
1361  		2*uint64(val.n[1])*uint64(val.n[9]) +
1362  		2*uint64(val.n[2])*uint64(val.n[8]) +
1363  		2*uint64(val.n[3])*uint64(val.n[7]) +
1364  		2*uint64(val.n[4])*uint64(val.n[6]) +
1365  		uint64(val.n[5])*uint64(val.n[5])
1366  	t10 := m & fieldBaseMask
1367  
1368  	// Terms for 2^(fieldBase*11).
1369  	m = (m >> fieldBase) +
1370  		2*uint64(val.n[2])*uint64(val.n[9]) +
1371  		2*uint64(val.n[3])*uint64(val.n[8]) +
1372  		2*uint64(val.n[4])*uint64(val.n[7]) +
1373  		2*uint64(val.n[5])*uint64(val.n[6])
1374  	t11 := m & fieldBaseMask
1375  
1376  	// Terms for 2^(fieldBase*12).
1377  	m = (m >> fieldBase) +
1378  		2*uint64(val.n[3])*uint64(val.n[9]) +
1379  		2*uint64(val.n[4])*uint64(val.n[8]) +
1380  		2*uint64(val.n[5])*uint64(val.n[7]) +
1381  		uint64(val.n[6])*uint64(val.n[6])
1382  	t12 := m & fieldBaseMask
1383  
1384  	// Terms for 2^(fieldBase*13).
1385  	m = (m >> fieldBase) +
1386  		2*uint64(val.n[4])*uint64(val.n[9]) +
1387  		2*uint64(val.n[5])*uint64(val.n[8]) +
1388  		2*uint64(val.n[6])*uint64(val.n[7])
1389  	t13 := m & fieldBaseMask
1390  
1391  	// Terms for 2^(fieldBase*14).
1392  	m = (m >> fieldBase) +
1393  		2*uint64(val.n[5])*uint64(val.n[9]) +
1394  		2*uint64(val.n[6])*uint64(val.n[8]) +
1395  		uint64(val.n[7])*uint64(val.n[7])
1396  	t14 := m & fieldBaseMask
1397  
1398  	// Terms for 2^(fieldBase*15).
1399  	m = (m >> fieldBase) +
1400  		2*uint64(val.n[6])*uint64(val.n[9]) +
1401  		2*uint64(val.n[7])*uint64(val.n[8])
1402  	t15 := m & fieldBaseMask
1403  
1404  	// Terms for 2^(fieldBase*16).
1405  	m = (m >> fieldBase) +
1406  		2*uint64(val.n[7])*uint64(val.n[9]) +
1407  		uint64(val.n[8])*uint64(val.n[8])
1408  	t16 := m & fieldBaseMask
1409  
1410  	// Terms for 2^(fieldBase*17).
1411  	m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
1412  	t17 := m & fieldBaseMask
1413  
1414  	// Terms for 2^(fieldBase*18).
1415  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
1416  	t18 := m & fieldBaseMask
1417  
1418  	// What's left is for 2^(fieldBase*19).
1419  	t19 := m >> fieldBase
1420  
1421  	// At this point, all of the terms are grouped into their respective
1422  	// base.
1423  	//
1424  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
1425  	// when the modulus is of the special form m = b^t - c, highly efficient
1426  	// reduction can be achieved per the provided algorithm.
1427  	//
1428  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
1429  	// this criteria.
1430  	//
1431  	// 4294968273 in field representation (base 2^26) is:
1432  	// n[0] = 977
1433  	// n[1] = 64
1434  	// That is to say (2^26 * 64) + 977 = 4294968273
1435  	//
1436  	// Since each word is in base 26, the upper terms (t10 and up) start
1437  	// at 260 bits (versus the final desired range of 256 bits), so the
1438  	// field representation of 'c' from above needs to be adjusted for the
1439  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
1440  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
1441  	// n[0] = 977 * 16 = 15632
1442  	// n[1] = 64 * 16 = 1024
1443  	// That is to say (2^26 * 1024) + 15632 = 68719492368
1444  	//
1445  	// To reduce the final term, t19, the entire 'c' value is needed instead
1446  	// of only n[0] because there are no more terms left to handle n[1].
1447  	// This means there might be some magnitude left in the upper bits that
1448  	// is handled below.
1449  	m = t0 + t10*15632
1450  	t0 = m & fieldBaseMask
1451  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1452  	t1 = m & fieldBaseMask
1453  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1454  	t2 = m & fieldBaseMask
1455  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1456  	t3 = m & fieldBaseMask
1457  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1458  	t4 = m & fieldBaseMask
1459  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1460  	t5 = m & fieldBaseMask
1461  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1462  	t6 = m & fieldBaseMask
1463  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1464  	t7 = m & fieldBaseMask
1465  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1466  	t8 = m & fieldBaseMask
1467  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1468  	t9 = m & fieldMSBMask
1469  	m >>= fieldMSBBits
1470  
1471  	// At this point, if the magnitude is greater than 0, the overall value
1472  	// is greater than the max possible 256-bit value.  In particular, it is
1473  	// "how many times larger" than the max value it is.
1474  	//
1475  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
1476  	// quotient is zero.  However, due to the above, we already know at
1477  	// least how many times we would need to repeat as it's the value
1478  	// currently in m.  Thus we can simply multiply the magnitude by the
1479  	// field representation of the prime and do a single iteration.  Notice
1480  	// that nothing will be changed when the magnitude is zero, so we could
1481  	// skip this in that case, however always running regardless allows it
1482  	// to run in constant time.  The final result will be in the range
1483  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1484  	// magnitude of 1, but it is denormalized.
1485  	n := t0 + m*977
1486  	f.n[0] = uint32(n & fieldBaseMask)
1487  	n = (n >> fieldBase) + t1 + m*64
1488  	f.n[1] = uint32(n & fieldBaseMask)
1489  	f.n[2] = uint32((n >> fieldBase) + t2)
1490  	f.n[3] = uint32(t3)
1491  	f.n[4] = uint32(t4)
1492  	f.n[5] = uint32(t5)
1493  	f.n[6] = uint32(t6)
1494  	f.n[7] = uint32(t7)
1495  	f.n[8] = uint32(t8)
1496  	f.n[9] = uint32(t9)
1497  
1498  	return f
1499  }
1500  
1501  // Inverse finds the modular multiplicative inverse of the field value in
1502  // constant time.  The existing field value is modified.
1503  //
1504  // The field value is returned to support chaining.  This enables syntax like:
1505  // f.Inverse().Mul(f2) so that f = f^-1 * f2.
1506  //
1507  //	Preconditions:
1508  //	  - The field value MUST have a max magnitude of 8
1509  //	Output Normalized: No
1510  //	Output Max Magnitude: 1
1511  func (f *FieldVal) Inverse() *FieldVal {
1512  	// Fermat's little theorem states that for a nonzero number 'a' and prime
1513  	// 'p', a^(p-1) ≡ 1 (mod p).  Multiplying both sides of the equation by the
1514  	// multiplicative inverse a^-1 yields a^(p-2) ≡ a^-1 (mod p).  Thus, a^(p-2)
1515  	// is the multiplicative inverse.
1516  	//
1517  	// In order to efficiently compute a^(p-2), p-2 needs to be split into a
1518  	// sequence of squares and multiplications that minimizes the number of
1519  	// multiplications needed (since they are more costly than squarings).
1520  	// Intermediate results are saved and reused as well.
1521  	//
1522  	// The secp256k1 prime - 2 is 2^256 - 4294968275.  In binary, that is:
1523  	//
1524  	// 11111111 11111111 11111111 11111111
1525  	// 11111111 11111111 11111111 11111111
1526  	// 11111111 11111111 11111111 11111111
1527  	// 11111111 11111111 11111111 11111111
1528  	// 11111111 11111111 11111111 11111111
1529  	// 11111111 11111111 11111111 11111111
1530  	// 11111111 11111111 11111111 11111110
1531  	// 11111111 11111111 11111100 00101101
1532  	//
1533  	// Notice that can be broken up into five windows of consecutive 1s (in
1534  	// order of least to most significant) as:
1535  	//
1536  	//   2-bit window with 1 bit set (bit 1 unset)
1537  	//   3-bit window with 2 bits set (bit 4 unset)
1538  	//   5-bit window with 1 bit set (bits 6, 7, 8, 9 unset)
1539  	//   23-bit window with 22 bits set (bit 32 unset)
1540  	//   223-bit window with all 223 bits set
1541  	//
1542  	// Thus, the groups of 1 bits in each window forms the set:
1543  	// S = {1, 2, 22, 223}.
1544  	//
1545  	// The strategy is to calculate a^(2^n - 1) for each grouping via an
1546  	// addition chain with a sliding window.
1547  	//
1548  	// The addition chain used is (credits to Peter Dettman):
1549  	// (0,0),(1,0),(2,2),(3,2),(4,1),(5,5),(6,6),(7,7),(8,8),(9,7),(10,2)
1550  	// => 2^[1] 2^[2] 2^3 2^6 2^9 2^11 2^[22] 2^44 2^88 2^176 2^220 2^[223]
1551  	//
1552  	// This has a cost of 255 field squarings and 15 field multiplications.
1553  	var a, a2, a3, a6, a9, a11, a22, a44, a88, a176, a220, a223 FieldVal
1554  	a.Set(f)
1555  	a2.SquareVal(&a).Mul(&a)                                  // a2  = a^(2^2 - 1)
1556  	a3.SquareVal(&a2).Mul(&a)                                 // a3  = a^(2^3 - 1)
1557  	a6.SquareVal(&a3).Square().Square()                       // a6 = a^(2^6 - 2^3)
1558  	a6.Mul(&a3)                                               // a6 = a^(2^6 - 1)
1559  	a9.SquareVal(&a6).Square().Square()                       // a9 = a^(2^9 - 2^3)
1560  	a9.Mul(&a3)                                               // a9 = a^(2^9 - 1)
1561  	a11.SquareVal(&a9).Square()                               // a11 = a^(2^11 - 2^2)
1562  	a11.Mul(&a2)                                              // a11 = a^(2^11 - 1)
1563  	a22.SquareVal(&a11).Square().Square().Square().Square()   // a22 = a^(2^16 - 2^5)
1564  	a22.Square().Square().Square().Square().Square()          // a22 = a^(2^21 - 2^10)
1565  	a22.Square()                                              // a22 = a^(2^22 - 2^11)
1566  	a22.Mul(&a11)                                             // a22 = a^(2^22 - 1)
1567  	a44.SquareVal(&a22).Square().Square().Square().Square()   // a44 = a^(2^27 - 2^5)
1568  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^32 - 2^10)
1569  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^37 - 2^15)
1570  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^42 - 2^20)
1571  	a44.Square().Square()                                     // a44 = a^(2^44 - 2^22)
1572  	a44.Mul(&a22)                                             // a44 = a^(2^44 - 1)
1573  	a88.SquareVal(&a44).Square().Square().Square().Square()   // a88 = a^(2^49 - 2^5)
1574  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^54 - 2^10)
1575  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^59 - 2^15)
1576  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^64 - 2^20)
1577  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^69 - 2^25)
1578  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^74 - 2^30)
1579  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^79 - 2^35)
1580  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^84 - 2^40)
1581  	a88.Square().Square().Square().Square()                   // a88 = a^(2^88 - 2^44)
1582  	a88.Mul(&a44)                                             // a88 = a^(2^88 - 1)
1583  	a176.SquareVal(&a88).Square().Square().Square().Square()  // a176 = a^(2^93 - 2^5)
1584  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^98 - 2^10)
1585  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^103 - 2^15)
1586  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^108 - 2^20)
1587  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^113 - 2^25)
1588  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^118 - 2^30)
1589  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^123 - 2^35)
1590  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^128 - 2^40)
1591  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^133 - 2^45)
1592  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^138 - 2^50)
1593  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^143 - 2^55)
1594  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^148 - 2^60)
1595  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^153 - 2^65)
1596  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^158 - 2^70)
1597  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^163 - 2^75)
1598  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^168 - 2^80)
1599  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^173 - 2^85)
1600  	a176.Square().Square().Square()                           // a176 = a^(2^176 - 2^88)
1601  	a176.Mul(&a88)                                            // a176 = a^(2^176 - 1)
1602  	a220.SquareVal(&a176).Square().Square().Square().Square() // a220 = a^(2^181 - 2^5)
1603  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^186 - 2^10)
1604  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^191 - 2^15)
1605  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^196 - 2^20)
1606  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^201 - 2^25)
1607  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^206 - 2^30)
1608  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^211 - 2^35)
1609  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^216 - 2^40)
1610  	a220.Square().Square().Square().Square()                  // a220 = a^(2^220 - 2^44)
1611  	a220.Mul(&a44)                                            // a220 = a^(2^220 - 1)
1612  	a223.SquareVal(&a220).Square().Square()                   // a223 = a^(2^223 - 2^3)
1613  	a223.Mul(&a3)                                             // a223 = a^(2^223 - 1)
1614  
1615  	f.SquareVal(&a223).Square().Square().Square().Square() // f = a^(2^228 - 2^5)
1616  	f.Square().Square().Square().Square().Square()         // f = a^(2^233 - 2^10)
1617  	f.Square().Square().Square().Square().Square()         // f = a^(2^238 - 2^15)
1618  	f.Square().Square().Square().Square().Square()         // f = a^(2^243 - 2^20)
1619  	f.Square().Square().Square()                           // f = a^(2^246 - 2^23)
1620  	f.Mul(&a22)                                            // f = a^(2^246 - 4194305)
1621  	f.Square().Square().Square().Square().Square()         // f = a^(2^251 - 134217760)
1622  	f.Mul(&a)                                              // f = a^(2^251 - 134217759)
1623  	f.Square().Square().Square()                           // f = a^(2^254 - 1073742072)
1624  	f.Mul(&a2)                                             // f = a^(2^254 - 1073742069)
1625  	f.Square().Square()                                    // f = a^(2^256 - 4294968276)
1626  	return f.Mul(&a)                                       // f = a^(2^256 - 4294968275) = a^(p-2)
1627  }
1628  
1629  // IsGtOrEqPrimeMinusOrder returns whether or not the field value is greater
1630  // than or equal to the field prime minus the secp256k1 group order in constant
1631  // time.
1632  //
1633  //	Preconditions:
1634  //	  - The field value MUST be normalized
1635  func (f *FieldVal) IsGtOrEqPrimeMinusOrder() bool {
1636  	// The secp256k1 prime is equivalent to 2^256 - 4294968273 and the group
1637  	// order is 2^256 - 432420386565659656852420866394968145599.  Thus,
1638  	// the prime minus the group order is:
1639  	// 432420386565659656852420866390673177326
1640  	//
1641  	// In hex that is:
1642  	// 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da172 2fc9baee
1643  	//
1644  	// Converting that to field representation (base 2^26) is:
1645  	//
1646  	// n[0] = 0x03c9baee
1647  	// n[1] = 0x03685c8b
1648  	// n[2] = 0x01fc4402
1649  	// n[3] = 0x006542dd
1650  	// n[4] = 0x01455123
1651  	//
1652  	// This can be verified with the following test code:
1653  	//   pMinusN := new(big.Int).Sub(curveParams.P, curveParams.N)
1654  	//   var fv FieldVal
1655  	//   fv.SetByteSlice(pMinusN.Bytes())
1656  	//   t.Logf("%x", fv.n)
1657  	//
1658  	//   Outputs: [3c9baee 3685c8b 1fc4402 6542dd 1455123 0 0 0 0 0]
1659  	const (
1660  		pMinusNWordZero  = 0x03c9baee
1661  		pMinusNWordOne   = 0x03685c8b
1662  		pMinusNWordTwo   = 0x01fc4402
1663  		pMinusNWordThree = 0x006542dd
1664  		pMinusNWordFour  = 0x01455123
1665  		pMinusNWordFive  = 0x00000000
1666  		pMinusNWordSix   = 0x00000000
1667  		pMinusNWordSeven = 0x00000000
1668  		pMinusNWordEight = 0x00000000
1669  		pMinusNWordNine  = 0x00000000
1670  	)
1671  
1672  	// The intuition here is that the value is greater than field prime minus
1673  	// the group order if one of the higher individual words is greater than the
1674  	// corresponding word and all higher words in the value are equal.
1675  	result := constantTimeGreater(f.n[9], pMinusNWordNine)
1676  	highWordsEqual := constantTimeEq(f.n[9], pMinusNWordNine)
1677  	result |= highWordsEqual & constantTimeGreater(f.n[8], pMinusNWordEight)
1678  	highWordsEqual &= constantTimeEq(f.n[8], pMinusNWordEight)
1679  	result |= highWordsEqual & constantTimeGreater(f.n[7], pMinusNWordSeven)
1680  	highWordsEqual &= constantTimeEq(f.n[7], pMinusNWordSeven)
1681  	result |= highWordsEqual & constantTimeGreater(f.n[6], pMinusNWordSix)
1682  	highWordsEqual &= constantTimeEq(f.n[6], pMinusNWordSix)
1683  	result |= highWordsEqual & constantTimeGreater(f.n[5], pMinusNWordFive)
1684  	highWordsEqual &= constantTimeEq(f.n[5], pMinusNWordFive)
1685  	result |= highWordsEqual & constantTimeGreater(f.n[4], pMinusNWordFour)
1686  	highWordsEqual &= constantTimeEq(f.n[4], pMinusNWordFour)
1687  	result |= highWordsEqual & constantTimeGreater(f.n[3], pMinusNWordThree)
1688  	highWordsEqual &= constantTimeEq(f.n[3], pMinusNWordThree)
1689  	result |= highWordsEqual & constantTimeGreater(f.n[2], pMinusNWordTwo)
1690  	highWordsEqual &= constantTimeEq(f.n[2], pMinusNWordTwo)
1691  	result |= highWordsEqual & constantTimeGreater(f.n[1], pMinusNWordOne)
1692  	highWordsEqual &= constantTimeEq(f.n[1], pMinusNWordOne)
1693  	result |= highWordsEqual & constantTimeGreaterOrEq(f.n[0], pMinusNWordZero)
1694  
1695  	return result != 0
1696  }
1697