1 // Copyright (c) 2013-2014 The btcsuite developers
2 // Copyright (c) 2015-2023 The Decred developers
3 // Copyright (c) 2013-2023 Dave Collins
4 // Use of this source code is governed by an ISC
5 // license that can be found in the LICENSE file.
6 7 package secp256k1
8 9 // References:
10 // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
11 // http://cacr.uwaterloo.ca/hac/
12 13 // All elliptic curve operations for secp256k1 are done in a finite field
14 // characterized by a 256-bit prime. Given this precision is larger than the
15 // biggest available native type, obviously some form of bignum math is needed.
16 // This package implements specialized fixed-precision field arithmetic rather
17 // than relying on an arbitrary-precision arithmetic package such as math/big
18 // for dealing with the field math since the size is known. As a result, rather
19 // large performance gains are achieved by taking advantage of many
20 // optimizations not available to arbitrary-precision arithmetic and generic
21 // modular arithmetic algorithms.
22 //
23 // There are various ways to internally represent each finite field element.
24 // For example, the most obvious representation would be to use an array of 4
25 // uint64s (64 bits * 4 = 256 bits). However, that representation suffers from
26 // a couple of issues. First, there is no native Go type large enough to handle
27 // the intermediate results while adding or multiplying two 64-bit numbers, and
28 // second there is no space left for overflows when performing the intermediate
29 // arithmetic between each array element which would lead to expensive carry
30 // propagation.
31 //
32 // Given the above, this implementation represents the field elements as
33 // 10 uint32s with each word (array entry) treated as base 2^26. This was
34 // chosen for the following reasons:
35 // 1) Most systems at the current time are 64-bit (or at least have 64-bit
36 // registers available for specialized purposes such as MMX) so the
37 // intermediate results can typically be done using a native register (and
38 // using uint64s to avoid the need for additional half-word arithmetic)
39 // 2) In order to allow addition of the internal words without having to
40 // propagate the carry, the max normalized value for each register must
41 // be less than the number of bits available in the register
42 // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
43 // reasonable choice for #2
44 // 4) Given the need for 256-bits of precision and the properties stated in #1,
45 // #2, and #3, the representation which best accommodates this is 10 uint32s
46 // with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
47 // bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
48 // overflow
49 //
50 // Since it is so important that the field arithmetic is extremely fast for high
51 // performance crypto, this type does not perform any validation where it
52 // ordinarily would. See the documentation for FieldVal for more details.
53 54 import (
55 "smesh.lol/pkg/nostr/hex"
56 )
57 58 // Constants used to make the code more readable.
59 const (
60 twoBitsMask = 0x3
61 fourBitsMask = 0xf
62 sixBitsMask = 0x3f
63 eightBitsMask = 0xff
64 )
65 66 // Constants related to the field representation.
67 const (
68 // fieldWords is the number of words used to internally represent the
69 // 256-bit value.
70 fieldWords = 10
71 // fieldBase is the exponent used to form the numeric base of each word.
72 // 2^(fieldBase*i) where i is the word position.
73 fieldBase = 26
74 // fieldBaseMask is the mask for the bits in each word needed to
75 // represent the numeric base of each word (except the most significant
76 // word).
77 fieldBaseMask = (1 << fieldBase) - 1
78 // fieldMSBBits is the number of bits in the most significant word used
79 // to represent the value.
80 fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
81 // fieldMSBMask is the mask for the bits in the most significant word
82 // needed to represent the value.
83 fieldMSBMask = (1 << fieldMSBBits) - 1
84 // These fields provide convenient access to each of the words of the
85 // secp256k1 prime in the internal field representation to improve code
86 // readability.
87 fieldPrimeWordZero = 0x03fffc2f
88 fieldPrimeWordOne = 0x03ffffbf
89 fieldPrimeWordTwo = 0x03ffffff
90 fieldPrimeWordThree = 0x03ffffff
91 fieldPrimeWordFour = 0x03ffffff
92 fieldPrimeWordFive = 0x03ffffff
93 fieldPrimeWordSix = 0x03ffffff
94 fieldPrimeWordSeven = 0x03ffffff
95 fieldPrimeWordEight = 0x03ffffff
96 fieldPrimeWordNine = 0x003fffff
97 )
98 99 // FieldVal implements optimized fixed-precision arithmetic over the
100 // secp256k1 finite field. This means all arithmetic is performed modulo
101 //
102 // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.
103 //
104 // WARNING: Since it is so important for the field arithmetic to be extremely
105 // fast for high performance crypto, this type does not perform any validation
106 // of documented preconditions where it ordinarily would. As a result, it is
107 // IMPERATIVE for callers to understand some key concepts that are described
108 // below and ensure the methods are called with the necessary preconditions that
109 // each method is documented with. For example, some methods only give the
110 // correct result if the field value is normalized and others require the field
111 // values involved to have a maximum magnitude and THERE ARE NO EXPLICIT CHECKS
112 // TO ENSURE THOSE PRECONDITIONS ARE SATISFIED. This does, unfortunately, make
113 // the type more difficult to use correctly and while I typically prefer to
114 // ensure all state and input is valid for most code, this is a bit of an
115 // exception because those extra checks really add up in what ends up being
116 // critical hot paths.
117 //
118 // The first key concept when working with this type is normalization. In order
119 // to avoid the need to propagate a ton of carries, the internal representation
120 // provides additional overflow bits for each word of the overall 256-bit value.
121 // This means that there are multiple internal representations for the same
122 // value and, as a result, any methods that rely on comparison of the value,
123 // such as equality and oddness determination, require the caller to provide a
124 // normalized value.
125 //
126 // The second key concept when working with this type is magnitude. As
127 // previously mentioned, the internal representation provides additional
128 // overflow bits which means that the more math operations that are performed on
129 // the field value between normalizations, the more those overflow bits
130 // accumulate. The magnitude is effectively that maximum possible number of
131 // those overflow bits that could possibly be required as a result of a given
132 // operation. Since there are only a limited number of overflow bits available,
133 // this implies that the max possible magnitude MUST be tracked by the caller
134 // and the caller MUST normalize the field value if a given operation would
135 // cause the magnitude of the result to exceed the max allowed value.
136 //
137 // IMPORTANT: The max allowed magnitude of a field value is 64.
138 type FieldVal struct {
139 // Each 256-bit value is represented as 10 32-bit integers in base 2^26.
140 // This provides 6 bits of overflow in each word (10 bits in the most
141 // significant word) for a total of 64 bits of overflow (9*6 + 10 = 64). It
142 // only implements the arithmetic needed for elliptic curve operations.
143 //
144 // The following depicts the internal representation:
145 // -----------------------------------------------------------------
146 // | n[9] | n[8] | ... | n[0] |
147 // | 32 bits available | 32 bits available | ... | 32 bits available |
148 // | 22 bits for value | 26 bits for value | ... | 26 bits for value |
149 // | 10 bits overflow | 6 bits overflow | ... | 6 bits overflow |
150 // | Mult: 2^(26*9) | Mult: 2^(26*8) | ... | Mult: 2^(26*0) |
151 // -----------------------------------------------------------------
152 //
153 // For example, consider the number 2^49 + 1. It would be represented as:
154 // n[0] = 1
155 // n[1] = 2^23
156 // n[2..9] = 0
157 //
158 // The full 256-bit value is then calculated by looping i from 9..0 and
159 // doing sum(n[i] * 2^(26i)) like so:
160 // n[9] * 2^(26*9) = 0 * 2^234 = 0
161 // n[8] * 2^(26*8) = 0 * 2^208 = 0
162 // ...
163 // n[1] * 2^(26*1) = 2^23 * 2^26 = 2^49
164 // n[0] * 2^(26*0) = 1 * 2^0 = 1
165 // Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
166 n [10]uint32
167 }
168 169 // String returns the field value as a normalized human-readable hex string.
170 //
171 // Preconditions: None
172 // Output Normalized: Field is not modified -- same as input value
173 // Output Max Magnitude: Field is not modified -- same as input value
174 func (f FieldVal) String() string {
175 // f is a copy, so it's safe to normalize it without mutating the original.
176 f.Normalize()
177 return hex.Enc(f.Bytes()[:])
178 }
179 180 // Zero sets the field value to zero in constant time. A newly created field
181 // value is already set to zero. This function can be useful to clear an
182 // existing field value for reuse.
183 //
184 // Preconditions: None
185 // Output Normalized: Yes
186 // Output Max Magnitude: 1
187 func (f *FieldVal) Zero() {
188 f.n[0] = 0
189 f.n[1] = 0
190 f.n[2] = 0
191 f.n[3] = 0
192 f.n[4] = 0
193 f.n[5] = 0
194 f.n[6] = 0
195 f.n[7] = 0
196 f.n[8] = 0
197 f.n[9] = 0
198 }
199 200 // Set sets the field value equal to the passed value in constant time. The
201 // normalization and magnitude of the two fields will be identical.
202 //
203 // The field value is returned to support chaining. This enables syntax like:
204 // f := new(FieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
205 // modified.
206 //
207 // Preconditions: None
208 // Output Normalized: Same as input value
209 // Output Max Magnitude: Same as input value
210 func (f *FieldVal) Set(val *FieldVal) *FieldVal {
211 *f = *val
212 return f
213 }
214 215 // SetInt sets the field value to the passed integer in constant time. This is
216 // a convenience function since it is fairly common to perform some arithmetic
217 // with small native integers.
218 //
219 // The field value is returned to support chaining. This enables syntax such
220 // as f := new(FieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
221 //
222 // Preconditions: None
223 // Output Normalized: Yes
224 // Output Max Magnitude: 1
225 func (f *FieldVal) SetInt(ui uint16) *FieldVal {
226 f.Zero()
227 f.n[0] = uint32(ui)
228 return f
229 }
230 231 // SetBytes packs the passed 32-byte big-endian value into the internal field
232 // value representation in constant time. SetBytes interprets the provided
233 // array as a 256-bit big-endian unsigned integer, packs it into the internal
234 // field value representation, and returns either 1 if it is greater than or
235 // equal to the field prime (aka it overflowed) or 0 otherwise in constant time.
236 //
237 // Note that a bool is not used here because it is not possible in Go to convert
238 // from a bool to numeric value in constant time and many constant-time
239 // operations require a numeric value.
240 //
241 // Preconditions: None
242 // Output Normalized: Yes if no overflow, no otherwise
243 // Output Max Magnitude: 1
244 func (f *FieldVal) SetBytes(b *[32]byte) uint32 {
245 // Pack the 256 total bits across the 10 uint32 words with a max of
246 // 26-bits per word. This could be done with a couple of for loops,
247 // but this unrolled version is significantly faster. Benchmarks show
248 // this is about 34 times faster than the variant which uses loops.
249 f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
250 (uint32(b[28])&twoBitsMask)<<24
251 f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
252 (uint32(b[25])&fourBitsMask)<<22
253 f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
254 (uint32(b[22])&sixBitsMask)<<20
255 f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
256 uint32(b[19])<<18
257 f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
258 (uint32(b[15])&twoBitsMask)<<24
259 f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
260 (uint32(b[12])&fourBitsMask)<<22
261 f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
262 (uint32(b[9])&sixBitsMask)<<20
263 f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
264 uint32(b[6])<<18
265 f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
266 (uint32(b[2])&twoBitsMask)<<24
267 f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
268 // The intuition here is that the field value is greater than the prime if
269 // one of the higher individual words is greater than corresponding word of
270 // the prime and all higher words in the field value are equal to their
271 // corresponding word of the prime. Since this type is modulo the prime,
272 // being equal is also an overflow back to 0.
273 //
274 // Note that because the input is 32 bytes and it was just packed into the
275 // field representation, the only words that can possibly be greater are
276 // zero and one, because ceil(log_2(2^256 - 1 - P)) = 33 bits max and the
277 // internal field representation encodes 26 bits with each word.
278 //
279 // Thus, there is no need to test if the upper words of the field value
280 // exceeds them, hence, only equality is checked for them.
281 highWordsEq := constantTimeEq(f.n[9], fieldPrimeWordNine)
282 highWordsEq &= constantTimeEq(f.n[8], fieldPrimeWordEight)
283 highWordsEq &= constantTimeEq(f.n[7], fieldPrimeWordSeven)
284 highWordsEq &= constantTimeEq(f.n[6], fieldPrimeWordSix)
285 highWordsEq &= constantTimeEq(f.n[5], fieldPrimeWordFive)
286 highWordsEq &= constantTimeEq(f.n[4], fieldPrimeWordFour)
287 highWordsEq &= constantTimeEq(f.n[3], fieldPrimeWordThree)
288 highWordsEq &= constantTimeEq(f.n[2], fieldPrimeWordTwo)
289 overflow := highWordsEq & constantTimeGreater(f.n[1], fieldPrimeWordOne)
290 highWordsEq &= constantTimeEq(f.n[1], fieldPrimeWordOne)
291 overflow |= highWordsEq & constantTimeGreaterOrEq(
292 f.n[0],
293 fieldPrimeWordZero,
294 )
295 return overflow
296 }
297 298 // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
299 // integer (meaning it is truncated to the first 32 bytes), packs it into the
300 // internal field value representation, and returns whether or not the resulting
301 // truncated 256-bit integer is greater than or equal to the field prime (aka it
302 // overflowed) in constant time.
303 //
304 // Note that since passing a slice with more than 32 bytes is truncated, it is
305 // possible that the truncated value is less than the field prime and hence it
306 // will not be reported as having overflowed in that case. It is up to the
307 // caller to decide whether it needs to provide numbers of the appropriate size
308 // or it if is acceptable to use this function with the described truncation and
309 // overflow behavior.
310 //
311 // Preconditions: None
312 // Output Normalized: Yes if no overflow, no otherwise
313 // Output Max Magnitude: 1
314 func (f *FieldVal) SetByteSlice(b []byte) bool {
315 var b32 [32]byte
316 b = b[:constantTimeMin(uint32(len(b)), 32)]
317 copy(b32[:], b32[:32-len(b)])
318 copy(b32[32-len(b):], b)
319 result := f.SetBytes(&b32)
320 zeroArray32(&b32)
321 return result != 0
322 }
323 324 // Normalize normalizes the internal field words into the desired range and
325 // performs fast modular reduction over the secp256k1 prime by making use of the
326 // special form of the prime in constant time.
327 //
328 // Preconditions: None
329 // Output Normalized: Yes
330 // Output Max Magnitude: 1
331 func (f *FieldVal) Normalize() *FieldVal {
332 // The field representation leaves 6 bits of overflow in each word so
333 // intermediate calculations can be performed without needing to
334 // propagate the carry to each higher word during the calculations. In
335 // order to normalize, we need to "compact" the full 256-bit value to
336 // the right while propagating any carries through to the high order
337 // word.
338 //
339 // Since this field is doing arithmetic modulo the secp256k1 prime, we
340 // also need to perform modular reduction over the prime.
341 //
342 // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
343 // when the modulus is of the special form m = b^t - c, highly efficient
344 // reduction can be achieved.
345 //
346 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
347 // this criteria.
348 //
349 // 4294968273 in field representation (base 2^26) is:
350 // n[0] = 977
351 // n[1] = 64
352 // That is to say (2^26 * 64) + 977 = 4294968273
353 //
354 // The algorithm presented in the referenced section typically repeats
355 // until the quotient is zero. However, due to our field representation
356 // we already know to within one reduction how many times we would need
357 // to repeat as it's the uppermost bits of the high order word. Thus we
358 // can simply multiply the magnitude by the field representation of the
359 // prime and do a single iteration. After this step there might be an
360 // additional carry to bit 256 (bit 22 of the high order word).
361 t9 := f.n[9]
362 m := t9 >> fieldMSBBits
363 t9 = t9 & fieldMSBMask
364 t0 := f.n[0] + m*977
365 t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
366 t0 = t0 & fieldBaseMask
367 t2 := (t1 >> fieldBase) + f.n[2]
368 t1 = t1 & fieldBaseMask
369 t3 := (t2 >> fieldBase) + f.n[3]
370 t2 = t2 & fieldBaseMask
371 t4 := (t3 >> fieldBase) + f.n[4]
372 t3 = t3 & fieldBaseMask
373 t5 := (t4 >> fieldBase) + f.n[5]
374 t4 = t4 & fieldBaseMask
375 t6 := (t5 >> fieldBase) + f.n[6]
376 t5 = t5 & fieldBaseMask
377 t7 := (t6 >> fieldBase) + f.n[7]
378 t6 = t6 & fieldBaseMask
379 t8 := (t7 >> fieldBase) + f.n[8]
380 t7 = t7 & fieldBaseMask
381 t9 = (t8 >> fieldBase) + t9
382 t8 = t8 & fieldBaseMask
383 // At this point, the magnitude is guaranteed to be one, however, the
384 // value could still be greater than the prime if there was either a
385 // carry through to bit 256 (bit 22 of the higher order word) or the
386 // value is greater than or equal to the field characteristic. The
387 // following determines if either or these conditions are true and does
388 // the final reduction in constant time.
389 //
390 // Also note that 'm' will be zero when neither of the aforementioned
391 // conditions are true and the value will not be changed when 'm' is zero.
392 m = constantTimeEq(t9, fieldMSBMask)
393 m &= constantTimeEq(t8&t7&t6&t5&t4&t3&t2, fieldBaseMask)
394 m &= constantTimeGreater(t1+64+((t0+977)>>fieldBase), fieldBaseMask)
395 m |= t9 >> fieldMSBBits
396 t0 = t0 + m*977
397 t1 = (t0 >> fieldBase) + t1 + (m << 6)
398 t0 = t0 & fieldBaseMask
399 t2 = (t1 >> fieldBase) + t2
400 t1 = t1 & fieldBaseMask
401 t3 = (t2 >> fieldBase) + t3
402 t2 = t2 & fieldBaseMask
403 t4 = (t3 >> fieldBase) + t4
404 t3 = t3 & fieldBaseMask
405 t5 = (t4 >> fieldBase) + t5
406 t4 = t4 & fieldBaseMask
407 t6 = (t5 >> fieldBase) + t6
408 t5 = t5 & fieldBaseMask
409 t7 = (t6 >> fieldBase) + t7
410 t6 = t6 & fieldBaseMask
411 t8 = (t7 >> fieldBase) + t8
412 t7 = t7 & fieldBaseMask
413 t9 = (t8 >> fieldBase) + t9
414 t8 = t8 & fieldBaseMask
415 t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.
416 // Finally, set the normalized and reduced words.
417 f.n[0] = t0
418 f.n[1] = t1
419 f.n[2] = t2
420 f.n[3] = t3
421 f.n[4] = t4
422 f.n[5] = t5
423 f.n[6] = t6
424 f.n[7] = t7
425 f.n[8] = t8
426 f.n[9] = t9
427 return f
428 }
429 430 // PutBytesUnchecked unpacks the field value to a 32-byte big-endian value
431 // directly into the passed byte slice in constant time. The target slice must
432 // have at least 32 bytes available or it will panic.
433 //
434 // There is a similar function, PutBytes, which unpacks the field value into a
435 // 32-byte array directly. This version is provided since it can be useful
436 // to write directly into part of a larger buffer without needing a separate
437 // allocation.
438 //
439 // Preconditions:
440 // - The field value MUST be normalized
441 // - The target slice MUST have at least 32 bytes available
442 func (f *FieldVal) PutBytesUnchecked(b []byte) {
443 // Unpack the 256 total bits from the 10 uint32 words with a max of
444 // 26-bits per word. This could be done with a couple of for loops,
445 // but this unrolled version is a bit faster. Benchmarks show this is
446 // about 10 times faster than the variant which uses loops.
447 b[31] = byte(f.n[0] & eightBitsMask)
448 b[30] = byte((f.n[0] >> 8) & eightBitsMask)
449 b[29] = byte((f.n[0] >> 16) & eightBitsMask)
450 b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
451 b[27] = byte((f.n[1] >> 6) & eightBitsMask)
452 b[26] = byte((f.n[1] >> 14) & eightBitsMask)
453 b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
454 b[24] = byte((f.n[2] >> 4) & eightBitsMask)
455 b[23] = byte((f.n[2] >> 12) & eightBitsMask)
456 b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
457 b[21] = byte((f.n[3] >> 2) & eightBitsMask)
458 b[20] = byte((f.n[3] >> 10) & eightBitsMask)
459 b[19] = byte((f.n[3] >> 18) & eightBitsMask)
460 b[18] = byte(f.n[4] & eightBitsMask)
461 b[17] = byte((f.n[4] >> 8) & eightBitsMask)
462 b[16] = byte((f.n[4] >> 16) & eightBitsMask)
463 b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
464 b[14] = byte((f.n[5] >> 6) & eightBitsMask)
465 b[13] = byte((f.n[5] >> 14) & eightBitsMask)
466 b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
467 b[11] = byte((f.n[6] >> 4) & eightBitsMask)
468 b[10] = byte((f.n[6] >> 12) & eightBitsMask)
469 b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
470 b[8] = byte((f.n[7] >> 2) & eightBitsMask)
471 b[7] = byte((f.n[7] >> 10) & eightBitsMask)
472 b[6] = byte((f.n[7] >> 18) & eightBitsMask)
473 b[5] = byte(f.n[8] & eightBitsMask)
474 b[4] = byte((f.n[8] >> 8) & eightBitsMask)
475 b[3] = byte((f.n[8] >> 16) & eightBitsMask)
476 b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
477 b[1] = byte((f.n[9] >> 6) & eightBitsMask)
478 b[0] = byte((f.n[9] >> 14) & eightBitsMask)
479 }
480 481 // PutBytes unpacks the field value to a 32-byte big-endian value using the
482 // passed byte array in constant time.
483 //
484 // There is a similar function, PutBytesUnchecked, which unpacks the field value
485 // into a slice that must have at least 32 bytes available. This version is
486 // provided since it can be useful to write directly into an array that is type
487 // checked.
488 //
489 // Alternatively, there is also Bytes, which unpacks the field value into a new
490 // array and returns that which can sometimes be more ergonomic in applications
491 // that aren't concerned about an additional copy.
492 //
493 // Preconditions:
494 // - The field value MUST be normalized
495 func (f *FieldVal) PutBytes(b *[32]byte) { f.PutBytesUnchecked(b[:]) }
496 497 // Bytes unpacks the field value to a 32-byte big-endian value in constant time.
498 //
499 // See PutBytes and PutBytesUnchecked for variants that allow an array or slice
500 // to be passed which can be useful to cut down on the number of allocations by
501 // allowing the caller to reuse a buffer or write directly into part of a larger
502 // buffer.
503 //
504 // Preconditions:
505 // - The field value MUST be normalized
506 func (f *FieldVal) Bytes() *[32]byte {
507 b := &[32]byte{}
508 f.PutBytesUnchecked(b[:])
509 return b
510 }
511 512 // IsZeroBit returns 1 when the field value is equal to zero or 0 otherwise in
513 // constant time.
514 //
515 // Note that a bool is not used here because it is not possible in Go to convert
516 // from a bool to numeric value in constant time and many constant-time
517 // operations require a numeric value. See IsZero for the version that returns
518 // a bool.
519 //
520 // Preconditions:
521 // - The field value MUST be normalized
522 func (f *FieldVal) IsZeroBit() uint32 {
523 // The value can only be zero if no bits are set in any of the words.
524 // This is a constant time implementation.
525 bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
526 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
527 return constantTimeEq(bits, 0)
528 }
529 530 // IsZero returns whether or not the field value is equal to zero in constant
531 // time.
532 //
533 // Preconditions:
534 // - The field value MUST be normalized
535 func (f *FieldVal) IsZero() bool {
536 // The value can only be zero if no bits are set in any of the words.
537 // This is a constant time implementation.
538 bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
539 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
540 return bits == 0
541 }
542 543 // IsOneBit returns 1 when the field value is equal to one or 0 otherwise in
544 // constant time.
545 //
546 // Note that a bool is not used here because it is not possible in Go to convert
547 // from a bool to numeric value in constant time and many constant-time
548 // operations require a numeric value. See IsOne for the version that returns a
549 // bool.
550 //
551 // Preconditions:
552 // - The field value MUST be normalized
553 func (f *FieldVal) IsOneBit() uint32 {
554 // The value can only be one if the single lowest significant bit is set in
555 // the first word and no other bits are set in any of the other words.
556 // This is a constant time implementation.
557 bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
558 f.n[6] | f.n[7] | f.n[8] | f.n[9]
559 560 return constantTimeEq(bits, 0)
561 }
562 563 // IsOne returns whether the field value is equal to one in constant
564 // time.
565 //
566 // Preconditions:
567 // - The field value MUST be normalized
568 func (f *FieldVal) IsOne() bool {
569 // The value can only be one if the single lowest significant bit is set in
570 // the first word and no other bits are set in any of the other words.
571 // This is a constant time implementation.
572 bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
573 f.n[6] | f.n[7] | f.n[8] | f.n[9]
574 575 return bits == 0
576 }
577 578 // IsOddBit returns 1 when the field value is an odd number or 0 otherwise in
579 // constant time.
580 //
581 // Note that a bool is not used here because it is not possible in Go to convert
582 // from a bool to numeric value in constant time and many constant-time
583 // operations require a numeric value. See IsOdd for the version that returns a
584 // bool.
585 //
586 // Preconditions:
587 // - The field value MUST be normalized
588 func (f *FieldVal) IsOddBit() uint32 {
589 // Only odd numbers have the bottom bit set.
590 return f.n[0] & 1
591 }
592 593 // IsOdd returns whether the field value is an odd number in constant
594 // time.
595 //
596 // Preconditions:
597 // - The field value MUST be normalized
598 func (f *FieldVal) IsOdd() bool {
599 // Only odd numbers have the bottom bit set.
600 return f.n[0]&1 == 1
601 }
602 603 // Equals returns whether or not the two field values are the same in constant
604 // time.
605 //
606 // Preconditions:
607 // - Both field values being compared MUST be normalized
608 func (f *FieldVal) Equals(val *FieldVal) bool {
609 // Xor only sets bits when they are different, so the two field values
610 // can only be the same if no bits are set after xoring each word.
611 // This is a constant time implementation.
612 bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
613 (f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
614 (f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
615 (f.n[9] ^ val.n[9])
616 617 return bits == 0
618 }
619 620 // NegateVal negates the passed value and stores the result in f in constant
621 // time. The caller must provide the magnitude of the passed value for a
622 // correct result.
623 //
624 // The field value is returned to support chaining. This enables syntax like:
625 // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
626 //
627 // Preconditions:
628 // - The max magnitude MUST be 63
629 // Output Normalized: No
630 // Output Max Magnitude: Input magnitude + 1
631 func (f *FieldVal) NegateVal(val *FieldVal, magnitude uint32) *FieldVal {
632 // Negation in the field is just the prime minus the value. However,
633 // in order to allow negation against a field value without having to
634 // normalize/reduce it first, multiply by the magnitude (that is how
635 // "far" away it is from the normalized value) to adjust. Also, since
636 // negating a value pushes it one more order of magnitude away from the
637 // normalized range, add 1 to compensate.
638 //
639 // For some intuition here, imagine you're performing mod 12 arithmetic
640 // (picture a clock) and you are negating the number 7. So you start at
641 // 12 (which is of course 0 under mod 12) and count backwards (left on
642 // the clock) 7 times to arrive at 5. Notice this is just 12-7 = 5.
643 // Now, assume you're starting with 19, which is a number that is
644 // already larger than the modulus and congruent to 7 (mod 12). When a
645 // value is already in the desired range, its magnitude is 1. Since 19
646 // is an additional "step", its magnitude (mod 12) is 2. Since any
647 // multiple of the modulus is congruent to zero (mod m), the answer can
648 // be shortcut by simply multiplying the magnitude by the modulus and
649 // subtracting. Keeping with the example, this would be (2*12)-19 = 5.
650 f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
651 f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
652 f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
653 f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
654 f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
655 f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
656 f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
657 f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
658 f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
659 f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
660 return f
661 }
662 663 // Negate negates the field value in constant time. The existing field value is
664 // modified. The caller must provide the magnitude of the field value for a
665 // correct result.
666 //
667 // The field value is returned to support chaining. This enables syntax like:
668 // f.Negate().AddInt(1) so that f = -f + 1.
669 //
670 // Preconditions:
671 // - The max magnitude MUST be 63
672 // Output Normalized: No
673 // Output Max Magnitude: Input magnitude + 1
674 func (f *FieldVal) Negate(magnitude uint32) *FieldVal {
675 return f.NegateVal(f, magnitude)
676 }
677 678 // AddInt adds the passed integer to the existing field value and stores the
679 // result in f in constant time. This is a convenience function since it is
680 // fairly common to perform some arithmetic with small native integers.
681 //
682 // The field value is returned to support chaining. This enables syntax like:
683 // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
684 //
685 // Preconditions:
686 // - The field value MUST have a max magnitude of 63
687 // Output Normalized: No
688 // Output Max Magnitude: Existing field magnitude + 1
689 func (f *FieldVal) AddInt(ui uint16) *FieldVal {
690 // Since the field representation intentionally provides overflow bits,
691 // it's ok to use carryless addition as the carry bit is safely part of
692 // the word and will be normalized out.
693 f.n[0] += uint32(ui)
694 return f
695 }
696 697 // Add adds the passed value to the existing field value and stores the result
698 // in f in constant time.
699 //
700 // The field value is returned to support chaining. This enables syntax like:
701 // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
702 //
703 // Preconditions:
704 // - The sum of the magnitudes of the two field values MUST be a max of 64
705 // Output Normalized: No
706 // Output Max Magnitude: Sum of the magnitude of the two individual field values
707 func (f *FieldVal) Add(val *FieldVal) *FieldVal {
708 // Since the field representation intentionally provides overflow bits,
709 // it's ok to use carryless addition as the carry bit is safely part of
710 // each word and will be normalized out. This could obviously be done
711 // in a loop, but the unrolled version is faster.
712 f.n[0] += val.n[0]
713 f.n[1] += val.n[1]
714 f.n[2] += val.n[2]
715 f.n[3] += val.n[3]
716 f.n[4] += val.n[4]
717 f.n[5] += val.n[5]
718 f.n[6] += val.n[6]
719 f.n[7] += val.n[7]
720 f.n[8] += val.n[8]
721 f.n[9] += val.n[9]
722 return f
723 }
724 725 // Add2 adds the passed two field values together and stores the result in f in
726 // constant time.
727 //
728 // The field value is returned to support chaining. This enables syntax like:
729 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
730 //
731 // Preconditions:
732 // - The sum of the magnitudes of the two field values MUST be a max of 64
733 // Output Normalized: No
734 // Output Max Magnitude: Sum of the magnitude of the two field values
735 func (f *FieldVal) Add2(val *FieldVal, val2 *FieldVal) *FieldVal {
736 // Since the field representation intentionally provides overflow bits,
737 // it's ok to use carryless addition as the carry bit is safely part of
738 // each word and will be normalized out. This could obviously be done
739 // in a loop, but the unrolled version is faster.
740 f.n[0] = val.n[0] + val2.n[0]
741 f.n[1] = val.n[1] + val2.n[1]
742 f.n[2] = val.n[2] + val2.n[2]
743 f.n[3] = val.n[3] + val2.n[3]
744 f.n[4] = val.n[4] + val2.n[4]
745 f.n[5] = val.n[5] + val2.n[5]
746 f.n[6] = val.n[6] + val2.n[6]
747 f.n[7] = val.n[7] + val2.n[7]
748 f.n[8] = val.n[8] + val2.n[8]
749 f.n[9] = val.n[9] + val2.n[9]
750 return f
751 }
752 753 // MulInt multiplies the field value by the passed int and stores the result in
754 // f in constant time. Note that this function can overflow if multiplying the
755 // value by any of the individual words exceeds a max uint32. Therefore it is
756 // important that the caller ensures no overflows will occur before using this
757 // function.
758 //
759 // The field value is returned to support chaining. This enables syntax like:
760 // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
761 //
762 // Preconditions:
763 // - The field value magnitude multiplied by given val MUST be a max of 64
764 // Output Normalized: No
765 // Output Max Magnitude: Existing field magnitude times the provided integer val
766 func (f *FieldVal) MulInt(val uint8) *FieldVal {
767 // Since each word of the field representation can hold up to
768 // 32 - fieldBase extra bits which will be normalized out, it's safe
769 // to multiply each word without using a larger type or carry
770 // propagation so long as the values won't overflow a uint32. This
771 // could obviously be done in a loop, but the unrolled version is
772 // faster.
773 ui := uint32(val)
774 f.n[0] *= ui
775 f.n[1] *= ui
776 f.n[2] *= ui
777 f.n[3] *= ui
778 f.n[4] *= ui
779 f.n[5] *= ui
780 f.n[6] *= ui
781 f.n[7] *= ui
782 f.n[8] *= ui
783 f.n[9] *= ui
784 return f
785 }
786 787 // Mul multiplies the passed value to the existing field value and stores the
788 // result in f in constant time. Note that this function can overflow if
789 // multiplying any of the individual words exceeds a max uint32. In practice,
790 // this means the magnitude of either value involved in the multiplication must
791 // be a max of 8.
792 //
793 // The field value is returned to support chaining. This enables syntax like:
794 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
795 //
796 // Preconditions:
797 // - Both field values MUST have a max magnitude of 8
798 // Output Normalized: No
799 // Output Max Magnitude: 1
800 func (f *FieldVal) Mul(val *FieldVal) *FieldVal {
801 return f.Mul2(f, val)
802 }
803 804 // Mul2 multiplies the passed two field values together and stores the result in
805 // f in constant time. Note that this function can overflow if multiplying any
806 // of the individual words exceeds a max uint32. In practice, this means the
807 // magnitude of either value involved in the multiplication must be a max of 8.
808 //
809 // The field value is returned to support chaining. This enables syntax like:
810 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
811 //
812 // Preconditions:
813 // - Both input field values MUST have a max magnitude of 8
814 // Output Normalized: No
815 // Output Max Magnitude: 1
816 func (f *FieldVal) Mul2(val *FieldVal, val2 *FieldVal) *FieldVal {
817 // This could be done with a couple of for loops and an array to store
818 // the intermediate terms, but this unrolled version is significantly
819 // faster.
820 // Terms for 2^(fieldBase*0).
821 m := uint64(val.n[0]) * uint64(val2.n[0])
822 t0 := m & fieldBaseMask
823 // Terms for 2^(fieldBase*1).
824 m = (m >> fieldBase) +
825 uint64(val.n[0])*uint64(val2.n[1]) +
826 uint64(val.n[1])*uint64(val2.n[0])
827 t1 := m & fieldBaseMask
828 // Terms for 2^(fieldBase*2).
829 m = (m >> fieldBase) +
830 uint64(val.n[0])*uint64(val2.n[2]) +
831 uint64(val.n[1])*uint64(val2.n[1]) +
832 uint64(val.n[2])*uint64(val2.n[0])
833 t2 := m & fieldBaseMask
834 // Terms for 2^(fieldBase*3).
835 m = (m >> fieldBase) +
836 uint64(val.n[0])*uint64(val2.n[3]) +
837 uint64(val.n[1])*uint64(val2.n[2]) +
838 uint64(val.n[2])*uint64(val2.n[1]) +
839 uint64(val.n[3])*uint64(val2.n[0])
840 t3 := m & fieldBaseMask
841 // Terms for 2^(fieldBase*4).
842 m = (m >> fieldBase) +
843 uint64(val.n[0])*uint64(val2.n[4]) +
844 uint64(val.n[1])*uint64(val2.n[3]) +
845 uint64(val.n[2])*uint64(val2.n[2]) +
846 uint64(val.n[3])*uint64(val2.n[1]) +
847 uint64(val.n[4])*uint64(val2.n[0])
848 t4 := m & fieldBaseMask
849 // Terms for 2^(fieldBase*5).
850 m = (m >> fieldBase) +
851 uint64(val.n[0])*uint64(val2.n[5]) +
852 uint64(val.n[1])*uint64(val2.n[4]) +
853 uint64(val.n[2])*uint64(val2.n[3]) +
854 uint64(val.n[3])*uint64(val2.n[2]) +
855 uint64(val.n[4])*uint64(val2.n[1]) +
856 uint64(val.n[5])*uint64(val2.n[0])
857 t5 := m & fieldBaseMask
858 // Terms for 2^(fieldBase*6).
859 m = (m >> fieldBase) +
860 uint64(val.n[0])*uint64(val2.n[6]) +
861 uint64(val.n[1])*uint64(val2.n[5]) +
862 uint64(val.n[2])*uint64(val2.n[4]) +
863 uint64(val.n[3])*uint64(val2.n[3]) +
864 uint64(val.n[4])*uint64(val2.n[2]) +
865 uint64(val.n[5])*uint64(val2.n[1]) +
866 uint64(val.n[6])*uint64(val2.n[0])
867 t6 := m & fieldBaseMask
868 // Terms for 2^(fieldBase*7).
869 m = (m >> fieldBase) +
870 uint64(val.n[0])*uint64(val2.n[7]) +
871 uint64(val.n[1])*uint64(val2.n[6]) +
872 uint64(val.n[2])*uint64(val2.n[5]) +
873 uint64(val.n[3])*uint64(val2.n[4]) +
874 uint64(val.n[4])*uint64(val2.n[3]) +
875 uint64(val.n[5])*uint64(val2.n[2]) +
876 uint64(val.n[6])*uint64(val2.n[1]) +
877 uint64(val.n[7])*uint64(val2.n[0])
878 t7 := m & fieldBaseMask
879 // Terms for 2^(fieldBase*8).
880 m = (m >> fieldBase) +
881 uint64(val.n[0])*uint64(val2.n[8]) +
882 uint64(val.n[1])*uint64(val2.n[7]) +
883 uint64(val.n[2])*uint64(val2.n[6]) +
884 uint64(val.n[3])*uint64(val2.n[5]) +
885 uint64(val.n[4])*uint64(val2.n[4]) +
886 uint64(val.n[5])*uint64(val2.n[3]) +
887 uint64(val.n[6])*uint64(val2.n[2]) +
888 uint64(val.n[7])*uint64(val2.n[1]) +
889 uint64(val.n[8])*uint64(val2.n[0])
890 t8 := m & fieldBaseMask
891 // Terms for 2^(fieldBase*9).
892 m = (m >> fieldBase) +
893 uint64(val.n[0])*uint64(val2.n[9]) +
894 uint64(val.n[1])*uint64(val2.n[8]) +
895 uint64(val.n[2])*uint64(val2.n[7]) +
896 uint64(val.n[3])*uint64(val2.n[6]) +
897 uint64(val.n[4])*uint64(val2.n[5]) +
898 uint64(val.n[5])*uint64(val2.n[4]) +
899 uint64(val.n[6])*uint64(val2.n[3]) +
900 uint64(val.n[7])*uint64(val2.n[2]) +
901 uint64(val.n[8])*uint64(val2.n[1]) +
902 uint64(val.n[9])*uint64(val2.n[0])
903 t9 := m & fieldBaseMask
904 // Terms for 2^(fieldBase*10).
905 m = (m >> fieldBase) +
906 uint64(val.n[1])*uint64(val2.n[9]) +
907 uint64(val.n[2])*uint64(val2.n[8]) +
908 uint64(val.n[3])*uint64(val2.n[7]) +
909 uint64(val.n[4])*uint64(val2.n[6]) +
910 uint64(val.n[5])*uint64(val2.n[5]) +
911 uint64(val.n[6])*uint64(val2.n[4]) +
912 uint64(val.n[7])*uint64(val2.n[3]) +
913 uint64(val.n[8])*uint64(val2.n[2]) +
914 uint64(val.n[9])*uint64(val2.n[1])
915 t10 := m & fieldBaseMask
916 // Terms for 2^(fieldBase*11).
917 m = (m >> fieldBase) +
918 uint64(val.n[2])*uint64(val2.n[9]) +
919 uint64(val.n[3])*uint64(val2.n[8]) +
920 uint64(val.n[4])*uint64(val2.n[7]) +
921 uint64(val.n[5])*uint64(val2.n[6]) +
922 uint64(val.n[6])*uint64(val2.n[5]) +
923 uint64(val.n[7])*uint64(val2.n[4]) +
924 uint64(val.n[8])*uint64(val2.n[3]) +
925 uint64(val.n[9])*uint64(val2.n[2])
926 t11 := m & fieldBaseMask
927 // Terms for 2^(fieldBase*12).
928 m = (m >> fieldBase) +
929 uint64(val.n[3])*uint64(val2.n[9]) +
930 uint64(val.n[4])*uint64(val2.n[8]) +
931 uint64(val.n[5])*uint64(val2.n[7]) +
932 uint64(val.n[6])*uint64(val2.n[6]) +
933 uint64(val.n[7])*uint64(val2.n[5]) +
934 uint64(val.n[8])*uint64(val2.n[4]) +
935 uint64(val.n[9])*uint64(val2.n[3])
936 t12 := m & fieldBaseMask
937 // Terms for 2^(fieldBase*13).
938 m = (m >> fieldBase) +
939 uint64(val.n[4])*uint64(val2.n[9]) +
940 uint64(val.n[5])*uint64(val2.n[8]) +
941 uint64(val.n[6])*uint64(val2.n[7]) +
942 uint64(val.n[7])*uint64(val2.n[6]) +
943 uint64(val.n[8])*uint64(val2.n[5]) +
944 uint64(val.n[9])*uint64(val2.n[4])
945 t13 := m & fieldBaseMask
946 // Terms for 2^(fieldBase*14).
947 m = (m >> fieldBase) +
948 uint64(val.n[5])*uint64(val2.n[9]) +
949 uint64(val.n[6])*uint64(val2.n[8]) +
950 uint64(val.n[7])*uint64(val2.n[7]) +
951 uint64(val.n[8])*uint64(val2.n[6]) +
952 uint64(val.n[9])*uint64(val2.n[5])
953 t14 := m & fieldBaseMask
954 // Terms for 2^(fieldBase*15).
955 m = (m >> fieldBase) +
956 uint64(val.n[6])*uint64(val2.n[9]) +
957 uint64(val.n[7])*uint64(val2.n[8]) +
958 uint64(val.n[8])*uint64(val2.n[7]) +
959 uint64(val.n[9])*uint64(val2.n[6])
960 t15 := m & fieldBaseMask
961 // Terms for 2^(fieldBase*16).
962 m = (m >> fieldBase) +
963 uint64(val.n[7])*uint64(val2.n[9]) +
964 uint64(val.n[8])*uint64(val2.n[8]) +
965 uint64(val.n[9])*uint64(val2.n[7])
966 t16 := m & fieldBaseMask
967 // Terms for 2^(fieldBase*17).
968 m = (m >> fieldBase) +
969 uint64(val.n[8])*uint64(val2.n[9]) +
970 uint64(val.n[9])*uint64(val2.n[8])
971 t17 := m & fieldBaseMask
972 // Terms for 2^(fieldBase*18).
973 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
974 t18 := m & fieldBaseMask
975 // What's left is for 2^(fieldBase*19).
976 t19 := m >> fieldBase
977 // At this point, all of the terms are grouped into their respective
978 // base.
979 //
980 // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
981 // when the modulus is of the special form m = b^t - c, highly efficient
982 // reduction can be achieved per the provided algorithm.
983 //
984 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
985 // this criteria.
986 //
987 // 4294968273 in field representation (base 2^26) is:
988 // n[0] = 977
989 // n[1] = 64
990 // That is to say (2^26 * 64) + 977 = 4294968273
991 //
992 // Since each word is in base 26, the upper terms (t10 and up) start
993 // at 260 bits (versus the final desired range of 256 bits), so the
994 // field representation of 'c' from above needs to be adjusted for the
995 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 =
996 // 68719492368. Thus, the adjusted field representation of 'c' is:
997 // n[0] = 977 * 16 = 15632
998 // n[1] = 64 * 16 = 1024
999 // That is to say (2^26 * 1024) + 15632 = 68719492368
1000 //
1001 // To reduce the final term, t19, the entire 'c' value is needed instead
1002 // of only n[0] because there are no more terms left to handle n[1].
1003 // This means there might be some magnitude left in the upper bits that
1004 // is handled below.
1005 m = t0 + t10*15632
1006 t0 = m & fieldBaseMask
1007 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1008 t1 = m & fieldBaseMask
1009 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1010 t2 = m & fieldBaseMask
1011 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1012 t3 = m & fieldBaseMask
1013 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1014 t4 = m & fieldBaseMask
1015 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1016 t5 = m & fieldBaseMask
1017 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1018 t6 = m & fieldBaseMask
1019 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1020 t7 = m & fieldBaseMask
1021 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1022 t8 = m & fieldBaseMask
1023 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1024 t9 = m & fieldMSBMask
1025 m = m >> fieldMSBBits
1026 // At this point, if the magnitude is greater than 0, the overall value
1027 // is greater than the max possible 256-bit value. In particular, it is
1028 // "how many times larger" than the max value it is.
1029 //
1030 // The algorithm presented in [HAC] section 14.3.4 repeats until the
1031 // quotient is zero. However, due to the above, we already know at
1032 // least how many times we would need to repeat as it's the value
1033 // currently in m. Thus we can simply multiply the magnitude by the
1034 // field representation of the prime and do a single iteration. Notice
1035 // that nothing will be changed when the magnitude is zero, so we could
1036 // skip this in that case, however always running regardless allows it
1037 // to run in constant time. The final result will be in the range
1038 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1039 // magnitude of 1, but it is denormalized.
1040 d := t0 + m*977
1041 f.n[0] = uint32(d & fieldBaseMask)
1042 d = (d >> fieldBase) + t1 + m*64
1043 f.n[1] = uint32(d & fieldBaseMask)
1044 f.n[2] = uint32((d >> fieldBase) + t2)
1045 f.n[3] = uint32(t3)
1046 f.n[4] = uint32(t4)
1047 f.n[5] = uint32(t5)
1048 f.n[6] = uint32(t6)
1049 f.n[7] = uint32(t7)
1050 f.n[8] = uint32(t8)
1051 f.n[9] = uint32(t9)
1052 return f
1053 }
1054 1055 // SquareRootVal either calculates the square root of the passed value when it
1056 // exists or the square root of the negation of the value when it does not exist
1057 // and stores the result in f in constant time. The return flag is true when
1058 // the calculated square root is for the passed value itself and false when it
1059 // is for its negation.
1060 //
1061 // Note that this function can overflow if multiplying any of the individual
1062 // words exceeds a max uint32. In practice, this means the magnitude of the
1063 // field must be a max of 8 to prevent overflow. The magnitude of the result
1064 // will be 1.
1065 //
1066 // Preconditions:
1067 // - The input field value MUST have a max magnitude of 8
1068 // Output Normalized: No
1069 // Output Max Magnitude: 1
1070 func (f *FieldVal) SquareRootVal(val *FieldVal) bool {
1071 // This uses the Tonelli-Shanks method for calculating the square root of
1072 // the value when it exists. The key principles of the method follow.
1073 //
1074 // Fermat's little theorem states that for a nonzero number 'a' and prime
1075 // 'p', a^(p-1) ≡ 1 (mod p).
1076 //
1077 // Further, Euler's criterion states that an integer 'a' has a square root
1078 // (aka is a quadratic residue) modulo a prime if a^((p-1)/2) ≡ 1 (mod p)
1079 // and, conversely, when it does NOT have a square root (aka 'a' is a
1080 // non-residue) a^((p-1)/2) ≡ -1 (mod p).
1081 //
1082 // This can be seen by considering that Fermat's little theorem can be
1083 // written as (a^((p-1)/2) - 1)(a^((p-1)/2) + 1) ≡ 0 (mod p). Therefore,
1084 // one of the two factors must be 0. Then, when a ≡ x^2 (aka 'a' is a
1085 // quadratic residue), (x^2)^((p-1)/2) ≡ x^(p-1) ≡ 1 (mod p) which implies
1086 // the first factor must be zero. Finally, per Lagrange's theorem, the
1087 // non-residues are the only remaining possible solutions and thus must make
1088 // the second factor zero to satisfy Fermat's little theorem implying that
1089 // a^((p-1)/2) ≡ -1 (mod p) for that case.
1090 //
1091 // The Tonelli-Shanks method uses these facts along with factoring out
1092 // powers of two to solve a congruence that results in either the solution
1093 // when the square root exists or the square root of the negation of the
1094 // value when it does not. In the case of primes that are ≡ 3 (mod 4), the
1095 // possible solutions are r = ±a^((p+1)/4) (mod p). Therefore, either r^2 ≡
1096 // a (mod p) is true in which case ±r are the two solutions, or r^2 ≡ -a
1097 // (mod p) in which case 'a' is a non-residue and there are no solutions.
1098 //
1099 // The secp256k1 prime is ≡ 3 (mod 4), so this result applies.
1100 //
1101 // In other words, calculate a^((p+1)/4) and then square it and check it
1102 // against the original value to determine if it is actually the square
1103 // root.
1104 //
1105 // In order to efficiently compute a^((p+1)/4), (p+1)/4 needs to be split
1106 // into a sequence of squares and multiplications that minimizes the number
1107 // of multiplications needed (since they are more costly than squarings).
1108 //
1109 // The secp256k1 prime + 1 / 4 is 2^254 - 2^30 - 244. In binary, that is:
1110 //
1111 // 00111111 11111111 11111111 11111111
1112 // 11111111 11111111 11111111 11111111
1113 // 11111111 11111111 11111111 11111111
1114 // 11111111 11111111 11111111 11111111
1115 // 11111111 11111111 11111111 11111111
1116 // 11111111 11111111 11111111 11111111
1117 // 11111111 11111111 11111111 11111111
1118 // 10111111 11111111 11111111 00001100
1119 //
1120 // Notice that can be broken up into three windows of consecutive 1s (in
1121 // order of least to most significant) as:
1122 //
1123 // 6-bit window with two bits set (bits 4, 5, 6, 7 unset)
1124 // 23-bit window with 22 bits set (bit 30 unset)
1125 // 223-bit window with all 223 bits set
1126 //
1127 // Thus, the groups of 1 bits in each window forms the set:
1128 // S = {2, 22, 223}.
1129 //
1130 // The strategy is to calculate a^(2^n - 1) for each grouping via an
1131 // addition chain with a sliding window.
1132 //
1133 // The addition chain used is (credits to Peter Dettman):
1134 // (0,0),(1,0),(2,2),(3,2),(4,1),(5,5),(6,6),(7,7),(8,8),(9,7),(10,2)
1135 // => 2^1 2^[2] 2^3 2^6 2^9 2^11 2^[22] 2^44 2^88 2^176 2^220 2^[223]
1136 //
1137 // This has a cost of 254 field squarings and 13 field multiplications.
1138 var a, a2, a3, a6, a9, a11, a22, a44, a88, a176, a220, a223 FieldVal
1139 a.Set(val)
1140 a2.SquareVal(&a).Mul(&a) // a2 = a^(2^2 - 1)
1141 a3.SquareVal(&a2).Mul(&a) // a3 = a^(2^3 - 1)
1142 a6.SquareVal(&a3).Square().Square() // a6 = a^(2^6 - 2^3)
1143 a6.Mul(&a3) // a6 = a^(2^6 - 1)
1144 a9.SquareVal(&a6).Square().Square() // a9 = a^(2^9 - 2^3)
1145 a9.Mul(&a3) // a9 = a^(2^9 - 1)
1146 a11.SquareVal(&a9).Square() // a11 = a^(2^11 - 2^2)
1147 a11.Mul(&a2) // a11 = a^(2^11 - 1)
1148 a22.SquareVal(&a11).Square().Square().Square().Square() // a22 = a^(2^16 - 2^5)
1149 a22.Square().Square().Square().Square().Square() // a22 = a^(2^21 - 2^10)
1150 a22.Square() // a22 = a^(2^22 - 2^11)
1151 a22.Mul(&a11) // a22 = a^(2^22 - 1)
1152 a44.SquareVal(&a22).Square().Square().Square().Square() // a44 = a^(2^27 - 2^5)
1153 a44.Square().Square().Square().Square().Square() // a44 = a^(2^32 - 2^10)
1154 a44.Square().Square().Square().Square().Square() // a44 = a^(2^37 - 2^15)
1155 a44.Square().Square().Square().Square().Square() // a44 = a^(2^42 - 2^20)
1156 a44.Square().Square() // a44 = a^(2^44 - 2^22)
1157 a44.Mul(&a22) // a44 = a^(2^44 - 1)
1158 a88.SquareVal(&a44).Square().Square().Square().Square() // a88 = a^(2^49 - 2^5)
1159 a88.Square().Square().Square().Square().Square() // a88 = a^(2^54 - 2^10)
1160 a88.Square().Square().Square().Square().Square() // a88 = a^(2^59 - 2^15)
1161 a88.Square().Square().Square().Square().Square() // a88 = a^(2^64 - 2^20)
1162 a88.Square().Square().Square().Square().Square() // a88 = a^(2^69 - 2^25)
1163 a88.Square().Square().Square().Square().Square() // a88 = a^(2^74 - 2^30)
1164 a88.Square().Square().Square().Square().Square() // a88 = a^(2^79 - 2^35)
1165 a88.Square().Square().Square().Square().Square() // a88 = a^(2^84 - 2^40)
1166 a88.Square().Square().Square().Square() // a88 = a^(2^88 - 2^44)
1167 a88.Mul(&a44) // a88 = a^(2^88 - 1)
1168 a176.SquareVal(&a88).Square().Square().Square().Square() // a176 = a^(2^93 - 2^5)
1169 a176.Square().Square().Square().Square().Square() // a176 = a^(2^98 - 2^10)
1170 a176.Square().Square().Square().Square().Square() // a176 = a^(2^103 - 2^15)
1171 a176.Square().Square().Square().Square().Square() // a176 = a^(2^108 - 2^20)
1172 a176.Square().Square().Square().Square().Square() // a176 = a^(2^113 - 2^25)
1173 a176.Square().Square().Square().Square().Square() // a176 = a^(2^118 - 2^30)
1174 a176.Square().Square().Square().Square().Square() // a176 = a^(2^123 - 2^35)
1175 a176.Square().Square().Square().Square().Square() // a176 = a^(2^128 - 2^40)
1176 a176.Square().Square().Square().Square().Square() // a176 = a^(2^133 - 2^45)
1177 a176.Square().Square().Square().Square().Square() // a176 = a^(2^138 - 2^50)
1178 a176.Square().Square().Square().Square().Square() // a176 = a^(2^143 - 2^55)
1179 a176.Square().Square().Square().Square().Square() // a176 = a^(2^148 - 2^60)
1180 a176.Square().Square().Square().Square().Square() // a176 = a^(2^153 - 2^65)
1181 a176.Square().Square().Square().Square().Square() // a176 = a^(2^158 - 2^70)
1182 a176.Square().Square().Square().Square().Square() // a176 = a^(2^163 - 2^75)
1183 a176.Square().Square().Square().Square().Square() // a176 = a^(2^168 - 2^80)
1184 a176.Square().Square().Square().Square().Square() // a176 = a^(2^173 - 2^85)
1185 a176.Square().Square().Square() // a176 = a^(2^176 - 2^88)
1186 a176.Mul(&a88) // a176 = a^(2^176 - 1)
1187 a220.SquareVal(&a176).Square().Square().Square().Square() // a220 = a^(2^181 - 2^5)
1188 a220.Square().Square().Square().Square().Square() // a220 = a^(2^186 - 2^10)
1189 a220.Square().Square().Square().Square().Square() // a220 = a^(2^191 - 2^15)
1190 a220.Square().Square().Square().Square().Square() // a220 = a^(2^196 - 2^20)
1191 a220.Square().Square().Square().Square().Square() // a220 = a^(2^201 - 2^25)
1192 a220.Square().Square().Square().Square().Square() // a220 = a^(2^206 - 2^30)
1193 a220.Square().Square().Square().Square().Square() // a220 = a^(2^211 - 2^35)
1194 a220.Square().Square().Square().Square().Square() // a220 = a^(2^216 - 2^40)
1195 a220.Square().Square().Square().Square() // a220 = a^(2^220 - 2^44)
1196 a220.Mul(&a44) // a220 = a^(2^220 - 1)
1197 a223.SquareVal(&a220).Square().Square() // a223 = a^(2^223 - 2^3)
1198 a223.Mul(&a3) // a223 = a^(2^223 - 1)
1199 1200 f.SquareVal(&a223).Square().Square().Square().Square() // f = a^(2^228 - 2^5)
1201 f.Square().Square().Square().Square().Square() // f = a^(2^233 - 2^10)
1202 f.Square().Square().Square().Square().Square() // f = a^(2^238 - 2^15)
1203 f.Square().Square().Square().Square().Square() // f = a^(2^243 - 2^20)
1204 f.Square().Square().Square() // f = a^(2^246 - 2^23)
1205 f.Mul(&a22) // f = a^(2^246 - 2^22 - 1)
1206 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 2^27 - 2^5)
1207 f.Square() // f = a^(2^252 - 2^28 - 2^6)
1208 f.Mul(&a2) // f = a^(2^252 - 2^28 - 2^6 - 2^1 - 1)
1209 f.Square().Square() // f = a^(2^254 - 2^30 - 2^8 - 2^3 - 2^2)
1210 // // = a^(2^254 - 2^30 - 244)
1211 // // = a^((p+1)/4)
1212 // Ensure the calculated result is actually the square root by squaring it
1213 // and checking against the original value.
1214 var sqr FieldVal
1215 return sqr.SquareVal(f).Normalize().Equals(val.Normalize())
1216 }
1217 1218 // Square squares the field value in constant time. The existing field value is
1219 // modified. Note that this function can overflow if multiplying any of the
1220 // individual words exceeds a max uint32. In practice, this means the magnitude
1221 // of the field must be a max of 8 to prevent overflow.
1222 //
1223 // The field value is returned to support chaining. This enables syntax like:
1224 // f.Square().Mul(f2) so that f = f^2 * f2.
1225 //
1226 // Preconditions:
1227 // - The field value MUST have a max magnitude of 8
1228 // Output Normalized: No
1229 // Output Max Magnitude: 1
1230 func (f *FieldVal) Square() *FieldVal { return f.SquareVal(f) }
1231 1232 // SquareVal squares the passed value and stores the result in f in constant
1233 // time. Note that this function can overflow if multiplying any of the
1234 // individual words exceeds a max uint32. In practice, this means the magnitude
1235 // of the field being squared must be a max of 8 to prevent overflow.
1236 //
1237 // The field value is returned to support chaining. This enables syntax like:
1238 // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
1239 //
1240 // Preconditions:
1241 // - The input field value MUST have a max magnitude of 8
1242 // Output Normalized: No
1243 // Output Max Magnitude: 1
1244 func (f *FieldVal) SquareVal(val *FieldVal) *FieldVal {
1245 // This could be done with a couple of for loops and an array to store
1246 // the intermediate terms, but this unrolled version is significantly
1247 // faster.
1248 //
1249 // Terms for 2^(fieldBase*0).
1250 m := uint64(val.n[0]) * uint64(val.n[0])
1251 t0 := m & fieldBaseMask
1252 // Terms for 2^(fieldBase*1).
1253 m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
1254 t1 := m & fieldBaseMask
1255 // Terms for 2^(fieldBase*2).
1256 m = (m >> fieldBase) +
1257 2*uint64(val.n[0])*uint64(val.n[2]) +
1258 uint64(val.n[1])*uint64(val.n[1])
1259 t2 := m & fieldBaseMask
1260 // Terms for 2^(fieldBase*3).
1261 m = (m >> fieldBase) +
1262 2*uint64(val.n[0])*uint64(val.n[3]) +
1263 2*uint64(val.n[1])*uint64(val.n[2])
1264 t3 := m & fieldBaseMask
1265 // Terms for 2^(fieldBase*4).
1266 m = (m >> fieldBase) +
1267 2*uint64(val.n[0])*uint64(val.n[4]) +
1268 2*uint64(val.n[1])*uint64(val.n[3]) +
1269 uint64(val.n[2])*uint64(val.n[2])
1270 t4 := m & fieldBaseMask
1271 // Terms for 2^(fieldBase*5).
1272 m = (m >> fieldBase) +
1273 2*uint64(val.n[0])*uint64(val.n[5]) +
1274 2*uint64(val.n[1])*uint64(val.n[4]) +
1275 2*uint64(val.n[2])*uint64(val.n[3])
1276 t5 := m & fieldBaseMask
1277 // Terms for 2^(fieldBase*6).
1278 m = (m >> fieldBase) +
1279 2*uint64(val.n[0])*uint64(val.n[6]) +
1280 2*uint64(val.n[1])*uint64(val.n[5]) +
1281 2*uint64(val.n[2])*uint64(val.n[4]) +
1282 uint64(val.n[3])*uint64(val.n[3])
1283 t6 := m & fieldBaseMask
1284 // Terms for 2^(fieldBase*7).
1285 m = (m >> fieldBase) +
1286 2*uint64(val.n[0])*uint64(val.n[7]) +
1287 2*uint64(val.n[1])*uint64(val.n[6]) +
1288 2*uint64(val.n[2])*uint64(val.n[5]) +
1289 2*uint64(val.n[3])*uint64(val.n[4])
1290 t7 := m & fieldBaseMask
1291 // Terms for 2^(fieldBase*8).
1292 m = (m >> fieldBase) +
1293 2*uint64(val.n[0])*uint64(val.n[8]) +
1294 2*uint64(val.n[1])*uint64(val.n[7]) +
1295 2*uint64(val.n[2])*uint64(val.n[6]) +
1296 2*uint64(val.n[3])*uint64(val.n[5]) +
1297 uint64(val.n[4])*uint64(val.n[4])
1298 t8 := m & fieldBaseMask
1299 // Terms for 2^(fieldBase*9).
1300 m = (m >> fieldBase) +
1301 2*uint64(val.n[0])*uint64(val.n[9]) +
1302 2*uint64(val.n[1])*uint64(val.n[8]) +
1303 2*uint64(val.n[2])*uint64(val.n[7]) +
1304 2*uint64(val.n[3])*uint64(val.n[6]) +
1305 2*uint64(val.n[4])*uint64(val.n[5])
1306 t9 := m & fieldBaseMask
1307 // Terms for 2^(fieldBase*10).
1308 m = (m >> fieldBase) +
1309 2*uint64(val.n[1])*uint64(val.n[9]) +
1310 2*uint64(val.n[2])*uint64(val.n[8]) +
1311 2*uint64(val.n[3])*uint64(val.n[7]) +
1312 2*uint64(val.n[4])*uint64(val.n[6]) +
1313 uint64(val.n[5])*uint64(val.n[5])
1314 t10 := m & fieldBaseMask
1315 // Terms for 2^(fieldBase*11).
1316 m = (m >> fieldBase) +
1317 2*uint64(val.n[2])*uint64(val.n[9]) +
1318 2*uint64(val.n[3])*uint64(val.n[8]) +
1319 2*uint64(val.n[4])*uint64(val.n[7]) +
1320 2*uint64(val.n[5])*uint64(val.n[6])
1321 t11 := m & fieldBaseMask
1322 // Terms for 2^(fieldBase*12).
1323 m = (m >> fieldBase) +
1324 2*uint64(val.n[3])*uint64(val.n[9]) +
1325 2*uint64(val.n[4])*uint64(val.n[8]) +
1326 2*uint64(val.n[5])*uint64(val.n[7]) +
1327 uint64(val.n[6])*uint64(val.n[6])
1328 t12 := m & fieldBaseMask
1329 // Terms for 2^(fieldBase*13).
1330 m = (m >> fieldBase) +
1331 2*uint64(val.n[4])*uint64(val.n[9]) +
1332 2*uint64(val.n[5])*uint64(val.n[8]) +
1333 2*uint64(val.n[6])*uint64(val.n[7])
1334 t13 := m & fieldBaseMask
1335 // Terms for 2^(fieldBase*14).
1336 m = (m >> fieldBase) +
1337 2*uint64(val.n[5])*uint64(val.n[9]) +
1338 2*uint64(val.n[6])*uint64(val.n[8]) +
1339 uint64(val.n[7])*uint64(val.n[7])
1340 t14 := m & fieldBaseMask
1341 // Terms for 2^(fieldBase*15).
1342 m = (m >> fieldBase) +
1343 2*uint64(val.n[6])*uint64(val.n[9]) +
1344 2*uint64(val.n[7])*uint64(val.n[8])
1345 t15 := m & fieldBaseMask
1346 // Terms for 2^(fieldBase*16).
1347 m = (m >> fieldBase) +
1348 2*uint64(val.n[7])*uint64(val.n[9]) +
1349 uint64(val.n[8])*uint64(val.n[8])
1350 t16 := m & fieldBaseMask
1351 // Terms for 2^(fieldBase*17).
1352 m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
1353 t17 := m & fieldBaseMask
1354 // Terms for 2^(fieldBase*18).
1355 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
1356 t18 := m & fieldBaseMask
1357 // What's left is for 2^(fieldBase*19).
1358 t19 := m >> fieldBase
1359 // At this point, all of the terms are grouped into their respective
1360 // base.
1361 //
1362 // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
1363 // when the modulus is of the special form m = b^t - c, highly efficient
1364 // reduction can be achieved per the provided algorithm.
1365 //
1366 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
1367 // this criteria.
1368 //
1369 // 4294968273 in field representation (base 2^26) is:
1370 // n[0] = 977
1371 // n[1] = 64
1372 // That is to say (2^26 * 64) + 977 = 4294968273
1373 //
1374 // Since each word is in base 26, the upper terms (t10 and up) start
1375 // at 260 bits (versus the final desired range of 256 bits), so the
1376 // field representation of 'c' from above needs to be adjusted for the
1377 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 =
1378 // 68719492368. Thus, the adjusted field representation of 'c' is:
1379 // n[0] = 977 * 16 = 15632
1380 // n[1] = 64 * 16 = 1024
1381 // That is to say (2^26 * 1024) + 15632 = 68719492368
1382 //
1383 // To reduce the final term, t19, the entire 'c' value is needed instead
1384 // of only n[0] because there are no more terms left to handle n[1].
1385 // This means there might be some magnitude left in the upper bits that
1386 // is handled below.
1387 m = t0 + t10*15632
1388 t0 = m & fieldBaseMask
1389 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1390 t1 = m & fieldBaseMask
1391 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1392 t2 = m & fieldBaseMask
1393 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1394 t3 = m & fieldBaseMask
1395 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1396 t4 = m & fieldBaseMask
1397 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1398 t5 = m & fieldBaseMask
1399 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1400 t6 = m & fieldBaseMask
1401 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1402 t7 = m & fieldBaseMask
1403 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1404 t8 = m & fieldBaseMask
1405 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1406 t9 = m & fieldMSBMask
1407 m = m >> fieldMSBBits
1408 // At this point, if the magnitude is greater than 0, the overall value
1409 // is greater than the max possible 256-bit value. In particular, it is
1410 // "how many times larger" than the max value it is.
1411 //
1412 // The algorithm presented in [HAC] section 14.3.4 repeats until the
1413 // quotient is zero. However, due to the above, we already know at
1414 // least how many times we would need to repeat as it's the value
1415 // currently in m. Thus we can simply multiply the magnitude by the
1416 // field representation of the prime and do a single iteration. Notice
1417 // that nothing will be changed when the magnitude is zero, so we could
1418 // skip this in that case, however always running regardless allows it
1419 // to run in constant time. The final result will be in the range
1420 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1421 // magnitude of 1, but it is denormalized.
1422 n := t0 + m*977
1423 f.n[0] = uint32(n & fieldBaseMask)
1424 n = (n >> fieldBase) + t1 + m*64
1425 f.n[1] = uint32(n & fieldBaseMask)
1426 f.n[2] = uint32((n >> fieldBase) + t2)
1427 f.n[3] = uint32(t3)
1428 f.n[4] = uint32(t4)
1429 f.n[5] = uint32(t5)
1430 f.n[6] = uint32(t6)
1431 f.n[7] = uint32(t7)
1432 f.n[8] = uint32(t8)
1433 f.n[9] = uint32(t9)
1434 return f
1435 }
1436 1437 // Inverse finds the modular multiplicative inverse of the field value in
1438 // constant time. The existing field value is modified.
1439 //
1440 // The field value is returned to support chaining. This enables syntax like:
1441 // f.Inverse().Mul(f2) so that f = f^-1 * f2.
1442 //
1443 // Preconditions:
1444 // - The field value MUST have a max magnitude of 8
1445 // Output Normalized: No
1446 // Output Max Magnitude: 1
1447 func (f *FieldVal) Inverse() *FieldVal {
1448 // Fermat's little theorem states that for a nonzero number a and prime
1449 // p, a^(p-1) ≡ 1 (mod p). Since the multiplicative inverse is
1450 // a*b ≡ 1 (mod p), it follows that b ≡ a*a^(p-2) ≡ a^(p-1) ≡ 1 (mod p).
1451 // Thus, a^(p-2) is the multiplicative inverse.
1452 //
1453 // In order to efficiently compute a^(p-2), p-2 needs to be split into
1454 // a sequence of squares and multiplications that minimizes the number
1455 // of multiplications needed (since they are more costly than
1456 // squarings). Intermediate results are saved and reused as well.
1457 //
1458 // The secp256k1 prime - 2 is 2^256 - 4294968275.
1459 //
1460 // This has a cost of 258 field squarings and 33 field multiplications.
1461 var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 FieldVal
1462 a2.SquareVal(f)
1463 a3.Mul2(&a2, f)
1464 a4.SquareVal(&a2)
1465 a10.SquareVal(&a4).Mul(&a2)
1466 a11.Mul2(&a10, f)
1467 a21.Mul2(&a10, &a11)
1468 a42.SquareVal(&a21)
1469 a45.Mul2(&a42, &a3)
1470 a63.Mul2(&a42, &a21)
1471 a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
1472 a1023.Mul2(&a1019, &a4)
1473 f.Set(&a63) // f = a^(2^6 - 1)
1474 f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
1475 f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
1476 f.Mul(&a1023) // f = a^(2^16 - 1)
1477 f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
1478 f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
1479 f.Mul(&a1023) // f = a^(2^26 - 1)
1480 f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
1481 f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
1482 f.Mul(&a1023) // f = a^(2^36 - 1)
1483 f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
1484 f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
1485 f.Mul(&a1023) // f = a^(2^46 - 1)
1486 f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
1487 f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
1488 f.Mul(&a1023) // f = a^(2^56 - 1)
1489 f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
1490 f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
1491 f.Mul(&a1023) // f = a^(2^66 - 1)
1492 f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
1493 f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
1494 f.Mul(&a1023) // f = a^(2^76 - 1)
1495 f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
1496 f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
1497 f.Mul(&a1023) // f = a^(2^86 - 1)
1498 f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
1499 f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
1500 f.Mul(&a1023) // f = a^(2^96 - 1)
1501 f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
1502 f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
1503 f.Mul(&a1023) // f = a^(2^106 - 1)
1504 f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
1505 f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
1506 f.Mul(&a1023) // f = a^(2^116 - 1)
1507 f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
1508 f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
1509 f.Mul(&a1023) // f = a^(2^126 - 1)
1510 f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
1511 f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
1512 f.Mul(&a1023) // f = a^(2^136 - 1)
1513 f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
1514 f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
1515 f.Mul(&a1023) // f = a^(2^146 - 1)
1516 f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
1517 f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
1518 f.Mul(&a1023) // f = a^(2^156 - 1)
1519 f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
1520 f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
1521 f.Mul(&a1023) // f = a^(2^166 - 1)
1522 f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
1523 f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
1524 f.Mul(&a1023) // f = a^(2^176 - 1)
1525 f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
1526 f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
1527 f.Mul(&a1023) // f = a^(2^186 - 1)
1528 f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
1529 f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
1530 f.Mul(&a1023) // f = a^(2^196 - 1)
1531 f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
1532 f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
1533 f.Mul(&a1023) // f = a^(2^206 - 1)
1534 f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
1535 f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
1536 f.Mul(&a1023) // f = a^(2^216 - 1)
1537 f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
1538 f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
1539 f.Mul(&a1019) // f = a^(2^226 - 5)
1540 f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
1541 f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
1542 f.Mul(&a1023) // f = a^(2^236 - 4097)
1543 f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
1544 f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
1545 f.Mul(&a1023) // f = a^(2^246 - 4194305)
1546 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
1547 f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
1548 return f.Mul(&a45) // f = a^(2^256 - 4294968275) = a^(p-2)
1549 }
1550 1551 // IsGtOrEqPrimeMinusOrder returns whether the field value exceeds the
1552 // group order divided by 2 in constant time.
1553 //
1554 // Preconditions:
1555 // - The field value MUST be normalized
1556 func (f *FieldVal) IsGtOrEqPrimeMinusOrder() bool {
1557 // The secp256k1 prime is equivalent to 2^256 - 4294968273 and the group
1558 // order is 2^256 - 432420386565659656852420866394968145599. Thus,
1559 // the prime minus the group order is:
1560 // 432420386565659656852420866390673177326
1561 //
1562 // In hex that is:
1563 // 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da172 2fc9baee
1564 //
1565 // Converting that to field representation (base 2^26) is:
1566 //
1567 // n[0] = 0x03c9baee
1568 // n[1] = 0x03685c8b
1569 // n[2] = 0x01fc4402
1570 // n[3] = 0x006542dd
1571 // n[4] = 0x01455123
1572 //
1573 // This can be verified with the following test code:
1574 // pMinusN := new(big.Int).Sub(curveParams.P, curveParams.N)
1575 // var fv FieldVal
1576 // fv.SetByteSlice(pMinusN.Bytes())
1577 // t.Logf("%x", fv.n)
1578 //
1579 // Outputs: [3c9baee 3685c8b 1fc4402 6542dd 1455123 0 0 0 0 0]
1580 const (
1581 pMinusNWordZero = 0x03c9baee
1582 pMinusNWordOne = 0x03685c8b
1583 pMinusNWordTwo = 0x01fc4402
1584 pMinusNWordThree = 0x006542dd
1585 pMinusNWordFour = 0x01455123
1586 pMinusNWordFive = 0x00000000
1587 pMinusNWordSix = 0x00000000
1588 pMinusNWordSeven = 0x00000000
1589 pMinusNWordEight = 0x00000000
1590 pMinusNWordNine = 0x00000000
1591 )
1592 // The intuition here is that the value is greater than field prime minus
1593 // the group order if one of the higher individual words is greater than the
1594 // corresponding word and all higher words in the value are equal.
1595 result := constantTimeGreater(f.n[9], pMinusNWordNine)
1596 highWordsEqual := constantTimeEq(f.n[9], pMinusNWordNine)
1597 result |= highWordsEqual & constantTimeGreater(f.n[8], pMinusNWordEight)
1598 highWordsEqual &= constantTimeEq(f.n[8], pMinusNWordEight)
1599 result |= highWordsEqual & constantTimeGreater(f.n[7], pMinusNWordSeven)
1600 highWordsEqual &= constantTimeEq(f.n[7], pMinusNWordSeven)
1601 result |= highWordsEqual & constantTimeGreater(f.n[6], pMinusNWordSix)
1602 highWordsEqual &= constantTimeEq(f.n[6], pMinusNWordSix)
1603 result |= highWordsEqual & constantTimeGreater(f.n[5], pMinusNWordFive)
1604 highWordsEqual &= constantTimeEq(f.n[5], pMinusNWordFive)
1605 result |= highWordsEqual & constantTimeGreater(f.n[4], pMinusNWordFour)
1606 highWordsEqual &= constantTimeEq(f.n[4], pMinusNWordFour)
1607 result |= highWordsEqual & constantTimeGreater(f.n[3], pMinusNWordThree)
1608 highWordsEqual &= constantTimeEq(f.n[3], pMinusNWordThree)
1609 result |= highWordsEqual & constantTimeGreater(f.n[2], pMinusNWordTwo)
1610 highWordsEqual &= constantTimeEq(f.n[2], pMinusNWordTwo)
1611 result |= highWordsEqual & constantTimeGreater(f.n[1], pMinusNWordOne)
1612 highWordsEqual &= constantTimeEq(f.n[1], pMinusNWordOne)
1613 result |= highWordsEqual & constantTimeGreaterOrEq(f.n[0], pMinusNWordZero)
1614 return result != 0
1615 }
1616