1 // Copyright (c) 2013-2014 The btcsuite developers
2 // Copyright (c) 2015-2024 The Decred developers
3 // Copyright (c) 2013-2024 Dave Collins
4 // Use of this source code is governed by an ISC
5 // license that can be found in the LICENSE file.
6 7 package secp256k1
8 9 // References:
10 // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
11 // http://cacr.uwaterloo.ca/hac/
12 13 // All elliptic curve operations for secp256k1 are done in a finite field
14 // characterized by a 256-bit prime. Given this precision is larger than the
15 // biggest available native type, obviously some form of bignum math is needed.
16 // This package implements specialized fixed-precision field arithmetic rather
17 // than relying on an arbitrary-precision arithmetic package such as math/big
18 // for dealing with the field math since the size is known. As a result, rather
19 // large performance gains are achieved by taking advantage of many
20 // optimizations not available to arbitrary-precision arithmetic and generic
21 // modular arithmetic algorithms.
22 //
23 // There are various ways to internally represent each finite field element.
24 // For example, the most obvious representation would be to use an array of 4
25 // uint64s (64 bits * 4 = 256 bits). However, that representation suffers from
26 // a couple of issues. First, there is no native Go type large enough to handle
27 // the intermediate results while adding or multiplying two 64-bit numbers, and
28 // second there is no space left for overflows when performing the intermediate
29 // arithmetic between each array element which would lead to expensive carry
30 // propagation.
31 //
32 // Given the above, this implementation represents the field elements as
33 // 10 uint32s with each word (array entry) treated as base 2^26. This was
34 // chosen for the following reasons:
35 // 1) Most systems at the current time are 64-bit (or at least have 64-bit
36 // registers available for specialized purposes such as MMX) so the
37 // intermediate results can typically be done using a native register (and
38 // using uint64s to avoid the need for additional half-word arithmetic)
39 // 2) In order to allow addition of the internal words without having to
40 // propagate the carry, the max normalized value for each register must
41 // be less than the number of bits available in the register
42 // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
43 // reasonable choice for #2
44 // 4) Given the need for 256-bits of precision and the properties stated in #1,
45 // #2, and #3, the representation which best accommodates this is 10 uint32s
46 // with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
47 // bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
48 // overflow
49 //
50 // Since it is so important that the field arithmetic is extremely fast for high
51 // performance crypto, this type does not perform any validation where it
52 // ordinarily would. See the documentation for FieldVal for more details.
53 54 import (
55 "encoding/hex"
56 )
57 58 // Constants used to make the code more readable.
59 const (
60 twoBitsMask = 0x3
61 fourBitsMask = 0xf
62 sixBitsMask = 0x3f
63 eightBitsMask = 0xff
64 )
65 66 // Constants related to the field representation.
67 const (
68 // fieldWords is the number of words used to internally represent the
69 // 256-bit value.
70 fieldWords = 10
71 72 // fieldBase is the exponent used to form the numeric base of each word.
73 // 2^(fieldBase*i) where i is the word position.
74 fieldBase = 26
75 76 // fieldBaseMask is the mask for the bits in each word needed to
77 // represent the numeric base of each word (except the most significant
78 // word).
79 fieldBaseMask = (1 << fieldBase) - 1
80 81 // fieldMSBBits is the number of bits in the most significant word used
82 // to represent the value.
83 fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
84 85 // fieldMSBMask is the mask for the bits in the most significant word
86 // needed to represent the value.
87 fieldMSBMask = (1 << fieldMSBBits) - 1
88 89 // These fields provide convenient access to each of the words of the
90 // secp256k1 prime in the internal field representation to improve code
91 // readability.
92 fieldPrimeWordZero = 0x03fffc2f
93 fieldPrimeWordOne = 0x03ffffbf
94 fieldPrimeWordTwo = 0x03ffffff
95 fieldPrimeWordThree = 0x03ffffff
96 fieldPrimeWordFour = 0x03ffffff
97 fieldPrimeWordFive = 0x03ffffff
98 fieldPrimeWordSix = 0x03ffffff
99 fieldPrimeWordSeven = 0x03ffffff
100 fieldPrimeWordEight = 0x03ffffff
101 fieldPrimeWordNine = 0x003fffff
102 )
103 104 // FieldVal implements optimized fixed-precision arithmetic over the
105 // secp256k1 finite field. This means all arithmetic is performed modulo
106 //
107 // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.
108 //
109 // WARNING: Since it is so important for the field arithmetic to be extremely
110 // fast for high performance crypto, this type does not perform any validation
111 // of documented preconditions where it ordinarily would. As a result, it is
112 // IMPERATIVE for callers to understand some key concepts that are described
113 // below and ensure the methods are called with the necessary preconditions that
114 // each method is documented with. For example, some methods only give the
115 // correct result if the field value is normalized and others require the field
116 // values involved to have a maximum magnitude and THERE ARE NO EXPLICIT CHECKS
117 // TO ENSURE THOSE PRECONDITIONS ARE SATISFIED. This does, unfortunately, make
118 // the type more difficult to use correctly and while I typically prefer to
119 // ensure all state and input is valid for most code, this is a bit of an
120 // exception because those extra checks really add up in what ends up being
121 // critical hot paths.
122 //
123 // The first key concept when working with this type is normalization. In order
124 // to avoid the need to propagate a ton of carries, the internal representation
125 // provides additional overflow bits for each word of the overall 256-bit value.
126 // This means that there are multiple internal representations for the same
127 // value and, as a result, any methods that rely on comparison of the value,
128 // such as equality and oddness determination, require the caller to provide a
129 // normalized value.
130 //
131 // The second key concept when working with this type is magnitude. As
132 // previously mentioned, the internal representation provides additional
133 // overflow bits which means that the more math operations that are performed on
134 // the field value between normalizations, the more those overflow bits
135 // accumulate. The magnitude is effectively that maximum possible number of
136 // those overflow bits that could possibly be required as a result of a given
137 // operation. Since there are only a limited number of overflow bits available,
138 // this implies that the max possible magnitude MUST be tracked by the caller
139 // and the caller MUST normalize the field value if a given operation would
140 // cause the magnitude of the result to exceed the max allowed value.
141 //
142 // IMPORTANT: The max allowed magnitude of a field value is 32.
143 type FieldVal struct {
144 // Each 256-bit value is represented as 10 32-bit integers in base 2^26.
145 // This provides 6 bits of overflow in each word (10 bits in the most
146 // significant word) for a total of 64 bits of overflow (9*6 + 10 = 64). It
147 // only implements the arithmetic needed for elliptic curve operations.
148 //
149 // The following depicts the internal representation:
150 // -----------------------------------------------------------------
151 // | n[9] | n[8] | ... | n[0] |
152 // | 32 bits available | 32 bits available | ... | 32 bits available |
153 // | 22 bits for value | 26 bits for value | ... | 26 bits for value |
154 // | 10 bits overflow | 6 bits overflow | ... | 6 bits overflow |
155 // | Mult: 2^(26*9) | Mult: 2^(26*8) | ... | Mult: 2^(26*0) |
156 // -----------------------------------------------------------------
157 //
158 // For example, consider the number 2^49 + 1. It would be represented as:
159 // n[0] = 1
160 // n[1] = 2^23
161 // n[2..9] = 0
162 //
163 // The full 256-bit value is then calculated by looping i from 9..0 and
164 // doing sum(n[i] * 2^(26i)) like so:
165 // n[9] * 2^(26*9) = 0 * 2^234 = 0
166 // n[8] * 2^(26*8) = 0 * 2^208 = 0
167 // ...
168 // n[1] * 2^(26*1) = 2^23 * 2^26 = 2^49
169 // n[0] * 2^(26*0) = 1 * 2^0 = 1
170 // Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
171 n [10]uint32
172 }
173 174 // String returns the field value as a normalized human-readable hex string.
175 //
176 // Preconditions: None
177 // Output Normalized: Field is not modified -- same as input value
178 // Output Max Magnitude: Field is not modified -- same as input value
179 func (f FieldVal) String() string {
180 // f is a copy, so it's safe to normalize it without mutating the original.
181 f.Normalize()
182 return hex.EncodeToString(f.Bytes()[:])
183 }
184 185 // Zero sets the field value to zero in constant time. A newly created field
186 // value is already set to zero. This function can be useful to clear an
187 // existing field value for reuse.
188 //
189 // Preconditions: None
190 // Output Normalized: Yes
191 // Output Max Magnitude: 1
192 func (f *FieldVal) Zero() {
193 f.n[0] = 0
194 f.n[1] = 0
195 f.n[2] = 0
196 f.n[3] = 0
197 f.n[4] = 0
198 f.n[5] = 0
199 f.n[6] = 0
200 f.n[7] = 0
201 f.n[8] = 0
202 f.n[9] = 0
203 }
204 205 // Set sets the field value equal to the passed value in constant time. The
206 // normalization and magnitude of the two fields will be identical.
207 //
208 // The field value is returned to support chaining. This enables syntax like:
209 // f := new(FieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
210 // modified.
211 //
212 // Preconditions: None
213 // Output Normalized: Same as input value
214 // Output Max Magnitude: Same as input value
215 func (f *FieldVal) Set(val *FieldVal) *FieldVal {
216 *f = *val
217 return f
218 }
219 220 // SetInt sets the field value to the passed integer in constant time. This is
221 // a convenience function since it is fairly common to perform some arithmetic
222 // with small native integers.
223 //
224 // The field value is returned to support chaining. This enables syntax such
225 // as f := new(FieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
226 //
227 // Preconditions: None
228 // Output Normalized: Yes
229 // Output Max Magnitude: 1
230 func (f *FieldVal) SetInt(ui uint16) *FieldVal {
231 f.Zero()
232 f.n[0] = uint32(ui)
233 return f
234 }
235 236 // SetBytes packs the passed 32-byte big-endian value into the internal field
237 // value representation in constant time. SetBytes interprets the provided
238 // array as a 256-bit big-endian unsigned integer, packs it into the internal
239 // field value representation, and returns either 1 if it is greater than or
240 // equal to the field prime (aka it overflowed) or 0 otherwise in constant time.
241 //
242 // Note that a bool is not used here because it is not possible in Go to convert
243 // from a bool to numeric value in constant time and many constant-time
244 // operations require a numeric value.
245 //
246 // Preconditions: None
247 // Output Normalized: Yes if no overflow, no otherwise
248 // Output Max Magnitude: 1
249 func (f *FieldVal) SetBytes(b *[32]byte) uint32 {
250 // Pack the 256 total bits across the 10 uint32 words with a max of
251 // 26-bits per word. This could be done with a couple of for loops,
252 // but this unrolled version is significantly faster. Benchmarks show
253 // this is about 34 times faster than the variant which uses loops.
254 f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
255 (uint32(b[28])&twoBitsMask)<<24
256 f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
257 (uint32(b[25])&fourBitsMask)<<22
258 f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
259 (uint32(b[22])&sixBitsMask)<<20
260 f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
261 uint32(b[19])<<18
262 f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
263 (uint32(b[15])&twoBitsMask)<<24
264 f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
265 (uint32(b[12])&fourBitsMask)<<22
266 f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
267 (uint32(b[9])&sixBitsMask)<<20
268 f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
269 uint32(b[6])<<18
270 f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
271 (uint32(b[2])&twoBitsMask)<<24
272 f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
273 274 // The intuition here is that the field value is greater than the prime if
275 // one of the higher individual words is greater than corresponding word of
276 // the prime and all higher words in the field value are equal to their
277 // corresponding word of the prime. Since this type is modulo the prime,
278 // being equal is also an overflow back to 0.
279 //
280 // Note that because the input is 32 bytes and it was just packed into the
281 // field representation, the only words that can possibly be greater are
282 // zero and one, because ceil(log_2(2^256 - 1 - P)) = 33 bits max and the
283 // internal field representation encodes 26 bits with each word.
284 //
285 // Thus, there is no need to test if the upper words of the field value
286 // exceeds them, hence, only equality is checked for them.
287 highWordsEq := constantTimeEq(f.n[9], fieldPrimeWordNine)
288 highWordsEq &= constantTimeEq(f.n[8], fieldPrimeWordEight)
289 highWordsEq &= constantTimeEq(f.n[7], fieldPrimeWordSeven)
290 highWordsEq &= constantTimeEq(f.n[6], fieldPrimeWordSix)
291 highWordsEq &= constantTimeEq(f.n[5], fieldPrimeWordFive)
292 highWordsEq &= constantTimeEq(f.n[4], fieldPrimeWordFour)
293 highWordsEq &= constantTimeEq(f.n[3], fieldPrimeWordThree)
294 highWordsEq &= constantTimeEq(f.n[2], fieldPrimeWordTwo)
295 overflow := highWordsEq & constantTimeGreater(f.n[1], fieldPrimeWordOne)
296 highWordsEq &= constantTimeEq(f.n[1], fieldPrimeWordOne)
297 overflow |= highWordsEq & constantTimeGreaterOrEq(f.n[0], fieldPrimeWordZero)
298 299 return overflow
300 }
301 302 // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
303 // integer (meaning it is truncated to the first 32 bytes), packs it into the
304 // internal field value representation, and returns whether or not the resulting
305 // truncated 256-bit integer is greater than or equal to the field prime (aka it
306 // overflowed) in constant time.
307 //
308 // Note that since passing a slice with more than 32 bytes is truncated, it is
309 // possible that the truncated value is less than the field prime and hence it
310 // will not be reported as having overflowed in that case. It is up to the
311 // caller to decide whether it needs to provide numbers of the appropriate size
312 // or it if is acceptable to use this function with the described truncation and
313 // overflow behavior.
314 //
315 // Preconditions: None
316 // Output Normalized: Yes if no overflow, no otherwise
317 // Output Max Magnitude: 1
318 func (f *FieldVal) SetByteSlice(b []byte) bool {
319 var b32 [32]byte
320 b = b[:constantTimeMin(uint32(len(b)), 32)]
321 copy(b32[:], b32[:32-len(b)])
322 copy(b32[32-len(b):], b)
323 result := f.SetBytes(&b32)
324 zeroArray32(&b32)
325 return result != 0
326 }
327 328 // Normalize normalizes the internal field words into the desired range and
329 // performs fast modular reduction over the secp256k1 prime by making use of the
330 // special form of the prime in constant time.
331 //
332 // Preconditions: None
333 // Output Normalized: Yes
334 // Output Max Magnitude: 1
335 func (f *FieldVal) Normalize() *FieldVal {
336 // The field representation leaves 6 bits of overflow in each word so
337 // intermediate calculations can be performed without needing to
338 // propagate the carry to each higher word during the calculations. In
339 // order to normalize, we need to "compact" the full 256-bit value to
340 // the right while propagating any carries through to the high order
341 // word.
342 //
343 // Since this field is doing arithmetic modulo the secp256k1 prime, we
344 // also need to perform modular reduction over the prime.
345 //
346 // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
347 // when the modulus is of the special form m = b^t - c, highly efficient
348 // reduction can be achieved.
349 //
350 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
351 // this criteria.
352 //
353 // 4294968273 in field representation (base 2^26) is:
354 // n[0] = 977
355 // n[1] = 64
356 // That is to say (2^26 * 64) + 977 = 4294968273
357 //
358 // The algorithm presented in the referenced section typically repeats
359 // until the quotient is zero. However, due to our field representation
360 // we already know to within one reduction how many times we would need
361 // to repeat as it's the uppermost bits of the high order word. Thus we
362 // can simply multiply the magnitude by the field representation of the
363 // prime and do a single iteration. After this step there might be an
364 // additional carry to bit 256 (bit 22 of the high order word).
365 t9 := f.n[9]
366 m := t9 >> fieldMSBBits
367 t9 &= fieldMSBMask
368 t0 := f.n[0] + m*977
369 t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
370 t0 &= fieldBaseMask
371 t2 := (t1 >> fieldBase) + f.n[2]
372 t1 &= fieldBaseMask
373 t3 := (t2 >> fieldBase) + f.n[3]
374 t2 &= fieldBaseMask
375 t4 := (t3 >> fieldBase) + f.n[4]
376 t3 &= fieldBaseMask
377 t5 := (t4 >> fieldBase) + f.n[5]
378 t4 &= fieldBaseMask
379 t6 := (t5 >> fieldBase) + f.n[6]
380 t5 &= fieldBaseMask
381 t7 := (t6 >> fieldBase) + f.n[7]
382 t6 &= fieldBaseMask
383 t8 := (t7 >> fieldBase) + f.n[8]
384 t7 &= fieldBaseMask
385 t9 = (t8 >> fieldBase) + t9
386 t8 &= fieldBaseMask
387 388 // At this point, the magnitude is guaranteed to be one, however, the
389 // value could still be greater than the prime if there was either a
390 // carry through to bit 256 (bit 22 of the higher order word) or the
391 // value is greater than or equal to the field characteristic. The
392 // following determines if either or these conditions are true and does
393 // the final reduction in constant time.
394 //
395 // Also note that 'm' will be zero when neither of the aforementioned
396 // conditions are true and the value will not be changed when 'm' is zero.
397 m = constantTimeEq(t9, fieldMSBMask)
398 m &= constantTimeEq(t8&t7&t6&t5&t4&t3&t2, fieldBaseMask)
399 m &= constantTimeGreater(t1+64+((t0+977)>>fieldBase), fieldBaseMask)
400 m |= t9 >> fieldMSBBits
401 t0 += m * 977
402 t1 = (t0 >> fieldBase) + t1 + (m << 6)
403 t0 &= fieldBaseMask
404 t2 = (t1 >> fieldBase) + t2
405 t1 &= fieldBaseMask
406 t3 = (t2 >> fieldBase) + t3
407 t2 &= fieldBaseMask
408 t4 = (t3 >> fieldBase) + t4
409 t3 &= fieldBaseMask
410 t5 = (t4 >> fieldBase) + t5
411 t4 &= fieldBaseMask
412 t6 = (t5 >> fieldBase) + t6
413 t5 &= fieldBaseMask
414 t7 = (t6 >> fieldBase) + t7
415 t6 &= fieldBaseMask
416 t8 = (t7 >> fieldBase) + t8
417 t7 &= fieldBaseMask
418 t9 = (t8 >> fieldBase) + t9
419 t8 &= fieldBaseMask
420 t9 &= fieldMSBMask // Remove potential multiple of 2^256.
421 422 // Finally, set the normalized and reduced words.
423 f.n[0] = t0
424 f.n[1] = t1
425 f.n[2] = t2
426 f.n[3] = t3
427 f.n[4] = t4
428 f.n[5] = t5
429 f.n[6] = t6
430 f.n[7] = t7
431 f.n[8] = t8
432 f.n[9] = t9
433 return f
434 }
435 436 // PutBytesUnchecked unpacks the field value to a 32-byte big-endian value
437 // directly into the passed byte slice in constant time. The target slice must
438 // have at least 32 bytes available or it will panic.
439 //
440 // There is a similar function, PutBytes, which unpacks the field value into a
441 // 32-byte array directly. This version is provided since it can be useful
442 // to write directly into part of a larger buffer without needing a separate
443 // allocation.
444 //
445 // Preconditions:
446 // - The field value MUST be normalized
447 // - The target slice MUST have at least 32 bytes available
448 func (f *FieldVal) PutBytesUnchecked(b []byte) {
449 // Unpack the 256 total bits from the 10 uint32 words with a max of
450 // 26-bits per word. This could be done with a couple of for loops,
451 // but this unrolled version is a bit faster. Benchmarks show this is
452 // about 10 times faster than the variant which uses loops.
453 b[31] = byte(f.n[0] & eightBitsMask)
454 b[30] = byte((f.n[0] >> 8) & eightBitsMask)
455 b[29] = byte((f.n[0] >> 16) & eightBitsMask)
456 b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
457 b[27] = byte((f.n[1] >> 6) & eightBitsMask)
458 b[26] = byte((f.n[1] >> 14) & eightBitsMask)
459 b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
460 b[24] = byte((f.n[2] >> 4) & eightBitsMask)
461 b[23] = byte((f.n[2] >> 12) & eightBitsMask)
462 b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
463 b[21] = byte((f.n[3] >> 2) & eightBitsMask)
464 b[20] = byte((f.n[3] >> 10) & eightBitsMask)
465 b[19] = byte((f.n[3] >> 18) & eightBitsMask)
466 b[18] = byte(f.n[4] & eightBitsMask)
467 b[17] = byte((f.n[4] >> 8) & eightBitsMask)
468 b[16] = byte((f.n[4] >> 16) & eightBitsMask)
469 b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
470 b[14] = byte((f.n[5] >> 6) & eightBitsMask)
471 b[13] = byte((f.n[5] >> 14) & eightBitsMask)
472 b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
473 b[11] = byte((f.n[6] >> 4) & eightBitsMask)
474 b[10] = byte((f.n[6] >> 12) & eightBitsMask)
475 b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
476 b[8] = byte((f.n[7] >> 2) & eightBitsMask)
477 b[7] = byte((f.n[7] >> 10) & eightBitsMask)
478 b[6] = byte((f.n[7] >> 18) & eightBitsMask)
479 b[5] = byte(f.n[8] & eightBitsMask)
480 b[4] = byte((f.n[8] >> 8) & eightBitsMask)
481 b[3] = byte((f.n[8] >> 16) & eightBitsMask)
482 b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
483 b[1] = byte((f.n[9] >> 6) & eightBitsMask)
484 b[0] = byte((f.n[9] >> 14) & eightBitsMask)
485 }
486 487 // PutBytes unpacks the field value to a 32-byte big-endian value using the
488 // passed byte array in constant time.
489 //
490 // There is a similar function, PutBytesUnchecked, which unpacks the field value
491 // into a slice that must have at least 32 bytes available. This version is
492 // provided since it can be useful to write directly into an array that is type
493 // checked.
494 //
495 // Alternatively, there is also Bytes, which unpacks the field value into a new
496 // array and returns that which can sometimes be more ergonomic in applications
497 // that aren't concerned about an additional copy.
498 //
499 // Preconditions:
500 // - The field value MUST be normalized
501 func (f *FieldVal) PutBytes(b *[32]byte) {
502 f.PutBytesUnchecked(b[:])
503 }
504 505 // Bytes unpacks the field value to a 32-byte big-endian value in constant time.
506 //
507 // See PutBytes and PutBytesUnchecked for variants that allow an array or slice
508 // to be passed which can be useful to cut down on the number of allocations by
509 // allowing the caller to reuse a buffer or write directly into part of a larger
510 // buffer.
511 //
512 // Preconditions:
513 // - The field value MUST be normalized
514 func (f *FieldVal) Bytes() *[32]byte {
515 b := new([32]byte)
516 f.PutBytesUnchecked(b[:])
517 return b
518 }
519 520 // IsZeroBit returns 1 when the field value is equal to zero or 0 otherwise in
521 // constant time.
522 //
523 // Note that a bool is not used here because it is not possible in Go to convert
524 // from a bool to numeric value in constant time and many constant-time
525 // operations require a numeric value. See IsZero for the version that returns
526 // a bool.
527 //
528 // Preconditions:
529 // - The field value MUST be normalized
530 func (f *FieldVal) IsZeroBit() uint32 {
531 // The value can only be zero if no bits are set in any of the words.
532 // This is a constant time implementation.
533 bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
534 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
535 536 return constantTimeEq(bits, 0)
537 }
538 539 // IsZero returns whether or not the field value is equal to zero in constant
540 // time.
541 //
542 // Preconditions:
543 // - The field value MUST be normalized
544 func (f *FieldVal) IsZero() bool {
545 // The value can only be zero if no bits are set in any of the words.
546 // This is a constant time implementation.
547 bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
548 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
549 550 return bits == 0
551 }
552 553 // IsOneBit returns 1 when the field value is equal to one or 0 otherwise in
554 // constant time.
555 //
556 // Note that a bool is not used here because it is not possible in Go to convert
557 // from a bool to numeric value in constant time and many constant-time
558 // operations require a numeric value. See IsOne for the version that returns a
559 // bool.
560 //
561 // Preconditions:
562 // - The field value MUST be normalized
563 func (f *FieldVal) IsOneBit() uint32 {
564 // The value can only be one if the single lowest significant bit is set in
565 // the first word and no other bits are set in any of the other words.
566 // This is a constant time implementation.
567 bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
568 f.n[6] | f.n[7] | f.n[8] | f.n[9]
569 570 return constantTimeEq(bits, 0)
571 }
572 573 // IsOne returns whether or not the field value is equal to one in constant
574 // time.
575 //
576 // Preconditions:
577 // - The field value MUST be normalized
578 func (f *FieldVal) IsOne() bool {
579 // The value can only be one if the single lowest significant bit is set in
580 // the first word and no other bits are set in any of the other words.
581 // This is a constant time implementation.
582 bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
583 f.n[6] | f.n[7] | f.n[8] | f.n[9]
584 585 return bits == 0
586 }
587 588 // IsOddBit returns 1 when the field value is an odd number or 0 otherwise in
589 // constant time.
590 //
591 // Note that a bool is not used here because it is not possible in Go to convert
592 // from a bool to numeric value in constant time and many constant-time
593 // operations require a numeric value. See IsOdd for the version that returns a
594 // bool.
595 //
596 // Preconditions:
597 // - The field value MUST be normalized
598 func (f *FieldVal) IsOddBit() uint32 {
599 // Only odd numbers have the bottom bit set.
600 return f.n[0] & 1
601 }
602 603 // IsOdd returns whether or not the field value is an odd number in constant
604 // time.
605 //
606 // Preconditions:
607 // - The field value MUST be normalized
608 func (f *FieldVal) IsOdd() bool {
609 // Only odd numbers have the bottom bit set.
610 return f.n[0]&1 == 1
611 }
612 613 // Equals returns whether or not the two field values are the same in constant
614 // time.
615 //
616 // Preconditions:
617 // - Both field values being compared MUST be normalized
618 func (f *FieldVal) Equals(val *FieldVal) bool {
619 // Xor only sets bits when they are different, so the two field values
620 // can only be the same if no bits are set after xoring each word.
621 // This is a constant time implementation.
622 bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
623 (f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
624 (f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
625 (f.n[9] ^ val.n[9])
626 627 return bits == 0
628 }
629 630 // NegateVal negates the passed value and stores the result in f in constant
631 // time. The caller must provide the maximum magnitude of the passed value for
632 // a correct result.
633 //
634 // The field value is returned to support chaining. This enables syntax like:
635 // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
636 //
637 // Preconditions:
638 // - The max magnitude MUST be 31
639 // Output Normalized: No
640 // Output Max Magnitude: Input magnitude + 1
641 func (f *FieldVal) NegateVal(val *FieldVal, magnitude uint32) *FieldVal {
642 // Negation in the field is just the prime minus the value. However,
643 // in order to allow negation against a field value without having to
644 // normalize/reduce it first, multiply by the magnitude (that is how
645 // "far" away it is from the normalized value) to adjust. Also, since
646 // negating a value pushes it one more order of magnitude away from the
647 // normalized range, add 1 to compensate.
648 //
649 // For some intuition here, imagine you're performing mod 12 arithmetic
650 // (picture a clock) and you are negating the number 7. So you start at
651 // 12 (which is of course 0 under mod 12) and count backwards (left on
652 // the clock) 7 times to arrive at 5. Notice this is just 12-7 = 5.
653 // Now, assume you're starting with 19, which is a number that is
654 // already larger than the modulus and congruent to 7 (mod 12). When a
655 // value is already in the desired range, its magnitude is 1. Since 19
656 // is an additional "step", its magnitude (mod 12) is 2. Since any
657 // multiple of the modulus is congruent to zero (mod m), the answer can
658 // be shortcut by simply multiplying the magnitude by the modulus and
659 // subtracting. Keeping with the example, this would be (2*12)-19 = 5.
660 f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
661 f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
662 f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
663 f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
664 f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
665 f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
666 f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
667 f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
668 f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
669 f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
670 671 return f
672 }
673 674 // Negate negates the field value in constant time. The existing field value is
675 // modified. The caller must provide the maximum magnitude of the field value
676 // for a correct result.
677 //
678 // The field value is returned to support chaining. This enables syntax like:
679 // f.Negate().AddInt(1) so that f = -f + 1.
680 //
681 // Preconditions:
682 // - The max magnitude MUST be 31
683 // Output Normalized: No
684 // Output Max Magnitude: Input magnitude + 1
685 func (f *FieldVal) Negate(magnitude uint32) *FieldVal {
686 return f.NegateVal(f, magnitude)
687 }
688 689 // AddInt adds the passed integer to the existing field value and stores the
690 // result in f in constant time. This is a convenience function since it is
691 // fairly common to perform some arithmetic with small native integers.
692 //
693 // The field value is returned to support chaining. This enables syntax like:
694 // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
695 //
696 // Preconditions:
697 // - The field value MUST have a max magnitude of 31
698 // - The integer MUST be a max of 32767
699 // Output Normalized: No
700 // Output Max Magnitude: Existing field magnitude + 1
701 func (f *FieldVal) AddInt(ui uint16) *FieldVal {
702 // Since the field representation intentionally provides overflow bits,
703 // it's ok to use carryless addition as the carry bit is safely part of
704 // the word and will be normalized out.
705 f.n[0] += uint32(ui)
706 707 return f
708 }
709 710 // Add adds the passed value to the existing field value and stores the result
711 // in f in constant time.
712 //
713 // The field value is returned to support chaining. This enables syntax like:
714 // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
715 //
716 // Preconditions:
717 // - The sum of the magnitudes of the two field values MUST be a max of 32
718 // Output Normalized: No
719 // Output Max Magnitude: Sum of the magnitude of the two individual field values
720 func (f *FieldVal) Add(val *FieldVal) *FieldVal {
721 // Since the field representation intentionally provides overflow bits,
722 // it's ok to use carryless addition as the carry bit is safely part of
723 // each word and will be normalized out. This could obviously be done
724 // in a loop, but the unrolled version is faster.
725 f.n[0] += val.n[0]
726 f.n[1] += val.n[1]
727 f.n[2] += val.n[2]
728 f.n[3] += val.n[3]
729 f.n[4] += val.n[4]
730 f.n[5] += val.n[5]
731 f.n[6] += val.n[6]
732 f.n[7] += val.n[7]
733 f.n[8] += val.n[8]
734 f.n[9] += val.n[9]
735 736 return f
737 }
738 739 // Add2 adds the passed two field values together and stores the result in f in
740 // constant time.
741 //
742 // The field value is returned to support chaining. This enables syntax like:
743 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
744 //
745 // Preconditions:
746 // - The sum of the magnitudes of the two field values MUST be a max of 32
747 // Output Normalized: No
748 // Output Max Magnitude: Sum of the magnitude of the two field values
749 func (f *FieldVal) Add2(val *FieldVal, val2 *FieldVal) *FieldVal {
750 // Since the field representation intentionally provides overflow bits,
751 // it's ok to use carryless addition as the carry bit is safely part of
752 // each word and will be normalized out. This could obviously be done
753 // in a loop, but the unrolled version is faster.
754 f.n[0] = val.n[0] + val2.n[0]
755 f.n[1] = val.n[1] + val2.n[1]
756 f.n[2] = val.n[2] + val2.n[2]
757 f.n[3] = val.n[3] + val2.n[3]
758 f.n[4] = val.n[4] + val2.n[4]
759 f.n[5] = val.n[5] + val2.n[5]
760 f.n[6] = val.n[6] + val2.n[6]
761 f.n[7] = val.n[7] + val2.n[7]
762 f.n[8] = val.n[8] + val2.n[8]
763 f.n[9] = val.n[9] + val2.n[9]
764 765 return f
766 }
767 768 // MulInt multiplies the field value by the passed int and stores the result in
769 // f in constant time. Note that this function can overflow if multiplying the
770 // value by any of the individual words exceeds a max uint32. Therefore it is
771 // important that the caller ensures no overflows will occur before using this
772 // function.
773 //
774 // The field value is returned to support chaining. This enables syntax like:
775 // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
776 //
777 // Preconditions:
778 // - The field value magnitude multiplied by given val MUST be a max of 32
779 // Output Normalized: No
780 // Output Max Magnitude: Existing field magnitude times the provided integer val
781 func (f *FieldVal) MulInt(val uint8) *FieldVal {
782 // Since each word of the field representation can hold up to
783 // 32 - fieldBase extra bits which will be normalized out, it's safe
784 // to multiply each word without using a larger type or carry
785 // propagation so long as the values won't overflow a uint32. This
786 // could obviously be done in a loop, but the unrolled version is
787 // faster.
788 ui := uint32(val)
789 f.n[0] *= ui
790 f.n[1] *= ui
791 f.n[2] *= ui
792 f.n[3] *= ui
793 f.n[4] *= ui
794 f.n[5] *= ui
795 f.n[6] *= ui
796 f.n[7] *= ui
797 f.n[8] *= ui
798 f.n[9] *= ui
799 800 return f
801 }
802 803 // Mul multiplies the passed value to the existing field value and stores the
804 // result in f in constant time. Note that this function can overflow if
805 // multiplying any of the individual words exceeds a max uint32. In practice,
806 // this means the magnitude of either value involved in the multiplication must
807 // be a max of 8.
808 //
809 // The field value is returned to support chaining. This enables syntax like:
810 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
811 //
812 // Preconditions:
813 // - Both field values MUST have a max magnitude of 8
814 // Output Normalized: No
815 // Output Max Magnitude: 1
816 func (f *FieldVal) Mul(val *FieldVal) *FieldVal {
817 return f.Mul2(f, val)
818 }
819 820 // Mul2 multiplies the passed two field values together and stores the result in
821 // f in constant time. Note that this function can overflow if multiplying any
822 // of the individual words exceeds a max uint32. In practice, this means the
823 // magnitude of either value involved in the multiplication must be a max of 8.
824 //
825 // The field value is returned to support chaining. This enables syntax like:
826 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
827 //
828 // Preconditions:
829 // - Both input field values MUST have a max magnitude of 8
830 // Output Normalized: No
831 // Output Max Magnitude: 1
832 func (f *FieldVal) Mul2(val *FieldVal, val2 *FieldVal) *FieldVal {
833 // This could be done with a couple of for loops and an array to store
834 // the intermediate terms, but this unrolled version is significantly
835 // faster.
836 837 // Terms for 2^(fieldBase*0).
838 m := uint64(val.n[0]) * uint64(val2.n[0])
839 t0 := m & fieldBaseMask
840 841 // Terms for 2^(fieldBase*1).
842 m = (m >> fieldBase) +
843 uint64(val.n[0])*uint64(val2.n[1]) +
844 uint64(val.n[1])*uint64(val2.n[0])
845 t1 := m & fieldBaseMask
846 847 // Terms for 2^(fieldBase*2).
848 m = (m >> fieldBase) +
849 uint64(val.n[0])*uint64(val2.n[2]) +
850 uint64(val.n[1])*uint64(val2.n[1]) +
851 uint64(val.n[2])*uint64(val2.n[0])
852 t2 := m & fieldBaseMask
853 854 // Terms for 2^(fieldBase*3).
855 m = (m >> fieldBase) +
856 uint64(val.n[0])*uint64(val2.n[3]) +
857 uint64(val.n[1])*uint64(val2.n[2]) +
858 uint64(val.n[2])*uint64(val2.n[1]) +
859 uint64(val.n[3])*uint64(val2.n[0])
860 t3 := m & fieldBaseMask
861 862 // Terms for 2^(fieldBase*4).
863 m = (m >> fieldBase) +
864 uint64(val.n[0])*uint64(val2.n[4]) +
865 uint64(val.n[1])*uint64(val2.n[3]) +
866 uint64(val.n[2])*uint64(val2.n[2]) +
867 uint64(val.n[3])*uint64(val2.n[1]) +
868 uint64(val.n[4])*uint64(val2.n[0])
869 t4 := m & fieldBaseMask
870 871 // Terms for 2^(fieldBase*5).
872 m = (m >> fieldBase) +
873 uint64(val.n[0])*uint64(val2.n[5]) +
874 uint64(val.n[1])*uint64(val2.n[4]) +
875 uint64(val.n[2])*uint64(val2.n[3]) +
876 uint64(val.n[3])*uint64(val2.n[2]) +
877 uint64(val.n[4])*uint64(val2.n[1]) +
878 uint64(val.n[5])*uint64(val2.n[0])
879 t5 := m & fieldBaseMask
880 881 // Terms for 2^(fieldBase*6).
882 m = (m >> fieldBase) +
883 uint64(val.n[0])*uint64(val2.n[6]) +
884 uint64(val.n[1])*uint64(val2.n[5]) +
885 uint64(val.n[2])*uint64(val2.n[4]) +
886 uint64(val.n[3])*uint64(val2.n[3]) +
887 uint64(val.n[4])*uint64(val2.n[2]) +
888 uint64(val.n[5])*uint64(val2.n[1]) +
889 uint64(val.n[6])*uint64(val2.n[0])
890 t6 := m & fieldBaseMask
891 892 // Terms for 2^(fieldBase*7).
893 m = (m >> fieldBase) +
894 uint64(val.n[0])*uint64(val2.n[7]) +
895 uint64(val.n[1])*uint64(val2.n[6]) +
896 uint64(val.n[2])*uint64(val2.n[5]) +
897 uint64(val.n[3])*uint64(val2.n[4]) +
898 uint64(val.n[4])*uint64(val2.n[3]) +
899 uint64(val.n[5])*uint64(val2.n[2]) +
900 uint64(val.n[6])*uint64(val2.n[1]) +
901 uint64(val.n[7])*uint64(val2.n[0])
902 t7 := m & fieldBaseMask
903 904 // Terms for 2^(fieldBase*8).
905 m = (m >> fieldBase) +
906 uint64(val.n[0])*uint64(val2.n[8]) +
907 uint64(val.n[1])*uint64(val2.n[7]) +
908 uint64(val.n[2])*uint64(val2.n[6]) +
909 uint64(val.n[3])*uint64(val2.n[5]) +
910 uint64(val.n[4])*uint64(val2.n[4]) +
911 uint64(val.n[5])*uint64(val2.n[3]) +
912 uint64(val.n[6])*uint64(val2.n[2]) +
913 uint64(val.n[7])*uint64(val2.n[1]) +
914 uint64(val.n[8])*uint64(val2.n[0])
915 t8 := m & fieldBaseMask
916 917 // Terms for 2^(fieldBase*9).
918 m = (m >> fieldBase) +
919 uint64(val.n[0])*uint64(val2.n[9]) +
920 uint64(val.n[1])*uint64(val2.n[8]) +
921 uint64(val.n[2])*uint64(val2.n[7]) +
922 uint64(val.n[3])*uint64(val2.n[6]) +
923 uint64(val.n[4])*uint64(val2.n[5]) +
924 uint64(val.n[5])*uint64(val2.n[4]) +
925 uint64(val.n[6])*uint64(val2.n[3]) +
926 uint64(val.n[7])*uint64(val2.n[2]) +
927 uint64(val.n[8])*uint64(val2.n[1]) +
928 uint64(val.n[9])*uint64(val2.n[0])
929 t9 := m & fieldBaseMask
930 931 // Terms for 2^(fieldBase*10).
932 m = (m >> fieldBase) +
933 uint64(val.n[1])*uint64(val2.n[9]) +
934 uint64(val.n[2])*uint64(val2.n[8]) +
935 uint64(val.n[3])*uint64(val2.n[7]) +
936 uint64(val.n[4])*uint64(val2.n[6]) +
937 uint64(val.n[5])*uint64(val2.n[5]) +
938 uint64(val.n[6])*uint64(val2.n[4]) +
939 uint64(val.n[7])*uint64(val2.n[3]) +
940 uint64(val.n[8])*uint64(val2.n[2]) +
941 uint64(val.n[9])*uint64(val2.n[1])
942 t10 := m & fieldBaseMask
943 944 // Terms for 2^(fieldBase*11).
945 m = (m >> fieldBase) +
946 uint64(val.n[2])*uint64(val2.n[9]) +
947 uint64(val.n[3])*uint64(val2.n[8]) +
948 uint64(val.n[4])*uint64(val2.n[7]) +
949 uint64(val.n[5])*uint64(val2.n[6]) +
950 uint64(val.n[6])*uint64(val2.n[5]) +
951 uint64(val.n[7])*uint64(val2.n[4]) +
952 uint64(val.n[8])*uint64(val2.n[3]) +
953 uint64(val.n[9])*uint64(val2.n[2])
954 t11 := m & fieldBaseMask
955 956 // Terms for 2^(fieldBase*12).
957 m = (m >> fieldBase) +
958 uint64(val.n[3])*uint64(val2.n[9]) +
959 uint64(val.n[4])*uint64(val2.n[8]) +
960 uint64(val.n[5])*uint64(val2.n[7]) +
961 uint64(val.n[6])*uint64(val2.n[6]) +
962 uint64(val.n[7])*uint64(val2.n[5]) +
963 uint64(val.n[8])*uint64(val2.n[4]) +
964 uint64(val.n[9])*uint64(val2.n[3])
965 t12 := m & fieldBaseMask
966 967 // Terms for 2^(fieldBase*13).
968 m = (m >> fieldBase) +
969 uint64(val.n[4])*uint64(val2.n[9]) +
970 uint64(val.n[5])*uint64(val2.n[8]) +
971 uint64(val.n[6])*uint64(val2.n[7]) +
972 uint64(val.n[7])*uint64(val2.n[6]) +
973 uint64(val.n[8])*uint64(val2.n[5]) +
974 uint64(val.n[9])*uint64(val2.n[4])
975 t13 := m & fieldBaseMask
976 977 // Terms for 2^(fieldBase*14).
978 m = (m >> fieldBase) +
979 uint64(val.n[5])*uint64(val2.n[9]) +
980 uint64(val.n[6])*uint64(val2.n[8]) +
981 uint64(val.n[7])*uint64(val2.n[7]) +
982 uint64(val.n[8])*uint64(val2.n[6]) +
983 uint64(val.n[9])*uint64(val2.n[5])
984 t14 := m & fieldBaseMask
985 986 // Terms for 2^(fieldBase*15).
987 m = (m >> fieldBase) +
988 uint64(val.n[6])*uint64(val2.n[9]) +
989 uint64(val.n[7])*uint64(val2.n[8]) +
990 uint64(val.n[8])*uint64(val2.n[7]) +
991 uint64(val.n[9])*uint64(val2.n[6])
992 t15 := m & fieldBaseMask
993 994 // Terms for 2^(fieldBase*16).
995 m = (m >> fieldBase) +
996 uint64(val.n[7])*uint64(val2.n[9]) +
997 uint64(val.n[8])*uint64(val2.n[8]) +
998 uint64(val.n[9])*uint64(val2.n[7])
999 t16 := m & fieldBaseMask
1000 1001 // Terms for 2^(fieldBase*17).
1002 m = (m >> fieldBase) +
1003 uint64(val.n[8])*uint64(val2.n[9]) +
1004 uint64(val.n[9])*uint64(val2.n[8])
1005 t17 := m & fieldBaseMask
1006 1007 // Terms for 2^(fieldBase*18).
1008 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
1009 t18 := m & fieldBaseMask
1010 1011 // What's left is for 2^(fieldBase*19).
1012 t19 := m >> fieldBase
1013 1014 // At this point, all of the terms are grouped into their respective
1015 // base.
1016 //
1017 // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
1018 // when the modulus is of the special form m = b^t - c, highly efficient
1019 // reduction can be achieved per the provided algorithm.
1020 //
1021 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
1022 // this criteria.
1023 //
1024 // 4294968273 in field representation (base 2^26) is:
1025 // n[0] = 977
1026 // n[1] = 64
1027 // That is to say (2^26 * 64) + 977 = 4294968273
1028 //
1029 // Since each word is in base 26, the upper terms (t10 and up) start
1030 // at 260 bits (versus the final desired range of 256 bits), so the
1031 // field representation of 'c' from above needs to be adjusted for the
1032 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 =
1033 // 68719492368. Thus, the adjusted field representation of 'c' is:
1034 // n[0] = 977 * 16 = 15632
1035 // n[1] = 64 * 16 = 1024
1036 // That is to say (2^26 * 1024) + 15632 = 68719492368
1037 //
1038 // To reduce the final term, t19, the entire 'c' value is needed instead
1039 // of only n[0] because there are no more terms left to handle n[1].
1040 // This means there might be some magnitude left in the upper bits that
1041 // is handled below.
1042 m = t0 + t10*15632
1043 t0 = m & fieldBaseMask
1044 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1045 t1 = m & fieldBaseMask
1046 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1047 t2 = m & fieldBaseMask
1048 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1049 t3 = m & fieldBaseMask
1050 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1051 t4 = m & fieldBaseMask
1052 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1053 t5 = m & fieldBaseMask
1054 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1055 t6 = m & fieldBaseMask
1056 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1057 t7 = m & fieldBaseMask
1058 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1059 t8 = m & fieldBaseMask
1060 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1061 t9 = m & fieldMSBMask
1062 m >>= fieldMSBBits
1063 1064 // At this point, if the magnitude is greater than 0, the overall value
1065 // is greater than the max possible 256-bit value. In particular, it is
1066 // "how many times larger" than the max value it is.
1067 //
1068 // The algorithm presented in [HAC] section 14.3.4 repeats until the
1069 // quotient is zero. However, due to the above, we already know at
1070 // least how many times we would need to repeat as it's the value
1071 // currently in m. Thus we can simply multiply the magnitude by the
1072 // field representation of the prime and do a single iteration. Notice
1073 // that nothing will be changed when the magnitude is zero, so we could
1074 // skip this in that case, however always running regardless allows it
1075 // to run in constant time. The final result will be in the range
1076 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1077 // magnitude of 1, but it is denormalized.
1078 d := t0 + m*977
1079 f.n[0] = uint32(d & fieldBaseMask)
1080 d = (d >> fieldBase) + t1 + m*64
1081 f.n[1] = uint32(d & fieldBaseMask)
1082 f.n[2] = uint32((d >> fieldBase) + t2)
1083 f.n[3] = uint32(t3)
1084 f.n[4] = uint32(t4)
1085 f.n[5] = uint32(t5)
1086 f.n[6] = uint32(t6)
1087 f.n[7] = uint32(t7)
1088 f.n[8] = uint32(t8)
1089 f.n[9] = uint32(t9)
1090 1091 return f
1092 }
1093 1094 // SquareRootVal either calculates the square root of the passed value when it
1095 // exists or the square root of the negation of the value when it does not exist
1096 // and stores the result in f in constant time. The return flag is true when
1097 // the calculated square root is for the passed value itself and false when it
1098 // is for its negation.
1099 //
1100 // Note that this function can overflow if multiplying any of the individual
1101 // words exceeds a max uint32. In practice, this means the magnitude of the
1102 // field must be a max of 8 to prevent overflow. The magnitude of the result
1103 // will be 1.
1104 //
1105 // Preconditions:
1106 // - The input field value MUST have a max magnitude of 8
1107 // Output Normalized: No
1108 // Output Max Magnitude: 1
1109 func (f *FieldVal) SquareRootVal(val *FieldVal) bool {
1110 // This uses the Tonelli-Shanks method for calculating the square root of
1111 // the value when it exists. The key principles of the method follow.
1112 //
1113 // Fermat's little theorem states that for a nonzero number 'a' and prime
1114 // 'p', a^(p-1) ≡ 1 (mod p).
1115 //
1116 // Further, Euler's criterion states that an integer 'a' has a square root
1117 // (aka is a quadratic residue) modulo a prime if a^((p-1)/2) ≡ 1 (mod p)
1118 // and, conversely, when it does NOT have a square root (aka 'a' is a
1119 // non-residue) a^((p-1)/2) ≡ -1 (mod p).
1120 //
1121 // This can be seen by considering that Fermat's little theorem can be
1122 // written as (a^((p-1)/2) - 1)(a^((p-1)/2) + 1) ≡ 0 (mod p). Therefore,
1123 // one of the two factors must be 0. Then, when a ≡ x^2 (aka 'a' is a
1124 // quadratic residue), (x^2)^((p-1)/2) ≡ x^(p-1) ≡ 1 (mod p) which implies
1125 // the first factor must be zero. Finally, per Lagrange's theorem, the
1126 // non-residues are the only remaining possible solutions and thus must make
1127 // the second factor zero to satisfy Fermat's little theorem implying that
1128 // a^((p-1)/2) ≡ -1 (mod p) for that case.
1129 //
1130 // The Tonelli-Shanks method uses these facts along with factoring out
1131 // powers of two to solve a congruence that results in either the solution
1132 // when the square root exists or the square root of the negation of the
1133 // value when it does not. In the case of primes that are ≡ 3 (mod 4), the
1134 // possible solutions are r = ±a^((p+1)/4) (mod p). Therefore, either r^2 ≡
1135 // a (mod p) is true in which case ±r are the two solutions, or r^2 ≡ -a
1136 // (mod p) in which case 'a' is a non-residue and there are no solutions.
1137 //
1138 // The secp256k1 prime is ≡ 3 (mod 4), so this result applies.
1139 //
1140 // In other words, calculate a^((p+1)/4) and then square it and check it
1141 // against the original value to determine if it is actually the square
1142 // root.
1143 //
1144 // In order to efficiently compute a^((p+1)/4), (p+1)/4 needs to be split
1145 // into a sequence of squares and multiplications that minimizes the number
1146 // of multiplications needed (since they are more costly than squarings).
1147 //
1148 // The secp256k1 prime + 1 / 4 is 2^254 - 2^30 - 244. In binary, that is:
1149 //
1150 // 00111111 11111111 11111111 11111111
1151 // 11111111 11111111 11111111 11111111
1152 // 11111111 11111111 11111111 11111111
1153 // 11111111 11111111 11111111 11111111
1154 // 11111111 11111111 11111111 11111111
1155 // 11111111 11111111 11111111 11111111
1156 // 11111111 11111111 11111111 11111111
1157 // 10111111 11111111 11111111 00001100
1158 //
1159 // Notice that can be broken up into three windows of consecutive 1s (in
1160 // order of least to most significant) as:
1161 //
1162 // 6-bit window with two bits set (bits 4, 5, 6, 7 unset)
1163 // 23-bit window with 22 bits set (bit 30 unset)
1164 // 223-bit window with all 223 bits set
1165 //
1166 // Thus, the groups of 1 bits in each window forms the set:
1167 // S = {2, 22, 223}.
1168 //
1169 // The strategy is to calculate a^(2^n - 1) for each grouping via an
1170 // addition chain with a sliding window.
1171 //
1172 // The addition chain used is (credits to Peter Dettman):
1173 // (0,0),(1,0),(2,2),(3,2),(4,1),(5,5),(6,6),(7,7),(8,8),(9,7),(10,2)
1174 // => 2^1 2^[2] 2^3 2^6 2^9 2^11 2^[22] 2^44 2^88 2^176 2^220 2^[223]
1175 //
1176 // This has a cost of 254 field squarings and 13 field multiplications.
1177 var a, a2, a3, a6, a9, a11, a22, a44, a88, a176, a220, a223 FieldVal
1178 a.Set(val)
1179 a2.SquareVal(&a).Mul(&a) // a2 = a^(2^2 - 1)
1180 a3.SquareVal(&a2).Mul(&a) // a3 = a^(2^3 - 1)
1181 a6.SquareVal(&a3).Square().Square() // a6 = a^(2^6 - 2^3)
1182 a6.Mul(&a3) // a6 = a^(2^6 - 1)
1183 a9.SquareVal(&a6).Square().Square() // a9 = a^(2^9 - 2^3)
1184 a9.Mul(&a3) // a9 = a^(2^9 - 1)
1185 a11.SquareVal(&a9).Square() // a11 = a^(2^11 - 2^2)
1186 a11.Mul(&a2) // a11 = a^(2^11 - 1)
1187 a22.SquareVal(&a11).Square().Square().Square().Square() // a22 = a^(2^16 - 2^5)
1188 a22.Square().Square().Square().Square().Square() // a22 = a^(2^21 - 2^10)
1189 a22.Square() // a22 = a^(2^22 - 2^11)
1190 a22.Mul(&a11) // a22 = a^(2^22 - 1)
1191 a44.SquareVal(&a22).Square().Square().Square().Square() // a44 = a^(2^27 - 2^5)
1192 a44.Square().Square().Square().Square().Square() // a44 = a^(2^32 - 2^10)
1193 a44.Square().Square().Square().Square().Square() // a44 = a^(2^37 - 2^15)
1194 a44.Square().Square().Square().Square().Square() // a44 = a^(2^42 - 2^20)
1195 a44.Square().Square() // a44 = a^(2^44 - 2^22)
1196 a44.Mul(&a22) // a44 = a^(2^44 - 1)
1197 a88.SquareVal(&a44).Square().Square().Square().Square() // a88 = a^(2^49 - 2^5)
1198 a88.Square().Square().Square().Square().Square() // a88 = a^(2^54 - 2^10)
1199 a88.Square().Square().Square().Square().Square() // a88 = a^(2^59 - 2^15)
1200 a88.Square().Square().Square().Square().Square() // a88 = a^(2^64 - 2^20)
1201 a88.Square().Square().Square().Square().Square() // a88 = a^(2^69 - 2^25)
1202 a88.Square().Square().Square().Square().Square() // a88 = a^(2^74 - 2^30)
1203 a88.Square().Square().Square().Square().Square() // a88 = a^(2^79 - 2^35)
1204 a88.Square().Square().Square().Square().Square() // a88 = a^(2^84 - 2^40)
1205 a88.Square().Square().Square().Square() // a88 = a^(2^88 - 2^44)
1206 a88.Mul(&a44) // a88 = a^(2^88 - 1)
1207 a176.SquareVal(&a88).Square().Square().Square().Square() // a176 = a^(2^93 - 2^5)
1208 a176.Square().Square().Square().Square().Square() // a176 = a^(2^98 - 2^10)
1209 a176.Square().Square().Square().Square().Square() // a176 = a^(2^103 - 2^15)
1210 a176.Square().Square().Square().Square().Square() // a176 = a^(2^108 - 2^20)
1211 a176.Square().Square().Square().Square().Square() // a176 = a^(2^113 - 2^25)
1212 a176.Square().Square().Square().Square().Square() // a176 = a^(2^118 - 2^30)
1213 a176.Square().Square().Square().Square().Square() // a176 = a^(2^123 - 2^35)
1214 a176.Square().Square().Square().Square().Square() // a176 = a^(2^128 - 2^40)
1215 a176.Square().Square().Square().Square().Square() // a176 = a^(2^133 - 2^45)
1216 a176.Square().Square().Square().Square().Square() // a176 = a^(2^138 - 2^50)
1217 a176.Square().Square().Square().Square().Square() // a176 = a^(2^143 - 2^55)
1218 a176.Square().Square().Square().Square().Square() // a176 = a^(2^148 - 2^60)
1219 a176.Square().Square().Square().Square().Square() // a176 = a^(2^153 - 2^65)
1220 a176.Square().Square().Square().Square().Square() // a176 = a^(2^158 - 2^70)
1221 a176.Square().Square().Square().Square().Square() // a176 = a^(2^163 - 2^75)
1222 a176.Square().Square().Square().Square().Square() // a176 = a^(2^168 - 2^80)
1223 a176.Square().Square().Square().Square().Square() // a176 = a^(2^173 - 2^85)
1224 a176.Square().Square().Square() // a176 = a^(2^176 - 2^88)
1225 a176.Mul(&a88) // a176 = a^(2^176 - 1)
1226 a220.SquareVal(&a176).Square().Square().Square().Square() // a220 = a^(2^181 - 2^5)
1227 a220.Square().Square().Square().Square().Square() // a220 = a^(2^186 - 2^10)
1228 a220.Square().Square().Square().Square().Square() // a220 = a^(2^191 - 2^15)
1229 a220.Square().Square().Square().Square().Square() // a220 = a^(2^196 - 2^20)
1230 a220.Square().Square().Square().Square().Square() // a220 = a^(2^201 - 2^25)
1231 a220.Square().Square().Square().Square().Square() // a220 = a^(2^206 - 2^30)
1232 a220.Square().Square().Square().Square().Square() // a220 = a^(2^211 - 2^35)
1233 a220.Square().Square().Square().Square().Square() // a220 = a^(2^216 - 2^40)
1234 a220.Square().Square().Square().Square() // a220 = a^(2^220 - 2^44)
1235 a220.Mul(&a44) // a220 = a^(2^220 - 1)
1236 a223.SquareVal(&a220).Square().Square() // a223 = a^(2^223 - 2^3)
1237 a223.Mul(&a3) // a223 = a^(2^223 - 1)
1238 1239 f.SquareVal(&a223).Square().Square().Square().Square() // f = a^(2^228 - 2^5)
1240 f.Square().Square().Square().Square().Square() // f = a^(2^233 - 2^10)
1241 f.Square().Square().Square().Square().Square() // f = a^(2^238 - 2^15)
1242 f.Square().Square().Square().Square().Square() // f = a^(2^243 - 2^20)
1243 f.Square().Square().Square() // f = a^(2^246 - 2^23)
1244 f.Mul(&a22) // f = a^(2^246 - 2^22 - 1)
1245 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 2^27 - 2^5)
1246 f.Square() // f = a^(2^252 - 2^28 - 2^6)
1247 f.Mul(&a2) // f = a^(2^252 - 2^28 - 2^6 - 2^1 - 1)
1248 f.Square().Square() // f = a^(2^254 - 2^30 - 2^8 - 2^3 - 2^2)
1249 // // = a^(2^254 - 2^30 - 244)
1250 // // = a^((p+1)/4)
1251 1252 // Ensure the calculated result is actually the square root by squaring it
1253 // and checking against the original value.
1254 var sqr FieldVal
1255 return sqr.SquareVal(f).Normalize().Equals(val.Normalize())
1256 }
1257 1258 // Square squares the field value in constant time. The existing field value is
1259 // modified. Note that this function can overflow if multiplying any of the
1260 // individual words exceeds a max uint32. In practice, this means the magnitude
1261 // of the field must be a max of 8 to prevent overflow.
1262 //
1263 // The field value is returned to support chaining. This enables syntax like:
1264 // f.Square().Mul(f2) so that f = f^2 * f2.
1265 //
1266 // Preconditions:
1267 // - The field value MUST have a max magnitude of 8
1268 // Output Normalized: No
1269 // Output Max Magnitude: 1
1270 func (f *FieldVal) Square() *FieldVal {
1271 return f.SquareVal(f)
1272 }
1273 1274 // SquareVal squares the passed value and stores the result in f in constant
1275 // time. Note that this function can overflow if multiplying any of the
1276 // individual words exceeds a max uint32. In practice, this means the magnitude
1277 // of the field being squared must be a max of 8 to prevent overflow.
1278 //
1279 // The field value is returned to support chaining. This enables syntax like:
1280 // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
1281 //
1282 // Preconditions:
1283 // - The input field value MUST have a max magnitude of 8
1284 // Output Normalized: No
1285 // Output Max Magnitude: 1
1286 func (f *FieldVal) SquareVal(val *FieldVal) *FieldVal {
1287 // This could be done with a couple of for loops and an array to store
1288 // the intermediate terms, but this unrolled version is significantly
1289 // faster.
1290 1291 // Terms for 2^(fieldBase*0).
1292 m := uint64(val.n[0]) * uint64(val.n[0])
1293 t0 := m & fieldBaseMask
1294 1295 // Terms for 2^(fieldBase*1).
1296 m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
1297 t1 := m & fieldBaseMask
1298 1299 // Terms for 2^(fieldBase*2).
1300 m = (m >> fieldBase) +
1301 2*uint64(val.n[0])*uint64(val.n[2]) +
1302 uint64(val.n[1])*uint64(val.n[1])
1303 t2 := m & fieldBaseMask
1304 1305 // Terms for 2^(fieldBase*3).
1306 m = (m >> fieldBase) +
1307 2*uint64(val.n[0])*uint64(val.n[3]) +
1308 2*uint64(val.n[1])*uint64(val.n[2])
1309 t3 := m & fieldBaseMask
1310 1311 // Terms for 2^(fieldBase*4).
1312 m = (m >> fieldBase) +
1313 2*uint64(val.n[0])*uint64(val.n[4]) +
1314 2*uint64(val.n[1])*uint64(val.n[3]) +
1315 uint64(val.n[2])*uint64(val.n[2])
1316 t4 := m & fieldBaseMask
1317 1318 // Terms for 2^(fieldBase*5).
1319 m = (m >> fieldBase) +
1320 2*uint64(val.n[0])*uint64(val.n[5]) +
1321 2*uint64(val.n[1])*uint64(val.n[4]) +
1322 2*uint64(val.n[2])*uint64(val.n[3])
1323 t5 := m & fieldBaseMask
1324 1325 // Terms for 2^(fieldBase*6).
1326 m = (m >> fieldBase) +
1327 2*uint64(val.n[0])*uint64(val.n[6]) +
1328 2*uint64(val.n[1])*uint64(val.n[5]) +
1329 2*uint64(val.n[2])*uint64(val.n[4]) +
1330 uint64(val.n[3])*uint64(val.n[3])
1331 t6 := m & fieldBaseMask
1332 1333 // Terms for 2^(fieldBase*7).
1334 m = (m >> fieldBase) +
1335 2*uint64(val.n[0])*uint64(val.n[7]) +
1336 2*uint64(val.n[1])*uint64(val.n[6]) +
1337 2*uint64(val.n[2])*uint64(val.n[5]) +
1338 2*uint64(val.n[3])*uint64(val.n[4])
1339 t7 := m & fieldBaseMask
1340 1341 // Terms for 2^(fieldBase*8).
1342 m = (m >> fieldBase) +
1343 2*uint64(val.n[0])*uint64(val.n[8]) +
1344 2*uint64(val.n[1])*uint64(val.n[7]) +
1345 2*uint64(val.n[2])*uint64(val.n[6]) +
1346 2*uint64(val.n[3])*uint64(val.n[5]) +
1347 uint64(val.n[4])*uint64(val.n[4])
1348 t8 := m & fieldBaseMask
1349 1350 // Terms for 2^(fieldBase*9).
1351 m = (m >> fieldBase) +
1352 2*uint64(val.n[0])*uint64(val.n[9]) +
1353 2*uint64(val.n[1])*uint64(val.n[8]) +
1354 2*uint64(val.n[2])*uint64(val.n[7]) +
1355 2*uint64(val.n[3])*uint64(val.n[6]) +
1356 2*uint64(val.n[4])*uint64(val.n[5])
1357 t9 := m & fieldBaseMask
1358 1359 // Terms for 2^(fieldBase*10).
1360 m = (m >> fieldBase) +
1361 2*uint64(val.n[1])*uint64(val.n[9]) +
1362 2*uint64(val.n[2])*uint64(val.n[8]) +
1363 2*uint64(val.n[3])*uint64(val.n[7]) +
1364 2*uint64(val.n[4])*uint64(val.n[6]) +
1365 uint64(val.n[5])*uint64(val.n[5])
1366 t10 := m & fieldBaseMask
1367 1368 // Terms for 2^(fieldBase*11).
1369 m = (m >> fieldBase) +
1370 2*uint64(val.n[2])*uint64(val.n[9]) +
1371 2*uint64(val.n[3])*uint64(val.n[8]) +
1372 2*uint64(val.n[4])*uint64(val.n[7]) +
1373 2*uint64(val.n[5])*uint64(val.n[6])
1374 t11 := m & fieldBaseMask
1375 1376 // Terms for 2^(fieldBase*12).
1377 m = (m >> fieldBase) +
1378 2*uint64(val.n[3])*uint64(val.n[9]) +
1379 2*uint64(val.n[4])*uint64(val.n[8]) +
1380 2*uint64(val.n[5])*uint64(val.n[7]) +
1381 uint64(val.n[6])*uint64(val.n[6])
1382 t12 := m & fieldBaseMask
1383 1384 // Terms for 2^(fieldBase*13).
1385 m = (m >> fieldBase) +
1386 2*uint64(val.n[4])*uint64(val.n[9]) +
1387 2*uint64(val.n[5])*uint64(val.n[8]) +
1388 2*uint64(val.n[6])*uint64(val.n[7])
1389 t13 := m & fieldBaseMask
1390 1391 // Terms for 2^(fieldBase*14).
1392 m = (m >> fieldBase) +
1393 2*uint64(val.n[5])*uint64(val.n[9]) +
1394 2*uint64(val.n[6])*uint64(val.n[8]) +
1395 uint64(val.n[7])*uint64(val.n[7])
1396 t14 := m & fieldBaseMask
1397 1398 // Terms for 2^(fieldBase*15).
1399 m = (m >> fieldBase) +
1400 2*uint64(val.n[6])*uint64(val.n[9]) +
1401 2*uint64(val.n[7])*uint64(val.n[8])
1402 t15 := m & fieldBaseMask
1403 1404 // Terms for 2^(fieldBase*16).
1405 m = (m >> fieldBase) +
1406 2*uint64(val.n[7])*uint64(val.n[9]) +
1407 uint64(val.n[8])*uint64(val.n[8])
1408 t16 := m & fieldBaseMask
1409 1410 // Terms for 2^(fieldBase*17).
1411 m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
1412 t17 := m & fieldBaseMask
1413 1414 // Terms for 2^(fieldBase*18).
1415 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
1416 t18 := m & fieldBaseMask
1417 1418 // What's left is for 2^(fieldBase*19).
1419 t19 := m >> fieldBase
1420 1421 // At this point, all of the terms are grouped into their respective
1422 // base.
1423 //
1424 // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
1425 // when the modulus is of the special form m = b^t - c, highly efficient
1426 // reduction can be achieved per the provided algorithm.
1427 //
1428 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
1429 // this criteria.
1430 //
1431 // 4294968273 in field representation (base 2^26) is:
1432 // n[0] = 977
1433 // n[1] = 64
1434 // That is to say (2^26 * 64) + 977 = 4294968273
1435 //
1436 // Since each word is in base 26, the upper terms (t10 and up) start
1437 // at 260 bits (versus the final desired range of 256 bits), so the
1438 // field representation of 'c' from above needs to be adjusted for the
1439 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 =
1440 // 68719492368. Thus, the adjusted field representation of 'c' is:
1441 // n[0] = 977 * 16 = 15632
1442 // n[1] = 64 * 16 = 1024
1443 // That is to say (2^26 * 1024) + 15632 = 68719492368
1444 //
1445 // To reduce the final term, t19, the entire 'c' value is needed instead
1446 // of only n[0] because there are no more terms left to handle n[1].
1447 // This means there might be some magnitude left in the upper bits that
1448 // is handled below.
1449 m = t0 + t10*15632
1450 t0 = m & fieldBaseMask
1451 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1452 t1 = m & fieldBaseMask
1453 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1454 t2 = m & fieldBaseMask
1455 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1456 t3 = m & fieldBaseMask
1457 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1458 t4 = m & fieldBaseMask
1459 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1460 t5 = m & fieldBaseMask
1461 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1462 t6 = m & fieldBaseMask
1463 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1464 t7 = m & fieldBaseMask
1465 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1466 t8 = m & fieldBaseMask
1467 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1468 t9 = m & fieldMSBMask
1469 m >>= fieldMSBBits
1470 1471 // At this point, if the magnitude is greater than 0, the overall value
1472 // is greater than the max possible 256-bit value. In particular, it is
1473 // "how many times larger" than the max value it is.
1474 //
1475 // The algorithm presented in [HAC] section 14.3.4 repeats until the
1476 // quotient is zero. However, due to the above, we already know at
1477 // least how many times we would need to repeat as it's the value
1478 // currently in m. Thus we can simply multiply the magnitude by the
1479 // field representation of the prime and do a single iteration. Notice
1480 // that nothing will be changed when the magnitude is zero, so we could
1481 // skip this in that case, however always running regardless allows it
1482 // to run in constant time. The final result will be in the range
1483 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1484 // magnitude of 1, but it is denormalized.
1485 n := t0 + m*977
1486 f.n[0] = uint32(n & fieldBaseMask)
1487 n = (n >> fieldBase) + t1 + m*64
1488 f.n[1] = uint32(n & fieldBaseMask)
1489 f.n[2] = uint32((n >> fieldBase) + t2)
1490 f.n[3] = uint32(t3)
1491 f.n[4] = uint32(t4)
1492 f.n[5] = uint32(t5)
1493 f.n[6] = uint32(t6)
1494 f.n[7] = uint32(t7)
1495 f.n[8] = uint32(t8)
1496 f.n[9] = uint32(t9)
1497 1498 return f
1499 }
1500 1501 // Inverse finds the modular multiplicative inverse of the field value in
1502 // constant time. The existing field value is modified.
1503 //
1504 // The field value is returned to support chaining. This enables syntax like:
1505 // f.Inverse().Mul(f2) so that f = f^-1 * f2.
1506 //
1507 // Preconditions:
1508 // - The field value MUST have a max magnitude of 8
1509 // Output Normalized: No
1510 // Output Max Magnitude: 1
1511 func (f *FieldVal) Inverse() *FieldVal {
1512 // Fermat's little theorem states that for a nonzero number 'a' and prime
1513 // 'p', a^(p-1) ≡ 1 (mod p). Multiplying both sides of the equation by the
1514 // multiplicative inverse a^-1 yields a^(p-2) ≡ a^-1 (mod p). Thus, a^(p-2)
1515 // is the multiplicative inverse.
1516 //
1517 // In order to efficiently compute a^(p-2), p-2 needs to be split into a
1518 // sequence of squares and multiplications that minimizes the number of
1519 // multiplications needed (since they are more costly than squarings).
1520 // Intermediate results are saved and reused as well.
1521 //
1522 // The secp256k1 prime - 2 is 2^256 - 4294968275. In binary, that is:
1523 //
1524 // 11111111 11111111 11111111 11111111
1525 // 11111111 11111111 11111111 11111111
1526 // 11111111 11111111 11111111 11111111
1527 // 11111111 11111111 11111111 11111111
1528 // 11111111 11111111 11111111 11111111
1529 // 11111111 11111111 11111111 11111111
1530 // 11111111 11111111 11111111 11111110
1531 // 11111111 11111111 11111100 00101101
1532 //
1533 // Notice that can be broken up into five windows of consecutive 1s (in
1534 // order of least to most significant) as:
1535 //
1536 // 2-bit window with 1 bit set (bit 1 unset)
1537 // 3-bit window with 2 bits set (bit 4 unset)
1538 // 5-bit window with 1 bit set (bits 6, 7, 8, 9 unset)
1539 // 23-bit window with 22 bits set (bit 32 unset)
1540 // 223-bit window with all 223 bits set
1541 //
1542 // Thus, the groups of 1 bits in each window forms the set:
1543 // S = {1, 2, 22, 223}.
1544 //
1545 // The strategy is to calculate a^(2^n - 1) for each grouping via an
1546 // addition chain with a sliding window.
1547 //
1548 // The addition chain used is (credits to Peter Dettman):
1549 // (0,0),(1,0),(2,2),(3,2),(4,1),(5,5),(6,6),(7,7),(8,8),(9,7),(10,2)
1550 // => 2^[1] 2^[2] 2^3 2^6 2^9 2^11 2^[22] 2^44 2^88 2^176 2^220 2^[223]
1551 //
1552 // This has a cost of 255 field squarings and 15 field multiplications.
1553 var a, a2, a3, a6, a9, a11, a22, a44, a88, a176, a220, a223 FieldVal
1554 a.Set(f)
1555 a2.SquareVal(&a).Mul(&a) // a2 = a^(2^2 - 1)
1556 a3.SquareVal(&a2).Mul(&a) // a3 = a^(2^3 - 1)
1557 a6.SquareVal(&a3).Square().Square() // a6 = a^(2^6 - 2^3)
1558 a6.Mul(&a3) // a6 = a^(2^6 - 1)
1559 a9.SquareVal(&a6).Square().Square() // a9 = a^(2^9 - 2^3)
1560 a9.Mul(&a3) // a9 = a^(2^9 - 1)
1561 a11.SquareVal(&a9).Square() // a11 = a^(2^11 - 2^2)
1562 a11.Mul(&a2) // a11 = a^(2^11 - 1)
1563 a22.SquareVal(&a11).Square().Square().Square().Square() // a22 = a^(2^16 - 2^5)
1564 a22.Square().Square().Square().Square().Square() // a22 = a^(2^21 - 2^10)
1565 a22.Square() // a22 = a^(2^22 - 2^11)
1566 a22.Mul(&a11) // a22 = a^(2^22 - 1)
1567 a44.SquareVal(&a22).Square().Square().Square().Square() // a44 = a^(2^27 - 2^5)
1568 a44.Square().Square().Square().Square().Square() // a44 = a^(2^32 - 2^10)
1569 a44.Square().Square().Square().Square().Square() // a44 = a^(2^37 - 2^15)
1570 a44.Square().Square().Square().Square().Square() // a44 = a^(2^42 - 2^20)
1571 a44.Square().Square() // a44 = a^(2^44 - 2^22)
1572 a44.Mul(&a22) // a44 = a^(2^44 - 1)
1573 a88.SquareVal(&a44).Square().Square().Square().Square() // a88 = a^(2^49 - 2^5)
1574 a88.Square().Square().Square().Square().Square() // a88 = a^(2^54 - 2^10)
1575 a88.Square().Square().Square().Square().Square() // a88 = a^(2^59 - 2^15)
1576 a88.Square().Square().Square().Square().Square() // a88 = a^(2^64 - 2^20)
1577 a88.Square().Square().Square().Square().Square() // a88 = a^(2^69 - 2^25)
1578 a88.Square().Square().Square().Square().Square() // a88 = a^(2^74 - 2^30)
1579 a88.Square().Square().Square().Square().Square() // a88 = a^(2^79 - 2^35)
1580 a88.Square().Square().Square().Square().Square() // a88 = a^(2^84 - 2^40)
1581 a88.Square().Square().Square().Square() // a88 = a^(2^88 - 2^44)
1582 a88.Mul(&a44) // a88 = a^(2^88 - 1)
1583 a176.SquareVal(&a88).Square().Square().Square().Square() // a176 = a^(2^93 - 2^5)
1584 a176.Square().Square().Square().Square().Square() // a176 = a^(2^98 - 2^10)
1585 a176.Square().Square().Square().Square().Square() // a176 = a^(2^103 - 2^15)
1586 a176.Square().Square().Square().Square().Square() // a176 = a^(2^108 - 2^20)
1587 a176.Square().Square().Square().Square().Square() // a176 = a^(2^113 - 2^25)
1588 a176.Square().Square().Square().Square().Square() // a176 = a^(2^118 - 2^30)
1589 a176.Square().Square().Square().Square().Square() // a176 = a^(2^123 - 2^35)
1590 a176.Square().Square().Square().Square().Square() // a176 = a^(2^128 - 2^40)
1591 a176.Square().Square().Square().Square().Square() // a176 = a^(2^133 - 2^45)
1592 a176.Square().Square().Square().Square().Square() // a176 = a^(2^138 - 2^50)
1593 a176.Square().Square().Square().Square().Square() // a176 = a^(2^143 - 2^55)
1594 a176.Square().Square().Square().Square().Square() // a176 = a^(2^148 - 2^60)
1595 a176.Square().Square().Square().Square().Square() // a176 = a^(2^153 - 2^65)
1596 a176.Square().Square().Square().Square().Square() // a176 = a^(2^158 - 2^70)
1597 a176.Square().Square().Square().Square().Square() // a176 = a^(2^163 - 2^75)
1598 a176.Square().Square().Square().Square().Square() // a176 = a^(2^168 - 2^80)
1599 a176.Square().Square().Square().Square().Square() // a176 = a^(2^173 - 2^85)
1600 a176.Square().Square().Square() // a176 = a^(2^176 - 2^88)
1601 a176.Mul(&a88) // a176 = a^(2^176 - 1)
1602 a220.SquareVal(&a176).Square().Square().Square().Square() // a220 = a^(2^181 - 2^5)
1603 a220.Square().Square().Square().Square().Square() // a220 = a^(2^186 - 2^10)
1604 a220.Square().Square().Square().Square().Square() // a220 = a^(2^191 - 2^15)
1605 a220.Square().Square().Square().Square().Square() // a220 = a^(2^196 - 2^20)
1606 a220.Square().Square().Square().Square().Square() // a220 = a^(2^201 - 2^25)
1607 a220.Square().Square().Square().Square().Square() // a220 = a^(2^206 - 2^30)
1608 a220.Square().Square().Square().Square().Square() // a220 = a^(2^211 - 2^35)
1609 a220.Square().Square().Square().Square().Square() // a220 = a^(2^216 - 2^40)
1610 a220.Square().Square().Square().Square() // a220 = a^(2^220 - 2^44)
1611 a220.Mul(&a44) // a220 = a^(2^220 - 1)
1612 a223.SquareVal(&a220).Square().Square() // a223 = a^(2^223 - 2^3)
1613 a223.Mul(&a3) // a223 = a^(2^223 - 1)
1614 1615 f.SquareVal(&a223).Square().Square().Square().Square() // f = a^(2^228 - 2^5)
1616 f.Square().Square().Square().Square().Square() // f = a^(2^233 - 2^10)
1617 f.Square().Square().Square().Square().Square() // f = a^(2^238 - 2^15)
1618 f.Square().Square().Square().Square().Square() // f = a^(2^243 - 2^20)
1619 f.Square().Square().Square() // f = a^(2^246 - 2^23)
1620 f.Mul(&a22) // f = a^(2^246 - 4194305)
1621 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
1622 f.Mul(&a) // f = a^(2^251 - 134217759)
1623 f.Square().Square().Square() // f = a^(2^254 - 1073742072)
1624 f.Mul(&a2) // f = a^(2^254 - 1073742069)
1625 f.Square().Square() // f = a^(2^256 - 4294968276)
1626 return f.Mul(&a) // f = a^(2^256 - 4294968275) = a^(p-2)
1627 }
1628 1629 // IsGtOrEqPrimeMinusOrder returns whether or not the field value is greater
1630 // than or equal to the field prime minus the secp256k1 group order in constant
1631 // time.
1632 //
1633 // Preconditions:
1634 // - The field value MUST be normalized
1635 func (f *FieldVal) IsGtOrEqPrimeMinusOrder() bool {
1636 // The secp256k1 prime is equivalent to 2^256 - 4294968273 and the group
1637 // order is 2^256 - 432420386565659656852420866394968145599. Thus,
1638 // the prime minus the group order is:
1639 // 432420386565659656852420866390673177326
1640 //
1641 // In hex that is:
1642 // 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da172 2fc9baee
1643 //
1644 // Converting that to field representation (base 2^26) is:
1645 //
1646 // n[0] = 0x03c9baee
1647 // n[1] = 0x03685c8b
1648 // n[2] = 0x01fc4402
1649 // n[3] = 0x006542dd
1650 // n[4] = 0x01455123
1651 //
1652 // This can be verified with the following test code:
1653 // pMinusN := new(big.Int).Sub(curveParams.P, curveParams.N)
1654 // var fv FieldVal
1655 // fv.SetByteSlice(pMinusN.Bytes())
1656 // t.Logf("%x", fv.n)
1657 //
1658 // Outputs: [3c9baee 3685c8b 1fc4402 6542dd 1455123 0 0 0 0 0]
1659 const (
1660 pMinusNWordZero = 0x03c9baee
1661 pMinusNWordOne = 0x03685c8b
1662 pMinusNWordTwo = 0x01fc4402
1663 pMinusNWordThree = 0x006542dd
1664 pMinusNWordFour = 0x01455123
1665 pMinusNWordFive = 0x00000000
1666 pMinusNWordSix = 0x00000000
1667 pMinusNWordSeven = 0x00000000
1668 pMinusNWordEight = 0x00000000
1669 pMinusNWordNine = 0x00000000
1670 )
1671 1672 // The intuition here is that the value is greater than field prime minus
1673 // the group order if one of the higher individual words is greater than the
1674 // corresponding word and all higher words in the value are equal.
1675 result := constantTimeGreater(f.n[9], pMinusNWordNine)
1676 highWordsEqual := constantTimeEq(f.n[9], pMinusNWordNine)
1677 result |= highWordsEqual & constantTimeGreater(f.n[8], pMinusNWordEight)
1678 highWordsEqual &= constantTimeEq(f.n[8], pMinusNWordEight)
1679 result |= highWordsEqual & constantTimeGreater(f.n[7], pMinusNWordSeven)
1680 highWordsEqual &= constantTimeEq(f.n[7], pMinusNWordSeven)
1681 result |= highWordsEqual & constantTimeGreater(f.n[6], pMinusNWordSix)
1682 highWordsEqual &= constantTimeEq(f.n[6], pMinusNWordSix)
1683 result |= highWordsEqual & constantTimeGreater(f.n[5], pMinusNWordFive)
1684 highWordsEqual &= constantTimeEq(f.n[5], pMinusNWordFive)
1685 result |= highWordsEqual & constantTimeGreater(f.n[4], pMinusNWordFour)
1686 highWordsEqual &= constantTimeEq(f.n[4], pMinusNWordFour)
1687 result |= highWordsEqual & constantTimeGreater(f.n[3], pMinusNWordThree)
1688 highWordsEqual &= constantTimeEq(f.n[3], pMinusNWordThree)
1689 result |= highWordsEqual & constantTimeGreater(f.n[2], pMinusNWordTwo)
1690 highWordsEqual &= constantTimeEq(f.n[2], pMinusNWordTwo)
1691 result |= highWordsEqual & constantTimeGreater(f.n[1], pMinusNWordOne)
1692 highWordsEqual &= constantTimeEq(f.n[1], pMinusNWordOne)
1693 result |= highWordsEqual & constantTimeGreaterOrEq(f.n[0], pMinusNWordZero)
1694 1695 return result != 0
1696 }
1697