1 // Copyright (c) 2020-2023 The Decred developers
2 // Use of this source code is governed by an ISC
3 // license that can be found in the LICENSE file.
4 5 package secp256k1
6 7 import (
8 "math/big"
9 10 "encoding/hex"
11 )
12 13 // References:
14 // [SECG]: Recommended Elliptic Curve Domain Parameters
15 // https://www.secg.org/sec2-v2.pdf
16 //
17 // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
18 // http://cacr.uwaterloo.ca/hac/
19 20 // Many elliptic curve operations require working with scalars in a finite field
21 // characterized by the order of the group underlying the secp256k1 curve.
22 // Given this precision is larger than the biggest available native type,
23 // obviously some form of bignum math is needed. This code implements
24 // specialized fixed-precision field arithmetic rather than relying on an
25 // arbitrary-precision arithmetic package such as math/big for dealing with the
26 // math modulo the group order since the size is known. As a result, rather
27 // large performance gains are achieved by taking advantage of many
28 // optimizations not available to arbitrary-precision arithmetic and generic
29 // modular arithmetic algorithms.
30 //
31 // There are various ways to internally represent each element. For example,
32 // the most obvious representation would be to use an array of 4 uint64s (64
33 // bits * 4 = 256 bits). However, that representation suffers from the fact
34 // that there is no native Go type large enough to handle the intermediate
35 // results while adding or multiplying two 64-bit numbers.
36 //
37 // Given the above, this implementation represents the field elements as 8
38 // uint32s with each word (array entry) treated as base 2^32. This was chosen
39 // because most systems at the current time are 64-bit (or at least have 64-bit
40 // registers available for specialized purposes such as MMX) so the intermediate
41 // results can typically be done using a native register (and using uint64s to
42 // avoid the need for additional half-word arithmetic)
43 44 const (
45 // These fields provide convenient access to each of the words of the
46 // secp256k1 curve group order N to improve code readability.
47 //
48 // The group order of the curve per [SECG] is:
49 // 0xffffffff ffffffff ffffffff fffffffe baaedce6 af48a03b bfd25e8c d0364141
50 //
51 // nolint: dupword
52 orderWordZero uint32 = 0xd0364141
53 orderWordOne uint32 = 0xbfd25e8c
54 orderWordTwo uint32 = 0xaf48a03b
55 orderWordThree uint32 = 0xbaaedce6
56 orderWordFour uint32 = 0xfffffffe
57 orderWordFive uint32 = 0xffffffff
58 orderWordSix uint32 = 0xffffffff
59 orderWordSeven uint32 = 0xffffffff
60 // These fields provide convenient access to each of the words of the two's
61 // complement of the secp256k1 curve group order N to improve code
62 // readability.
63 //
64 // The two's complement of the group order is:
65 // 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da173 2fc9bebf
66 orderComplementWordZero = (^orderWordZero) + 1
67 orderComplementWordOne = ^orderWordOne
68 orderComplementWordTwo = ^orderWordTwo
69 orderComplementWordThree = ^orderWordThree
70 // orderComplementWordFour uint32 = ^orderWordFour // unused
71 // orderComplementWordFive uint32 = ^orderWordFive // unused
72 // orderComplementWordSix uint32 = ^orderWordSix // unused
73 // orderComplementWordSeven uint32 = ^orderWordSeven // unused
74 // These fields provide convenient access to each of the words of the
75 // secp256k1 curve group order N / 2 to improve code readability and avoid
76 // the need to recalculate them.
77 //
78 // The half order of the secp256k1 curve group is:
79 // 0x7fffffff ffffffff ffffffff ffffffff 5d576e73 57a4501d dfe92f46 681b20a0
80 //
81 // nolint: dupword
82 halfOrderWordZero uint32 = 0x681b20a0
83 halfOrderWordOne uint32 = 0xdfe92f46
84 halfOrderWordTwo uint32 = 0x57a4501d
85 halfOrderWordThree uint32 = 0x5d576e73
86 halfOrderWordFour uint32 = 0xffffffff
87 halfOrderWordFive uint32 = 0xffffffff
88 halfOrderWordSix uint32 = 0xffffffff
89 halfOrderWordSeven uint32 = 0x7fffffff
90 // uint32Mask is simply a mask with all bits set for a uint32 and is used to
91 // improve the readability of the code.
92 uint32Mask = 0xffffffff
93 )
94 95 96 // ModNScalar implements optimized 256-bit constant-time fixed-precision
97 // arithmetic over the secp256k1 group order. This means all arithmetic is
98 // performed modulo:
99 //
100 // 0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141
101 //
102 // It only implements the arithmetic needed for elliptic curve operations,
103 // however, the operations that are not implemented can typically be worked
104 // around if absolutely needed. For example, subtraction can be performed by
105 // adding the negation.
106 //
107 // Should it be absolutely necessary, conversion to the standard library
108 // math/big.Int can be accomplished by using the Bytes method, slicing the
109 // resulting fixed-size array, and feeding it to big.Int.SetBytes. However,
110 // that should typically be avoided when possible as conversion to big.Ints
111 // requires allocations, is not constant time, and is slower when working modulo
112 // the group order.
113 type ModNScalar struct {
114 // The scalar is represented as 8 32-bit integers in base 2^32.
115 //
116 // The following depicts the internal representation:
117 // ---------------------------------------------------------
118 // | n[7] | n[6] | ... | n[0] |
119 // | 32 bits | 32 bits | ... | 32 bits |
120 // | Mult: 2^(32*7) | Mult: 2^(32*6) | ... | Mult: 2^(32*0) |
121 // ---------------------------------------------------------
122 //
123 // For example, consider the number 2^87 + 2^42 + 1. It would be
124 // represented as:
125 // n[0] = 1
126 // n[1] = 2^10
127 // n[2] = 2^23
128 // n[3..7] = 0
129 //
130 // The full 256-bit value is then calculated by looping i from 7..0 and
131 // doing sum(n[i] * 2^(32i)) like so:
132 // n[7] * 2^(32*7) = 0 * 2^224 = 0
133 // n[6] * 2^(32*6) = 0 * 2^192 = 0
134 // ...
135 // n[2] * 2^(32*2) = 2^23 * 2^64 = 2^87
136 // n[1] * 2^(32*1) = 2^10 * 2^32 = 2^42
137 // n[0] * 2^(32*0) = 1 * 2^0 = 1
138 // Sum: 0 + 0 + ... + 2^87 + 2^42 + 1 = 2^87 + 2^42 + 1
139 n [8]uint32
140 }
141 142 // String returns the scalar as a human-readable hex string.
143 //
144 // This is NOT constant time.
145 func (s ModNScalar) String() string {
146 b := s.Bytes()
147 return string(hex.EncodeToString(b[:]))
148 }
149 150 // Set sets the scalar equal to a copy of the passed one in constant time.
151 //
152 // The scalar is returned to support chaining. This enables syntax like:
153 // s := new(ModNScalar).Set(s2).Add(1) so that s = s2 + 1 where s2 is not
154 // modified.
155 func (s *ModNScalar) Set(val *ModNScalar) *ModNScalar {
156 *s = *val
157 return s
158 }
159 160 // Zero sets the scalar to zero in constant time. A newly created scalar is
161 // already set to zero. This function can be useful to clear an existing scalar
162 // for reuse.
163 func (s *ModNScalar) Zero() {
164 s.n[0] = 0
165 s.n[1] = 0
166 s.n[2] = 0
167 s.n[3] = 0
168 s.n[4] = 0
169 s.n[5] = 0
170 s.n[6] = 0
171 s.n[7] = 0
172 }
173 174 // IsZeroBit returns 1 when the scalar is equal to zero or 0 otherwise in
175 // constant time.
176 //
177 // Note that a bool is not used here because it is not possible in Go to convert
178 // from a bool to numeric value in constant time and many constant-time
179 // operations require a numeric value. See IsZero for the version that returns
180 // a bool.
181 func (s *ModNScalar) IsZeroBit() uint32 {
182 // The scalar can only be zero if no bits are set in any of the words.
183 bits := s.n[0] | s.n[1] | s.n[2] | s.n[3] | s.n[4] | s.n[5] | s.n[6] | s.n[7]
184 return constantTimeEq(bits, 0)
185 }
186 187 // IsZero returns whether or not the scalar is equal to zero in constant time.
188 func (s *ModNScalar) IsZero() bool {
189 // The scalar can only be zero if no bits are set in any of the words.
190 bits := s.n[0] | s.n[1] | s.n[2] | s.n[3] | s.n[4] | s.n[5] | s.n[6] | s.n[7]
191 return bits == 0
192 }
193 194 // SetInt sets the scalar to the passed integer in constant time. This is a
195 // convenience function since it is fairly common to perform some arithmetic
196 // with small native integers.
197 //
198 // The scalar is returned to support chaining. This enables syntax like:
199 // s := new(ModNScalar).SetInt(2).Mul(s2) so that s = 2 * s2.
200 func (s *ModNScalar) SetInt(ui uint32) *ModNScalar {
201 s.Zero()
202 s.n[0] = ui
203 return s
204 }
205 206 // constantTimeEq returns 1 if a == b or 0 otherwise in constant time.
207 func constantTimeEq(a, b uint32) uint32 {
208 return uint32((uint64(a^b) - 1) >> 63)
209 }
210 211 // constantTimeNotEq returns 1 if a != b or 0 otherwise in constant time.
212 func constantTimeNotEq(a, b uint32) uint32 {
213 return ^uint32((uint64(a^b)-1)>>63) & 1
214 }
215 216 // constantTimeLess returns 1 if a < b or 0 otherwise in constant time.
217 func constantTimeLess(a, b uint32) uint32 {
218 return uint32((uint64(a) - uint64(b)) >> 63)
219 }
220 221 // constantTimeLessOrEq returns 1 if a <= b or 0 otherwise in constant time.
222 func constantTimeLessOrEq(a, b uint32) uint32 {
223 return uint32((uint64(a) - uint64(b) - 1) >> 63)
224 }
225 226 // constantTimeGreater returns 1 if a > b or 0 otherwise in constant time.
227 func constantTimeGreater(a, b uint32) uint32 {
228 return constantTimeLess(b, a)
229 }
230 231 // constantTimeGreaterOrEq returns 1 if a >= b or 0 otherwise in constant time.
232 func constantTimeGreaterOrEq(a, b uint32) uint32 {
233 return constantTimeLessOrEq(b, a)
234 }
235 236 // constantTimeMin returns min(a,b) in constant time.
237 func constantTimeMin(a, b uint32) uint32 {
238 return b ^ ((a ^ b) & -constantTimeLess(a, b))
239 }
240 241 // overflows determines if the current scalar is greater than or equal to the
242 // group order in constant time and returns 1 if it is or 0 otherwise.
243 func (s *ModNScalar) overflows() uint32 {
244 // The intuition here is that the scalar is greater than the group order if
245 // one of the higher individual words is greater than corresponding word of
246 // the group order and all higher words in the scalar are equal to their
247 // corresponding word of the group order. Since this type is modulo the
248 // group order, being equal is also an overflow back to 0.
249 //
250 // Note that the words 5, 6, and 7 are all the max uint32 value, so there is
251 // no need to test if those individual words of the scalar exceeds them,
252 // hence, only equality is checked for them.
253 highWordsEqual := constantTimeEq(s.n[7], orderWordSeven)
254 highWordsEqual &= constantTimeEq(s.n[6], orderWordSix)
255 highWordsEqual &= constantTimeEq(s.n[5], orderWordFive)
256 overflow := highWordsEqual & constantTimeGreater(s.n[4], orderWordFour)
257 highWordsEqual &= constantTimeEq(s.n[4], orderWordFour)
258 overflow |= highWordsEqual & constantTimeGreater(s.n[3], orderWordThree)
259 highWordsEqual &= constantTimeEq(s.n[3], orderWordThree)
260 overflow |= highWordsEqual & constantTimeGreater(s.n[2], orderWordTwo)
261 highWordsEqual &= constantTimeEq(s.n[2], orderWordTwo)
262 overflow |= highWordsEqual & constantTimeGreater(s.n[1], orderWordOne)
263 highWordsEqual &= constantTimeEq(s.n[1], orderWordOne)
264 overflow |= highWordsEqual & constantTimeGreaterOrEq(s.n[0], orderWordZero)
265 return overflow
266 }
267 268 // reduce256 reduces the current scalar modulo the group order in accordance
269 // with the overflows parameter in constant time. The overflows parameter
270 // specifies whether or not the scalar is known to be greater than the group
271 // order and MUST either be 1 in the case it is or 0 in the case it is not for a
272 // correct result.
273 func (s *ModNScalar) reduce256(overflows uint32) {
274 // Notice that since s < 2^256 < 2N (where N is the group order), the max
275 // possible number of reductions required is one. Therefore, in the case a
276 // reduction is needed, it can be performed with a single subtraction of N.
277 // Also, recall that subtraction is equivalent to addition by the two's
278 // complement while ignoring the carry.
279 //
280 // When s >= N, the overflows parameter will be 1. Conversely, it will be 0
281 // when s < N. Thus multiplying by the overflows parameter will either
282 // result in 0 or the multiplicand itself.
283 //
284 // Combining the above along with the fact that s + 0 = s, the following is
285 // a constant time implementation that works by either adding 0 or the two's
286 // complement of N as needed.
287 //
288 // The final result will be in the range 0 <= s < N as expected.
289 overflows64 := uint64(overflows)
290 c := uint64(s.n[0]) + overflows64*uint64(orderComplementWordZero)
291 s.n[0] = uint32(c & uint32Mask)
292 c = (c >> 32) + uint64(s.n[1]) + overflows64*uint64(orderComplementWordOne)
293 s.n[1] = uint32(c & uint32Mask)
294 c = (c >> 32) + uint64(s.n[2]) + overflows64*uint64(orderComplementWordTwo)
295 s.n[2] = uint32(c & uint32Mask)
296 c = (c >> 32) + uint64(s.n[3]) + overflows64*uint64(orderComplementWordThree)
297 s.n[3] = uint32(c & uint32Mask)
298 c = (c >> 32) + uint64(s.n[4]) + overflows64 // * 1
299 s.n[4] = uint32(c & uint32Mask)
300 c = (c >> 32) + uint64(s.n[5]) // + overflows64 * 0
301 s.n[5] = uint32(c & uint32Mask)
302 c = (c >> 32) + uint64(s.n[6]) // + overflows64 * 0
303 s.n[6] = uint32(c & uint32Mask)
304 c = (c >> 32) + uint64(s.n[7]) // + overflows64 * 0
305 s.n[7] = uint32(c & uint32Mask)
306 }
307 308 // SetBytes interprets the provided array as a 256-bit big-endian unsigned
309 // integer, reduces it modulo the group order, sets the scalar to the result,
310 // and returns either 1 if it was reduced (aka it overflowed) or 0 otherwise in
311 // constant time.
312 //
313 // Note that a bool is not used here because it is not possible in Go to convert
314 // from a bool to numeric value in constant time and many constant-time
315 // operations require a numeric value.
316 func (s *ModNScalar) SetBytes(b *[32]byte) uint32 {
317 // Pack the 256 total bits across the 8 uint32 words. This could be done
318 // with a for loop, but benchmarks show this unrolled version is about 2
319 // times faster than the variant that uses a loop.
320 s.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 | uint32(b[28])<<24
321 s.n[1] = uint32(b[27]) | uint32(b[26])<<8 | uint32(b[25])<<16 | uint32(b[24])<<24
322 s.n[2] = uint32(b[23]) | uint32(b[22])<<8 | uint32(b[21])<<16 | uint32(b[20])<<24
323 s.n[3] = uint32(b[19]) | uint32(b[18])<<8 | uint32(b[17])<<16 | uint32(b[16])<<24
324 s.n[4] = uint32(b[15]) | uint32(b[14])<<8 | uint32(b[13])<<16 | uint32(b[12])<<24
325 s.n[5] = uint32(b[11]) | uint32(b[10])<<8 | uint32(b[9])<<16 | uint32(b[8])<<24
326 s.n[6] = uint32(b[7]) | uint32(b[6])<<8 | uint32(b[5])<<16 | uint32(b[4])<<24
327 s.n[7] = uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
328 // The value might be >= N, so reduce it as required and return whether or
329 // not it was reduced.
330 needsReduce := s.overflows()
331 s.reduce256(needsReduce)
332 return needsReduce
333 }
334 335 // zeroArray32 zeroes the provided 32-byte buffer.
336 func zeroArray32(b *[32]byte) { *b = [32]byte{} }
337 338 // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
339 // integer (meaning it is truncated to the first 32 bytes), reduces it modulo
340 // the group order, sets the scalar to the result, and returns whether or not
341 // the resulting truncated 256-bit integer overflowed in constant time.
342 //
343 // Note that since passing a slice with more than 32 bytes is truncated, it is
344 // possible that the truncated value is less than the order of the curve and
345 // hence it will not be reported as having overflowed in that case. It is up to
346 // the caller to decide whether it needs to provide numbers of the appropriate
347 // size or it is acceptable to use this function with the described truncation
348 // and overflow behavior.
349 func (s *ModNScalar) SetByteSlice(b []byte) bool {
350 var b32 [32]byte
351 b = b[:constantTimeMin(uint32(len(b)), 32)]
352 copy(b32[:], b32[:32-len(b)])
353 copy(b32[32-len(b):], b)
354 result := s.SetBytes(&b32)
355 zeroArray32(&b32)
356 return result != 0
357 }
358 359 // PutBytesUnchecked unpacks the scalar to a 32-byte big-endian value directly
360 // into the passed byte slice in constant time. The target slice must have at
361 // least 32 bytes available or it will panic.
362 //
363 // There is a similar function, PutBytes, which unpacks the scalar into a
364 // 32-byte array directly. This version is provided since it can be useful to
365 // write directly into part of a larger buffer without needing a separate
366 // allocation.
367 //
368 // Preconditions:
369 // - The target slice MUST have at least 32 bytes available
370 func (s *ModNScalar) PutBytesUnchecked(b []byte) {
371 // Unpack the 256 total bits from the 8 uint32 words. This could be done
372 // with a for loop, but benchmarks show this unrolled version is about 2
373 // times faster than the variant which uses a loop.
374 b[31] = byte(s.n[0])
375 b[30] = byte(s.n[0] >> 8)
376 b[29] = byte(s.n[0] >> 16)
377 b[28] = byte(s.n[0] >> 24)
378 b[27] = byte(s.n[1])
379 b[26] = byte(s.n[1] >> 8)
380 b[25] = byte(s.n[1] >> 16)
381 b[24] = byte(s.n[1] >> 24)
382 b[23] = byte(s.n[2])
383 b[22] = byte(s.n[2] >> 8)
384 b[21] = byte(s.n[2] >> 16)
385 b[20] = byte(s.n[2] >> 24)
386 b[19] = byte(s.n[3])
387 b[18] = byte(s.n[3] >> 8)
388 b[17] = byte(s.n[3] >> 16)
389 b[16] = byte(s.n[3] >> 24)
390 b[15] = byte(s.n[4])
391 b[14] = byte(s.n[4] >> 8)
392 b[13] = byte(s.n[4] >> 16)
393 b[12] = byte(s.n[4] >> 24)
394 b[11] = byte(s.n[5])
395 b[10] = byte(s.n[5] >> 8)
396 b[9] = byte(s.n[5] >> 16)
397 b[8] = byte(s.n[5] >> 24)
398 b[7] = byte(s.n[6])
399 b[6] = byte(s.n[6] >> 8)
400 b[5] = byte(s.n[6] >> 16)
401 b[4] = byte(s.n[6] >> 24)
402 b[3] = byte(s.n[7])
403 b[2] = byte(s.n[7] >> 8)
404 b[1] = byte(s.n[7] >> 16)
405 b[0] = byte(s.n[7] >> 24)
406 }
407 408 // PutBytes unpacks the scalar to a 32-byte big-endian value using the passed
409 // byte array in constant time.
410 //
411 // There is a similar function, PutBytesUnchecked, which unpacks the scalar into
412 // a slice that must have at least 32 bytes available. This version is provided
413 // since it can be useful to write directly into an array that is type checked.
414 //
415 // Alternatively, there is also Bytes, which unpacks the scalar into a new array
416 // and returns that which can sometimes be more ergonomic in applications that
417 // aren't concerned about an additional copy.
418 func (s *ModNScalar) PutBytes(b *[32]byte) { s.PutBytesUnchecked(b[:]) }
419 420 // Bytes unpacks the scalar to a 32-byte big-endian value in constant time.
421 //
422 // See PutBytes and PutBytesUnchecked for variants that allow an array or slice
423 // to be passed which can be useful to cut down on the number of allocations
424 // by allowing the caller to reuse a buffer or write directly into part of a
425 // larger buffer.
426 func (s *ModNScalar) Bytes() [32]byte {
427 var b [32]byte
428 s.PutBytesUnchecked(b[:])
429 return b
430 }
431 432 // IsOdd returns whether the scalar is an odd number in constant time.
433 func (s *ModNScalar) IsOdd() bool {
434 // Only odd numbers have the bottom bit set.
435 return s.n[0]&1 == 1
436 }
437 438 // Equals returns whether or not the two scalars are the same in constant time.
439 func (s *ModNScalar) Equals(val *ModNScalar) bool {
440 // Xor only sets bits when they are different, so the two scalars can only
441 // be the same if no bits are set after xoring each word.
442 bits := (s.n[0] ^ val.n[0]) | (s.n[1] ^ val.n[1]) | (s.n[2] ^ val.n[2]) |
443 (s.n[3] ^ val.n[3]) | (s.n[4] ^ val.n[4]) | (s.n[5] ^ val.n[5]) |
444 (s.n[6] ^ val.n[6]) | (s.n[7] ^ val.n[7])
445 446 return bits == 0
447 }
448 449 // Add2 adds the passed two scalars together modulo the group order in constant
450 // time and stores the result in s.
451 //
452 // The scalar is returned to support chaining. This enables syntax like:
453 // s3.Add2(s, s2).AddInt(1) so that s3 = s + s2 + 1.
454 func (s *ModNScalar) Add2(val1, val2 *ModNScalar) *ModNScalar {
455 c := uint64(val1.n[0]) + uint64(val2.n[0])
456 s.n[0] = uint32(c & uint32Mask)
457 c = (c >> 32) + uint64(val1.n[1]) + uint64(val2.n[1])
458 s.n[1] = uint32(c & uint32Mask)
459 c = (c >> 32) + uint64(val1.n[2]) + uint64(val2.n[2])
460 s.n[2] = uint32(c & uint32Mask)
461 c = (c >> 32) + uint64(val1.n[3]) + uint64(val2.n[3])
462 s.n[3] = uint32(c & uint32Mask)
463 c = (c >> 32) + uint64(val1.n[4]) + uint64(val2.n[4])
464 s.n[4] = uint32(c & uint32Mask)
465 c = (c >> 32) + uint64(val1.n[5]) + uint64(val2.n[5])
466 s.n[5] = uint32(c & uint32Mask)
467 c = (c >> 32) + uint64(val1.n[6]) + uint64(val2.n[6])
468 s.n[6] = uint32(c & uint32Mask)
469 c = (c >> 32) + uint64(val1.n[7]) + uint64(val2.n[7])
470 s.n[7] = uint32(c & uint32Mask)
471 // The result is now 256 bits, but it might still be >= N, so use the
472 // existing normal reduce method for 256-bit values.
473 s.reduce256(uint32(c>>32) + s.overflows())
474 return s
475 }
476 477 // Add adds the passed scalar to the existing one modulo the group order in
478 // constant time and stores the result in s.
479 //
480 // The scalar is returned to support chaining. This enables syntax like:
481 // s.Add(s2).AddInt(1) so that s = s + s2 + 1.
482 func (s *ModNScalar) Add(val *ModNScalar) *ModNScalar { return s.Add2(s, val) }
483 484 // accumulator96 provides a 96-bit accumulator for use in the intermediate
485 // calculations requiring more than 64-bits.
486 type accumulator96 struct {
487 n [3]uint32
488 }
489 490 // Add adds the passed unsigned 64-bit value to the accumulator.
491 func (a *accumulator96) Add(v uint64) {
492 low := uint32(v & uint32Mask)
493 hi := uint32(v >> 32)
494 a.n[0] += low
495 hi += constantTimeLess(a.n[0], low) // Carry if overflow in n[0].
496 a.n[1] += hi
497 a.n[2] += constantTimeLess(a.n[1], hi) // Carry if overflow in n[1].
498 }
499 500 // Rsh32 right shifts the accumulator by 32 bits.
501 func (a *accumulator96) Rsh32() {
502 a.n[0] = a.n[1]
503 a.n[1] = a.n[2]
504 a.n[2] = 0
505 }
506 507 // reduce385 reduces the 385-bit intermediate result in the passed terms modulo
508 // the group order in constant time and stores the result in s.
509 func (s *ModNScalar) reduce385(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12 uint64) {
510 // At this point, the intermediate result in the passed terms has been
511 // reduced to fit within 385 bits, so reduce it again using the same method
512 // described in reduce512. As before, the intermediate result will end up
513 // being reduced by another 127 bits to 258 bits, thus 9 32-bit terms are
514 // needed for this iteration. The reduced terms are assigned back to t0
515 // through t8.
516 //
517 // Note that several of the intermediate calculations require adding 64-bit
518 // products together which would overflow a uint64, so a 96-bit accumulator
519 // is used instead until the value is reduced enough to use native uint64s.
520 // Terms for 2^(32*0).
521 var acc accumulator96
522 acc.n[0] = uint32(t0) // == acc.Add(t0) because acc is guaranteed to be 0.
523 acc.Add(t8 * uint64(orderComplementWordZero))
524 t0 = uint64(acc.n[0])
525 acc.Rsh32()
526 // Terms for 2^(32*1).
527 acc.Add(t1)
528 acc.Add(t8 * uint64(orderComplementWordOne))
529 acc.Add(t9 * uint64(orderComplementWordZero))
530 t1 = uint64(acc.n[0])
531 acc.Rsh32()
532 // Terms for 2^(32*2).
533 acc.Add(t2)
534 acc.Add(t8 * uint64(orderComplementWordTwo))
535 acc.Add(t9 * uint64(orderComplementWordOne))
536 acc.Add(t10 * uint64(orderComplementWordZero))
537 t2 = uint64(acc.n[0])
538 acc.Rsh32()
539 // Terms for 2^(32*3).
540 acc.Add(t3)
541 acc.Add(t8 * uint64(orderComplementWordThree))
542 acc.Add(t9 * uint64(orderComplementWordTwo))
543 acc.Add(t10 * uint64(orderComplementWordOne))
544 acc.Add(t11 * uint64(orderComplementWordZero))
545 t3 = uint64(acc.n[0])
546 acc.Rsh32()
547 // Terms for 2^(32*4).
548 acc.Add(t4)
549 acc.Add(t8) // * uint64(orderComplementWordFour) // * 1
550 acc.Add(t9 * uint64(orderComplementWordThree))
551 acc.Add(t10 * uint64(orderComplementWordTwo))
552 acc.Add(t11 * uint64(orderComplementWordOne))
553 acc.Add(t12 * uint64(orderComplementWordZero))
554 t4 = uint64(acc.n[0])
555 acc.Rsh32()
556 // Terms for 2^(32*5).
557 acc.Add(t5)
558 // acc.Add(t8 * uint64(orderComplementWordFive)) // 0
559 acc.Add(t9) // * uint64(orderComplementWordFour) // * 1
560 acc.Add(t10 * uint64(orderComplementWordThree))
561 acc.Add(t11 * uint64(orderComplementWordTwo))
562 acc.Add(t12 * uint64(orderComplementWordOne))
563 t5 = uint64(acc.n[0])
564 acc.Rsh32()
565 // Terms for 2^(32*6).
566 acc.Add(t6)
567 // acc.Add(t8 * uint64(orderComplementWordSix)) // 0
568 // acc.Add(t9 * uint64(orderComplementWordFive)) // 0
569 acc.Add(t10) // * uint64(orderComplementWordFour) // * 1
570 acc.Add(t11 * uint64(orderComplementWordThree))
571 acc.Add(t12 * uint64(orderComplementWordTwo))
572 t6 = uint64(acc.n[0])
573 acc.Rsh32()
574 // Terms for 2^(32*7).
575 acc.Add(t7)
576 // acc.Add(t8 * uint64(orderComplementWordSeven)) // 0
577 // acc.Add(t9 * uint64(orderComplementWordSix)) // 0
578 // acc.Add(t10 * uint64(orderComplementWordFive)) // 0
579 acc.Add(t11) // * uint64(orderComplementWordFour) // * 1
580 acc.Add(t12 * uint64(orderComplementWordThree))
581 t7 = uint64(acc.n[0])
582 acc.Rsh32()
583 // Terms for 2^(32*8).
584 // acc.Add(t9 * uint64(orderComplementWordSeven)) // 0
585 // acc.Add(t10 * uint64(orderComplementWordSix)) // 0
586 // acc.Add(t11 * uint64(orderComplementWordFive)) // 0
587 acc.Add(t12) // * uint64(orderComplementWordFour) // * 1
588 t8 = uint64(acc.n[0])
589 // acc.Rsh32() // No need since not used after this. Guaranteed to be 0.
590 //
591 // NOTE: All of the remaining multiplications for this iteration result in 0
592 // as they all involve multiplying by combinations of the fifth, sixth, and
593 // seventh words of the two's complement of N, which are 0, so skip them.
594 //
595 // At this point, the result is reduced to fit within 258 bits, so reduce it
596 // again using a slightly modified version of the same method. The maximum
597 // value in t8 is 2 at this point and therefore multiplying it by each word
598 // of the two's complement of N and adding it to a 32-bit term will result
599 // in a maximum requirement of 33 bits, so it is safe to use native uint64s
600 // here for the intermediate term carry propagation.
601 //
602 // Also, since the maximum value in t8 is 2, this ends up reducing by
603 // another 2 bits to 256 bits.
604 c := t0 + t8*uint64(orderComplementWordZero)
605 s.n[0] = uint32(c & uint32Mask)
606 c = (c >> 32) + t1 + t8*uint64(orderComplementWordOne)
607 s.n[1] = uint32(c & uint32Mask)
608 c = (c >> 32) + t2 + t8*uint64(orderComplementWordTwo)
609 s.n[2] = uint32(c & uint32Mask)
610 c = (c >> 32) + t3 + t8*uint64(orderComplementWordThree)
611 s.n[3] = uint32(c & uint32Mask)
612 c = (c >> 32) + t4 + t8 // * uint64(orderComplementWordFour) == * 1
613 s.n[4] = uint32(c & uint32Mask)
614 c = (c >> 32) + t5 // + t8*uint64(orderComplementWordFive) == 0
615 s.n[5] = uint32(c & uint32Mask)
616 c = (c >> 32) + t6 // + t8*uint64(orderComplementWordSix) == 0
617 s.n[6] = uint32(c & uint32Mask)
618 c = (c >> 32) + t7 // + t8*uint64(orderComplementWordSeven) == 0
619 s.n[7] = uint32(c & uint32Mask)
620 // The result is now 256 bits, but it might still be >= N, so use the
621 // existing normal reduce method for 256-bit values.
622 s.reduce256(uint32(c>>32) + s.overflows())
623 }
624 625 // reduce512 reduces the 512-bit intermediate result in the passed terms modulo
626 // the group order down to 385 bits in constant time and stores the result in s.
627 func (s *ModNScalar) reduce512(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15 uint64) {
628 // At this point, the intermediate result in the passed terms is grouped
629 // into the respective bases.
630 //
631 // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
632 // when the modulus is of the special form m = b^t - c, where log_2(c) < t,
633 // highly efficient reduction can be achieved per the provided algorithm.
634 //
635 // The secp256k1 group order fits this criteria since it is:
636 // 2^256 - 432420386565659656852420866394968145599
637 //
638 // Technically the max possible value here is (N-1)^2 since the two scalars
639 // being multiplied are always mod N. Nevertheless, it is safer to consider
640 // it to be (2^256-1)^2 = 2^512 - 2^256 + 1 since it is the product of two
641 // 256-bit values.
642 //
643 // The algorithm is to reduce the result modulo the prime by subtracting
644 // multiples of the group order N. However, in order simplify carry
645 // propagation, this adds with the two's complement of N to achieve the same
646 // result.
647 //
648 // Since the two's complement of N has 127 leading zero bits, this will end
649 // up reducing the intermediate result from 512 bits to 385 bits, resulting
650 // in 13 32-bit terms. The reduced terms are assigned back to t0 through
651 // t12.
652 //
653 // Note that several of the intermediate calculations require adding 64-bit
654 // products together which would overflow a uint64, so a 96-bit accumulator
655 // is used instead.
656 //
657 // Terms for 2^(32*0).
658 var acc accumulator96
659 acc.n[0] = uint32(t0) // == acc.Add(t0) because acc is guaranteed to be 0.
660 acc.Add(t8 * uint64(orderComplementWordZero))
661 t0 = uint64(acc.n[0])
662 acc.Rsh32()
663 // Terms for 2^(32*1).
664 acc.Add(t1)
665 acc.Add(t8 * uint64(orderComplementWordOne))
666 acc.Add(t9 * uint64(orderComplementWordZero))
667 t1 = uint64(acc.n[0])
668 acc.Rsh32()
669 // Terms for 2^(32*2).
670 acc.Add(t2)
671 acc.Add(t8 * uint64(orderComplementWordTwo))
672 acc.Add(t9 * uint64(orderComplementWordOne))
673 acc.Add(t10 * uint64(orderComplementWordZero))
674 t2 = uint64(acc.n[0])
675 acc.Rsh32()
676 // Terms for 2^(32*3).
677 acc.Add(t3)
678 acc.Add(t8 * uint64(orderComplementWordThree))
679 acc.Add(t9 * uint64(orderComplementWordTwo))
680 acc.Add(t10 * uint64(orderComplementWordOne))
681 acc.Add(t11 * uint64(orderComplementWordZero))
682 t3 = uint64(acc.n[0])
683 acc.Rsh32()
684 // Terms for 2^(32*4).
685 acc.Add(t4)
686 acc.Add(t8) // * uint64(orderComplementWordFour) // * 1
687 acc.Add(t9 * uint64(orderComplementWordThree))
688 acc.Add(t10 * uint64(orderComplementWordTwo))
689 acc.Add(t11 * uint64(orderComplementWordOne))
690 acc.Add(t12 * uint64(orderComplementWordZero))
691 t4 = uint64(acc.n[0])
692 acc.Rsh32()
693 // Terms for 2^(32*5).
694 acc.Add(t5)
695 // acc.Add(t8 * uint64(orderComplementWordFive)) // 0
696 acc.Add(t9) // * uint64(orderComplementWordFour) // * 1
697 acc.Add(t10 * uint64(orderComplementWordThree))
698 acc.Add(t11 * uint64(orderComplementWordTwo))
699 acc.Add(t12 * uint64(orderComplementWordOne))
700 acc.Add(t13 * uint64(orderComplementWordZero))
701 t5 = uint64(acc.n[0])
702 acc.Rsh32()
703 // Terms for 2^(32*6).
704 acc.Add(t6)
705 // acc.Add(t8 * uint64(orderComplementWordSix)) // 0
706 // acc.Add(t9 * uint64(orderComplementWordFive)) // 0
707 acc.Add(t10) // * uint64(orderComplementWordFour)) // * 1
708 acc.Add(t11 * uint64(orderComplementWordThree))
709 acc.Add(t12 * uint64(orderComplementWordTwo))
710 acc.Add(t13 * uint64(orderComplementWordOne))
711 acc.Add(t14 * uint64(orderComplementWordZero))
712 t6 = uint64(acc.n[0])
713 acc.Rsh32()
714 // Terms for 2^(32*7).
715 acc.Add(t7)
716 // acc.Add(t8 * uint64(orderComplementWordSeven)) // 0
717 // acc.Add(t9 * uint64(orderComplementWordSix)) // 0
718 // acc.Add(t10 * uint64(orderComplementWordFive)) // 0
719 acc.Add(t11) // * uint64(orderComplementWordFour) // * 1
720 acc.Add(t12 * uint64(orderComplementWordThree))
721 acc.Add(t13 * uint64(orderComplementWordTwo))
722 acc.Add(t14 * uint64(orderComplementWordOne))
723 acc.Add(t15 * uint64(orderComplementWordZero))
724 t7 = uint64(acc.n[0])
725 acc.Rsh32()
726 // Terms for 2^(32*8).
727 // acc.Add(t9 * uint64(orderComplementWordSeven)) // 0
728 // acc.Add(t10 * uint64(orderComplementWordSix)) // 0
729 // acc.Add(t11 * uint64(orderComplementWordFive)) // 0
730 acc.Add(t12) // * uint64(orderComplementWordFour) // * 1
731 acc.Add(t13 * uint64(orderComplementWordThree))
732 acc.Add(t14 * uint64(orderComplementWordTwo))
733 acc.Add(t15 * uint64(orderComplementWordOne))
734 t8 = uint64(acc.n[0])
735 acc.Rsh32()
736 // Terms for 2^(32*9).
737 // acc.Add(t10 * uint64(orderComplementWordSeven)) // 0
738 // acc.Add(t11 * uint64(orderComplementWordSix)) // 0
739 // acc.Add(t12 * uint64(orderComplementWordFive)) // 0
740 acc.Add(t13) // * uint64(orderComplementWordFour) // * 1
741 acc.Add(t14 * uint64(orderComplementWordThree))
742 acc.Add(t15 * uint64(orderComplementWordTwo))
743 t9 = uint64(acc.n[0])
744 acc.Rsh32()
745 // Terms for 2^(32*10).
746 // acc.Add(t11 * uint64(orderComplementWordSeven)) // 0
747 // acc.Add(t12 * uint64(orderComplementWordSix)) // 0
748 // acc.Add(t13 * uint64(orderComplementWordFive)) // 0
749 acc.Add(t14) // * uint64(orderComplementWordFour) // * 1
750 acc.Add(t15 * uint64(orderComplementWordThree))
751 t10 = uint64(acc.n[0])
752 acc.Rsh32()
753 // Terms for 2^(32*11).
754 // acc.Add(t12 * uint64(orderComplementWordSeven)) // 0
755 // acc.Add(t13 * uint64(orderComplementWordSix)) // 0
756 // acc.Add(t14 * uint64(orderComplementWordFive)) // 0
757 acc.Add(t15) // * uint64(orderComplementWordFour) // * 1
758 t11 = uint64(acc.n[0])
759 acc.Rsh32()
760 // NOTE: All of the remaining multiplications for this iteration result in 0
761 // as they all involve multiplying by combinations of the fifth, sixth, and
762 // seventh words of the two's complement of N, which are 0, so skip them.
763 //
764 // Terms for 2^(32*12).
765 t12 = uint64(acc.n[0])
766 // acc.Rsh32() // No need since not used after this. Guaranteed to be 0.
767 //
768 // At this point, the result is reduced to fit within 385 bits, so reduce it
769 // again using the same method accordingly.
770 s.reduce385(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12)
771 }
772 773 // Mul2 multiplies the passed two scalars together modulo the group order in
774 // constant time and stores the result in s.
775 //
776 // The scalar is returned to support chaining. This enables syntax like:
777 // s3.Mul2(s, s2).AddInt(1) so that s3 = (s * s2) + 1.
778 func (s *ModNScalar) Mul2(val, val2 *ModNScalar) *ModNScalar {
779 // This could be done with for loops and an array to store the intermediate
780 // terms, but this unrolled version is significantly faster.
781 //
782 // The overall strategy employed here is:
783 // 1) Calculate the 512-bit product of the two scalars using the standard
784 // pencil-and-paper method.
785 // 2) Reduce the result modulo the prime by effectively subtracting
786 // multiples of the group order N (actually performed by adding multiples
787 // of the two's complement of N to avoid implementing subtraction).
788 // 3) Repeat step 2 noting that each iteration reduces the required number
789 // of bits by 127 because the two's complement of N has 127 leading zero
790 // bits.
791 // 4) Once reduced to 256 bits, call the existing reduce method to perform
792 // a final reduction as needed.
793 //
794 // Note that several of the intermediate calculations require adding 64-bit
795 // products together which would overflow a uint64, so a 96-bit accumulator
796 // is used instead.
797 //
798 // Terms for 2^(32*0).
799 var acc accumulator96
800 acc.Add(uint64(val.n[0]) * uint64(val2.n[0]))
801 t0 := uint64(acc.n[0])
802 acc.Rsh32()
803 // Terms for 2^(32*1).
804 acc.Add(uint64(val.n[0]) * uint64(val2.n[1]))
805 acc.Add(uint64(val.n[1]) * uint64(val2.n[0]))
806 t1 := uint64(acc.n[0])
807 acc.Rsh32()
808 // Terms for 2^(32*2).
809 acc.Add(uint64(val.n[0]) * uint64(val2.n[2]))
810 acc.Add(uint64(val.n[1]) * uint64(val2.n[1]))
811 acc.Add(uint64(val.n[2]) * uint64(val2.n[0]))
812 t2 := uint64(acc.n[0])
813 acc.Rsh32()
814 // Terms for 2^(32*3).
815 acc.Add(uint64(val.n[0]) * uint64(val2.n[3]))
816 acc.Add(uint64(val.n[1]) * uint64(val2.n[2]))
817 acc.Add(uint64(val.n[2]) * uint64(val2.n[1]))
818 acc.Add(uint64(val.n[3]) * uint64(val2.n[0]))
819 t3 := uint64(acc.n[0])
820 acc.Rsh32()
821 // Terms for 2^(32*4).
822 acc.Add(uint64(val.n[0]) * uint64(val2.n[4]))
823 acc.Add(uint64(val.n[1]) * uint64(val2.n[3]))
824 acc.Add(uint64(val.n[2]) * uint64(val2.n[2]))
825 acc.Add(uint64(val.n[3]) * uint64(val2.n[1]))
826 acc.Add(uint64(val.n[4]) * uint64(val2.n[0]))
827 t4 := uint64(acc.n[0])
828 acc.Rsh32()
829 // Terms for 2^(32*5).
830 acc.Add(uint64(val.n[0]) * uint64(val2.n[5]))
831 acc.Add(uint64(val.n[1]) * uint64(val2.n[4]))
832 acc.Add(uint64(val.n[2]) * uint64(val2.n[3]))
833 acc.Add(uint64(val.n[3]) * uint64(val2.n[2]))
834 acc.Add(uint64(val.n[4]) * uint64(val2.n[1]))
835 acc.Add(uint64(val.n[5]) * uint64(val2.n[0]))
836 t5 := uint64(acc.n[0])
837 acc.Rsh32()
838 // Terms for 2^(32*6).
839 acc.Add(uint64(val.n[0]) * uint64(val2.n[6]))
840 acc.Add(uint64(val.n[1]) * uint64(val2.n[5]))
841 acc.Add(uint64(val.n[2]) * uint64(val2.n[4]))
842 acc.Add(uint64(val.n[3]) * uint64(val2.n[3]))
843 acc.Add(uint64(val.n[4]) * uint64(val2.n[2]))
844 acc.Add(uint64(val.n[5]) * uint64(val2.n[1]))
845 acc.Add(uint64(val.n[6]) * uint64(val2.n[0]))
846 t6 := uint64(acc.n[0])
847 acc.Rsh32()
848 // Terms for 2^(32*7).
849 acc.Add(uint64(val.n[0]) * uint64(val2.n[7]))
850 acc.Add(uint64(val.n[1]) * uint64(val2.n[6]))
851 acc.Add(uint64(val.n[2]) * uint64(val2.n[5]))
852 acc.Add(uint64(val.n[3]) * uint64(val2.n[4]))
853 acc.Add(uint64(val.n[4]) * uint64(val2.n[3]))
854 acc.Add(uint64(val.n[5]) * uint64(val2.n[2]))
855 acc.Add(uint64(val.n[6]) * uint64(val2.n[1]))
856 acc.Add(uint64(val.n[7]) * uint64(val2.n[0]))
857 t7 := uint64(acc.n[0])
858 acc.Rsh32()
859 // Terms for 2^(32*8).
860 acc.Add(uint64(val.n[1]) * uint64(val2.n[7]))
861 acc.Add(uint64(val.n[2]) * uint64(val2.n[6]))
862 acc.Add(uint64(val.n[3]) * uint64(val2.n[5]))
863 acc.Add(uint64(val.n[4]) * uint64(val2.n[4]))
864 acc.Add(uint64(val.n[5]) * uint64(val2.n[3]))
865 acc.Add(uint64(val.n[6]) * uint64(val2.n[2]))
866 acc.Add(uint64(val.n[7]) * uint64(val2.n[1]))
867 t8 := uint64(acc.n[0])
868 acc.Rsh32()
869 // Terms for 2^(32*9).
870 acc.Add(uint64(val.n[2]) * uint64(val2.n[7]))
871 acc.Add(uint64(val.n[3]) * uint64(val2.n[6]))
872 acc.Add(uint64(val.n[4]) * uint64(val2.n[5]))
873 acc.Add(uint64(val.n[5]) * uint64(val2.n[4]))
874 acc.Add(uint64(val.n[6]) * uint64(val2.n[3]))
875 acc.Add(uint64(val.n[7]) * uint64(val2.n[2]))
876 t9 := uint64(acc.n[0])
877 acc.Rsh32()
878 // Terms for 2^(32*10).
879 acc.Add(uint64(val.n[3]) * uint64(val2.n[7]))
880 acc.Add(uint64(val.n[4]) * uint64(val2.n[6]))
881 acc.Add(uint64(val.n[5]) * uint64(val2.n[5]))
882 acc.Add(uint64(val.n[6]) * uint64(val2.n[4]))
883 acc.Add(uint64(val.n[7]) * uint64(val2.n[3]))
884 t10 := uint64(acc.n[0])
885 acc.Rsh32()
886 // Terms for 2^(32*11).
887 acc.Add(uint64(val.n[4]) * uint64(val2.n[7]))
888 acc.Add(uint64(val.n[5]) * uint64(val2.n[6]))
889 acc.Add(uint64(val.n[6]) * uint64(val2.n[5]))
890 acc.Add(uint64(val.n[7]) * uint64(val2.n[4]))
891 t11 := uint64(acc.n[0])
892 acc.Rsh32()
893 // Terms for 2^(32*12).
894 acc.Add(uint64(val.n[5]) * uint64(val2.n[7]))
895 acc.Add(uint64(val.n[6]) * uint64(val2.n[6]))
896 acc.Add(uint64(val.n[7]) * uint64(val2.n[5]))
897 t12 := uint64(acc.n[0])
898 acc.Rsh32()
899 // Terms for 2^(32*13).
900 acc.Add(uint64(val.n[6]) * uint64(val2.n[7]))
901 acc.Add(uint64(val.n[7]) * uint64(val2.n[6]))
902 t13 := uint64(acc.n[0])
903 acc.Rsh32()
904 // Terms for 2^(32*14).
905 acc.Add(uint64(val.n[7]) * uint64(val2.n[7]))
906 t14 := uint64(acc.n[0])
907 acc.Rsh32()
908 // What's left is for 2^(32*15).
909 t15 := uint64(acc.n[0])
910 // acc.Rsh32() // No need since not used after this. Guaranteed to be 0.
911 //
912 // At this point, all of the terms are grouped into their respective base
913 // and occupy up to 512 bits. Reduce the result accordingly.
914 s.reduce512(
915 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14,
916 t15,
917 )
918 return s
919 }
920 921 // Mul multiplies the passed scalar with the existing one modulo the group order
922 // in constant time and stores the result in s.
923 //
924 // The scalar is returned to support chaining. This enables syntax like:
925 // s.Mul(s2).AddInt(1) so that s = (s * s2) + 1.
926 func (s *ModNScalar) Mul(val *ModNScalar) *ModNScalar {
927 return s.Mul2(s, val)
928 }
929 930 // SquareVal squares the passed scalar modulo the group order in constant time
931 // and stores the result in s.
932 //
933 // The scalar is returned to support chaining. This enables syntax like:
934 // s3.SquareVal(s).Mul(s) so that s3 = s^2 * s = s^3.
935 func (s *ModNScalar) SquareVal(val *ModNScalar) *ModNScalar {
936 // This could technically be optimized slightly to take advantage of the
937 // fact that many of the intermediate calculations in squaring are just
938 // doubling, however, benchmarking has shown that due to the need to use a
939 // 96-bit accumulator, any savings are essentially offset by that and
940 // consequently there is no real difference in performance over just
941 // multiplying the value by itself to justify the extra code for now. This
942 // can be revisited in the future if it becomes a bottleneck in practice.
943 return s.Mul2(val, val)
944 }
945 946 // Square squares the scalar modulo the group order in constant time. The
947 // existing scalar is modified.
948 //
949 // The scalar is returned to support chaining. This enables syntax like:
950 // s.Square().Mul(s2) so that s = s^2 * s2.
951 func (s *ModNScalar) Square() *ModNScalar {
952 return s.SquareVal(s)
953 }
954 955 // NegateVal negates the passed scalar modulo the group order and stores the
956 // result in s in constant time.
957 //
958 // The scalar is returned to support chaining. This enables syntax like:
959 // s.NegateVal(s2).AddInt(1) so that s = -s2 + 1.
960 func (s *ModNScalar) NegateVal(val *ModNScalar) *ModNScalar {
961 // Since the scalar is already in the range 0 <= val < N, where N is the
962 // group order, negation modulo the group order is just the group order
963 // minus the value. This implies that the result will always be in the
964 // desired range with the sole exception of 0 because N - 0 = N itself.
965 //
966 // Therefore, in order to avoid the need to reduce the result for every
967 // other case in order to achieve constant time, this creates a mask that is
968 // all 0s in the case of the scalar being negated is 0 and all 1s otherwise
969 // and bitwise ands that mask with each word.
970 //
971 // Finally, to simplify the carry propagation, this adds the two's
972 // complement of the scalar to N in order to achieve the same result.
973 bits := val.n[0] | val.n[1] | val.n[2] | val.n[3] | val.n[4] | val.n[5] |
974 val.n[6] | val.n[7]
975 mask := uint64(uint32Mask * constantTimeNotEq(bits, 0))
976 c := uint64(orderWordZero) + (uint64(^val.n[0]) + 1)
977 s.n[0] = uint32(c & mask)
978 c = (c >> 32) + uint64(orderWordOne) + uint64(^val.n[1])
979 s.n[1] = uint32(c & mask)
980 c = (c >> 32) + uint64(orderWordTwo) + uint64(^val.n[2])
981 s.n[2] = uint32(c & mask)
982 c = (c >> 32) + uint64(orderWordThree) + uint64(^val.n[3])
983 s.n[3] = uint32(c & mask)
984 c = (c >> 32) + uint64(orderWordFour) + uint64(^val.n[4])
985 s.n[4] = uint32(c & mask)
986 c = (c >> 32) + uint64(orderWordFive) + uint64(^val.n[5])
987 s.n[5] = uint32(c & mask)
988 c = (c >> 32) + uint64(orderWordSix) + uint64(^val.n[6])
989 s.n[6] = uint32(c & mask)
990 c = (c >> 32) + uint64(orderWordSeven) + uint64(^val.n[7])
991 s.n[7] = uint32(c & mask)
992 return s
993 }
994 995 // Negate negates the scalar modulo the group order in constant time. The
996 // existing scalar is modified.
997 //
998 // The scalar is returned to support chaining. This enables syntax like:
999 // s.Negate().AddInt(1) so that s = -s + 1.
1000 func (s *ModNScalar) Negate() *ModNScalar { return s.NegateVal(s) }
1001 1002 // InverseValNonConst finds the modular multiplicative inverse of the passed
1003 // scalar and stores result in s in *non-constant* time.
1004 //
1005 // The scalar is returned to support chaining. This enables syntax like:
1006 // s3.InverseVal(s1).Mul(s2) so that s3 = s1^-1 * s2.
1007 func (s *ModNScalar) InverseValNonConst(val *ModNScalar) *ModNScalar {
1008 // This is making use of big integers for now. Ideally it will be replaced
1009 // with an implementation that does not depend on big integers.
1010 valBytes := val.Bytes()
1011 bigVal := (&big.Int{}).SetBytes(valBytes[:])
1012 bigVal.ModInverse(bigVal, Params().N)
1013 s.SetByteSlice(bigVal.Bytes())
1014 return s
1015 }
1016 1017 // InverseNonConst finds the modular multiplicative inverse of the scalar in
1018 // *non-constant* time. The existing scalar is modified.
1019 //
1020 // The scalar is returned to support chaining. This enables syntax like:
1021 // s.Inverse().Mul(s2) so that s = s^-1 * s2.
1022 func (s *ModNScalar) InverseNonConst() *ModNScalar {
1023 return s.InverseValNonConst(s)
1024 }
1025 1026 // IsOverHalfOrder returns whether the scalar exceeds the group order
1027 // divided by 2 in constant time.
1028 func (s *ModNScalar) IsOverHalfOrder() bool {
1029 // The intuition here is that the scalar is greater than half of the group
1030 // order if one of the higher individual words is greater than the
1031 // corresponding word of the half group order and all higher words in the
1032 // scalar are equal to their corresponding word of the half group order.
1033 //
1034 // Note that the words 4, 5, and 6 are all the max uint32 value, so there is
1035 // no need to test if those individual words of the scalar exceeds them,
1036 // hence, only equality is checked for them.
1037 result := constantTimeGreater(s.n[7], halfOrderWordSeven)
1038 highWordsEqual := constantTimeEq(s.n[7], halfOrderWordSeven)
1039 highWordsEqual &= constantTimeEq(s.n[6], halfOrderWordSix)
1040 highWordsEqual &= constantTimeEq(s.n[5], halfOrderWordFive)
1041 highWordsEqual &= constantTimeEq(s.n[4], halfOrderWordFour)
1042 result |= highWordsEqual & constantTimeGreater(s.n[3], halfOrderWordThree)
1043 highWordsEqual &= constantTimeEq(s.n[3], halfOrderWordThree)
1044 result |= highWordsEqual & constantTimeGreater(s.n[2], halfOrderWordTwo)
1045 highWordsEqual &= constantTimeEq(s.n[2], halfOrderWordTwo)
1046 result |= highWordsEqual & constantTimeGreater(s.n[1], halfOrderWordOne)
1047 highWordsEqual &= constantTimeEq(s.n[1], halfOrderWordOne)
1048 result |= highWordsEqual & constantTimeGreater(s.n[0], halfOrderWordZero)
1049 return result != 0
1050 }
1051