package gnarl // Scalar arithmetic modulo Q (the prime order of the Schnorr subgroup). // // Q = (P+1)/6 is a ~213-bit prime. Scalars are used for private keys, // nonces, challenges, and responses in the Schnorr protocol. // // N = P + 1 = 6Q. The full non-split torus has order N, and the // Schnorr subgroup has prime order Q. // // Representation: 4×uint64 little-endian limbs, reduced to [0, Q). // Like the Cayley scheme, scalars are NOT in Montgomery form. import ( "crypto/rand" "math/big" "math/bits" ) // scalar is a 256-bit value mod Q in 4 little-endian uint64 limbs. type scalar [4]uint64 // Q limbs (little-endian). Q = (P+1)/6. // Q hex = 0x18e5f11e7959f44807ba64d85a69a223c0776edda9942fad452629 // Q bits = 213 var qLimbs = scalar{ 0xdda9942fad452629, 0xd85a69a223c0776e, 0x1e7959f44807ba64, 0x000000000018e5f1, } // scZero is the zero scalar. var scZero scalar // scOneVal is the scalar 1. var scOneVal = scalar{1, 0, 0, 0} // scIsZero returns true if s == 0. func scIsZero(s *scalar) bool { return (s[0] | s[1] | s[2] | s[3]) == 0 } // scGte returns true if a >= b as unsigned 256-bit integers. func scGte(a, b *scalar) bool { _, borrow := bits.Sub64(a[0], b[0], 0) _, borrow = bits.Sub64(a[1], b[1], borrow) _, borrow = bits.Sub64(a[2], b[2], borrow) _, borrow = bits.Sub64(a[3], b[3], borrow) return borrow == 0 } // scReduce reduces s to [0, Q). // Q is ~213 bits but values can be up to 256 bits (4 full limbs), // so we use big.Int for the reduction rather than repeated subtraction. func scReduce(s *scalar) { if !scGte(s, &qLimbs) { return } // Convert to big.Int, reduce, convert back. val := new(big.Int) val.SetUint64(s[3]) val.Lsh(val, 64) val.Or(val, new(big.Int).SetUint64(s[2])) val.Lsh(val, 64) val.Or(val, new(big.Int).SetUint64(s[1])) val.Lsh(val, 64) val.Or(val, new(big.Int).SetUint64(s[0])) val.Mod(val, qBig) mask64 := new(big.Int).SetUint64(0xFFFFFFFFFFFFFFFF) s[0] = new(big.Int).And(val, mask64).Uint64() val.Rsh(val, 64) s[1] = new(big.Int).And(val, mask64).Uint64() val.Rsh(val, 64) s[2] = new(big.Int).And(val, mask64).Uint64() val.Rsh(val, 64) s[3] = val.Uint64() } // scAdd computes r = a + b mod Q. func scAdd(r, a, b *scalar) { var carry uint64 r[0], carry = bits.Add64(a[0], b[0], 0) r[1], carry = bits.Add64(a[1], b[1], carry) r[2], carry = bits.Add64(a[2], b[2], carry) r[3], carry = bits.Add64(a[3], b[3], carry) d0, borrow := bits.Sub64(r[0], qLimbs[0], 0) d1, borrow := bits.Sub64(r[1], qLimbs[1], borrow) d2, borrow := bits.Sub64(r[2], qLimbs[2], borrow) d3, borrow := bits.Sub64(r[3], qLimbs[3], borrow) underflow := borrow &^ carry mask := uint64(0) - underflow r[0] = (r[0] & mask) | (d0 & ^mask) r[1] = (r[1] & mask) | (d1 & ^mask) r[2] = (r[2] & mask) | (d2 & ^mask) r[3] = (r[3] & mask) | (d3 & ^mask) } // scSub computes r = a - b mod Q. func scSub(r, a, b *scalar) { var borrow uint64 r[0], borrow = bits.Sub64(a[0], b[0], 0) r[1], borrow = bits.Sub64(a[1], b[1], borrow) r[2], borrow = bits.Sub64(a[2], b[2], borrow) r[3], borrow = bits.Sub64(a[3], b[3], borrow) mask := uint64(0) - borrow var carry uint64 r[0], carry = bits.Add64(r[0], qLimbs[0]&mask, 0) r[1], carry = bits.Add64(r[1], qLimbs[1]&mask, carry) r[2], carry = bits.Add64(r[2], qLimbs[2]&mask, carry) r[3], _ = bits.Add64(r[3], qLimbs[3]&mask, carry) } // scNeg computes r = -a mod Q. func scNeg(r, a *scalar) { scSub(r, &scZero, a) } // scMul computes r = a * b mod Q. // Uses big.Int for the full 512-bit product and reduction (scalar muls are infrequent). func scMul(r, a, b *scalar) { var abuf, bbuf [27]byte scToBytes27(abuf[:], a) scToBytes27(bbuf[:], b) aBig := new(big.Int).SetBytes(abuf[:]) bBig := new(big.Int).SetBytes(bbuf[:]) prod := new(big.Int).Mul(aBig, bBig) prod.Mod(prod, qBig) var rbuf [27]byte prodBytes := prod.Bytes() copy(rbuf[27-len(prodBytes):], prodBytes) scFromBytes27(r, rbuf[:]) } // scFromBytes27 sets s from a 27-byte big-endian encoding. // The value is reduced mod Q. func scFromBytes27(s *scalar, b []byte) { if len(b) < 27 { *s = scZero return } // 27 bytes = 216 bits. Same layout as feFromBytes27: // b[0..2] → low 24 bits of limb[3] (bits 192..215) // b[3..10] → limb[2] (bits 128..191) // b[11..18] → limb[1] (bits 64..127) // b[19..26] → limb[0] (bits 0..63) s[3] = uint64(b[0])<<16 | uint64(b[1])<<8 | uint64(b[2]) s[2] = uint64(b[3])<<56 | uint64(b[4])<<48 | uint64(b[5])<<40 | uint64(b[6])<<32 | uint64(b[7])<<24 | uint64(b[8])<<16 | uint64(b[9])<<8 | uint64(b[10]) s[1] = uint64(b[11])<<56 | uint64(b[12])<<48 | uint64(b[13])<<40 | uint64(b[14])<<32 | uint64(b[15])<<24 | uint64(b[16])<<16 | uint64(b[17])<<8 | uint64(b[18]) s[0] = uint64(b[19])<<56 | uint64(b[20])<<48 | uint64(b[21])<<40 | uint64(b[22])<<32 | uint64(b[23])<<24 | uint64(b[24])<<16 | uint64(b[25])<<8 | uint64(b[26]) scReduce(s) } // scToBytes27 writes s to a 27-byte big-endian buffer. func scToBytes27(b []byte, s *scalar) { // limb[3] low 24 bits → b[0..2] b[0] = byte(s[3] >> 16) b[1] = byte(s[3] >> 8) b[2] = byte(s[3]) // limb[2] → b[3..10] b[3] = byte(s[2] >> 56) b[4] = byte(s[2] >> 48) b[5] = byte(s[2] >> 40) b[6] = byte(s[2] >> 32) b[7] = byte(s[2] >> 24) b[8] = byte(s[2] >> 16) b[9] = byte(s[2] >> 8) b[10] = byte(s[2]) // limb[1] → b[11..18] b[11] = byte(s[1] >> 56) b[12] = byte(s[1] >> 48) b[13] = byte(s[1] >> 40) b[14] = byte(s[1] >> 32) b[15] = byte(s[1] >> 24) b[16] = byte(s[1] >> 16) b[17] = byte(s[1] >> 8) b[18] = byte(s[1]) // limb[0] → b[19..26] b[19] = byte(s[0] >> 56) b[20] = byte(s[0] >> 48) b[21] = byte(s[0] >> 40) b[22] = byte(s[0] >> 32) b[23] = byte(s[0] >> 24) b[24] = byte(s[0] >> 16) b[25] = byte(s[0] >> 8) b[26] = byte(s[0]) } // scRandom generates a uniform random scalar in [1, Q). func scRandom(s *scalar) error { var buf [27]byte for { if _, err := rand.Read(buf[:]); err != nil { return err } // Clear top 5 bits to ensure < 2^213 (Q is ~213 bits). // Q top byte starts with 0x18 = 0001_1000, so clearing top 3 bits // gives values < 2^213 which covers Q's range. We'll reduce mod Q anyway. buf[0] &= 0x1F scFromBytes27(s, buf[:]) if !scIsZero(s) { return nil } } }