package gnarl

// Scalar arithmetic modulo Q (the prime order of the Schnorr subgroup).
//
// Q = (P+1)/6 is a ~213-bit prime. Scalars are used for private keys,
// nonces, challenges, and responses in the Schnorr protocol.
//
// N = P + 1 = 6Q. The full non-split torus has order N, and the
// Schnorr subgroup has prime order Q.
//
// Representation: 4×uint64 little-endian limbs, reduced to [0, Q).
// Like the Cayley scheme, scalars are NOT in Montgomery form.

import (
	"crypto/rand"
	"math/big"
	"math/bits"
)

// scalar is a 256-bit value mod Q in 4 little-endian uint64 limbs.
type scalar [4]uint64

// Q limbs (little-endian). Q = (P+1)/6.
// Q hex = 0x18e5f11e7959f44807ba64d85a69a223c0776edda9942fad452629
// Q bits = 213
var qLimbs = scalar{
	0xdda9942fad452629,
	0xd85a69a223c0776e,
	0x1e7959f44807ba64,
	0x000000000018e5f1,
}

// scZero is the zero scalar.
var scZero scalar

// scOneVal is the scalar 1.
var scOneVal = scalar{1, 0, 0, 0}

// scIsZero returns true if s == 0.
func scIsZero(s *scalar) bool {
	return (s[0] | s[1] | s[2] | s[3]) == 0
}

// scGte returns true if a >= b as unsigned 256-bit integers.
func scGte(a, b *scalar) bool {
	_, borrow := bits.Sub64(a[0], b[0], 0)
	_, borrow = bits.Sub64(a[1], b[1], borrow)
	_, borrow = bits.Sub64(a[2], b[2], borrow)
	_, borrow = bits.Sub64(a[3], b[3], borrow)
	return borrow == 0
}

// scReduce reduces s to [0, Q).
// Q is ~213 bits but values can be up to 256 bits (4 full limbs),
// so we use big.Int for the reduction rather than repeated subtraction.
func scReduce(s *scalar) {
	if !scGte(s, &qLimbs) {
		return
	}
	// Convert to big.Int, reduce, convert back.
	val := new(big.Int)
	val.SetUint64(s[3])
	val.Lsh(val, 64)
	val.Or(val, new(big.Int).SetUint64(s[2]))
	val.Lsh(val, 64)
	val.Or(val, new(big.Int).SetUint64(s[1]))
	val.Lsh(val, 64)
	val.Or(val, new(big.Int).SetUint64(s[0]))
	val.Mod(val, qBig)

	mask64 := new(big.Int).SetUint64(0xFFFFFFFFFFFFFFFF)
	s[0] = new(big.Int).And(val, mask64).Uint64()
	val.Rsh(val, 64)
	s[1] = new(big.Int).And(val, mask64).Uint64()
	val.Rsh(val, 64)
	s[2] = new(big.Int).And(val, mask64).Uint64()
	val.Rsh(val, 64)
	s[3] = val.Uint64()
}

// scAdd computes r = a + b mod Q.
func scAdd(r, a, b *scalar) {
	var carry uint64
	r[0], carry = bits.Add64(a[0], b[0], 0)
	r[1], carry = bits.Add64(a[1], b[1], carry)
	r[2], carry = bits.Add64(a[2], b[2], carry)
	r[3], carry = bits.Add64(a[3], b[3], carry)

	d0, borrow := bits.Sub64(r[0], qLimbs[0], 0)
	d1, borrow := bits.Sub64(r[1], qLimbs[1], borrow)
	d2, borrow := bits.Sub64(r[2], qLimbs[2], borrow)
	d3, borrow := bits.Sub64(r[3], qLimbs[3], borrow)

	underflow := borrow &^ carry
	mask := uint64(0) - underflow
	r[0] = (r[0] & mask) | (d0 & ^mask)
	r[1] = (r[1] & mask) | (d1 & ^mask)
	r[2] = (r[2] & mask) | (d2 & ^mask)
	r[3] = (r[3] & mask) | (d3 & ^mask)
}

// scSub computes r = a - b mod Q.
func scSub(r, a, b *scalar) {
	var borrow uint64
	r[0], borrow = bits.Sub64(a[0], b[0], 0)
	r[1], borrow = bits.Sub64(a[1], b[1], borrow)
	r[2], borrow = bits.Sub64(a[2], b[2], borrow)
	r[3], borrow = bits.Sub64(a[3], b[3], borrow)

	mask := uint64(0) - borrow
	var carry uint64
	r[0], carry = bits.Add64(r[0], qLimbs[0]&mask, 0)
	r[1], carry = bits.Add64(r[1], qLimbs[1]&mask, carry)
	r[2], carry = bits.Add64(r[2], qLimbs[2]&mask, carry)
	r[3], _ = bits.Add64(r[3], qLimbs[3]&mask, carry)
}

// scNeg computes r = -a mod Q.
func scNeg(r, a *scalar) {
	scSub(r, &scZero, a)
}

// scMul computes r = a * b mod Q.
// Uses big.Int for the full 512-bit product and reduction (scalar muls are infrequent).
func scMul(r, a, b *scalar) {
	var abuf, bbuf [27]byte
	scToBytes27(abuf[:], a)
	scToBytes27(bbuf[:], b)
	aBig := new(big.Int).SetBytes(abuf[:])
	bBig := new(big.Int).SetBytes(bbuf[:])
	prod := new(big.Int).Mul(aBig, bBig)
	prod.Mod(prod, qBig)

	var rbuf [27]byte
	prodBytes := prod.Bytes()
	copy(rbuf[27-len(prodBytes):], prodBytes)
	scFromBytes27(r, rbuf[:])
}

// scFromBytes27 sets s from a 27-byte big-endian encoding.
// The value is reduced mod Q.
func scFromBytes27(s *scalar, b []byte) {
	if len(b) < 27 {
		*s = scZero
		return
	}
	// 27 bytes = 216 bits. Same layout as feFromBytes27:
	// b[0..2] → low 24 bits of limb[3] (bits 192..215)
	// b[3..10] → limb[2] (bits 128..191)
	// b[11..18] → limb[1] (bits 64..127)
	// b[19..26] → limb[0] (bits 0..63)
	s[3] = uint64(b[0])<<16 | uint64(b[1])<<8 | uint64(b[2])
	s[2] = uint64(b[3])<<56 | uint64(b[4])<<48 | uint64(b[5])<<40 | uint64(b[6])<<32 |
		uint64(b[7])<<24 | uint64(b[8])<<16 | uint64(b[9])<<8 | uint64(b[10])
	s[1] = uint64(b[11])<<56 | uint64(b[12])<<48 | uint64(b[13])<<40 | uint64(b[14])<<32 |
		uint64(b[15])<<24 | uint64(b[16])<<16 | uint64(b[17])<<8 | uint64(b[18])
	s[0] = uint64(b[19])<<56 | uint64(b[20])<<48 | uint64(b[21])<<40 | uint64(b[22])<<32 |
		uint64(b[23])<<24 | uint64(b[24])<<16 | uint64(b[25])<<8 | uint64(b[26])
	scReduce(s)
}

// scToBytes27 writes s to a 27-byte big-endian buffer.
func scToBytes27(b []byte, s *scalar) {
	// limb[3] low 24 bits → b[0..2]
	b[0] = byte(s[3] >> 16)
	b[1] = byte(s[3] >> 8)
	b[2] = byte(s[3])
	// limb[2] → b[3..10]
	b[3] = byte(s[2] >> 56)
	b[4] = byte(s[2] >> 48)
	b[5] = byte(s[2] >> 40)
	b[6] = byte(s[2] >> 32)
	b[7] = byte(s[2] >> 24)
	b[8] = byte(s[2] >> 16)
	b[9] = byte(s[2] >> 8)
	b[10] = byte(s[2])
	// limb[1] → b[11..18]
	b[11] = byte(s[1] >> 56)
	b[12] = byte(s[1] >> 48)
	b[13] = byte(s[1] >> 40)
	b[14] = byte(s[1] >> 32)
	b[15] = byte(s[1] >> 24)
	b[16] = byte(s[1] >> 16)
	b[17] = byte(s[1] >> 8)
	b[18] = byte(s[1])
	// limb[0] → b[19..26]
	b[19] = byte(s[0] >> 56)
	b[20] = byte(s[0] >> 48)
	b[21] = byte(s[0] >> 40)
	b[22] = byte(s[0] >> 32)
	b[23] = byte(s[0] >> 24)
	b[24] = byte(s[0] >> 16)
	b[25] = byte(s[0] >> 8)
	b[26] = byte(s[0])
}

// scRandom generates a uniform random scalar in [1, Q).
func scRandom(s *scalar) error {
	var buf [27]byte
	for {
		if _, err := rand.Read(buf[:]); err != nil {
			return err
		}
		// Clear top 5 bits to ensure < 2^213 (Q is ~213 bits).
		// Q top byte starts with 0x18 = 0001_1000, so clearing top 3 bits
		// gives values < 2^213 which covers Q's range. We'll reduce mod Q anyway.
		buf[0] &= 0x1F
		scFromBytes27(s, buf[:])
		if !scIsZero(s) {
			return nil
		}
	}
}