package crypto // Radix-3 Number-Theoretic Transform (NTT) over Z_271 for trinary Hamadryad. // // The trinary analogue of the radix-2 NTT in ntt.go. Transforms a // length-27 polynomial from coefficient form to evaluation form over Z_271, // enabling O(n log n) polynomial multiplication via pointwise multiply. // // For trinary Hamadryad: n=27 = 3^3, p=271. // // 271 is prime and 270 = 271-1 = 2 × 5 × 3^3, so Z_271* has order 270. // g = 6 is a primitive root (generator) of Z_271*. // psi = 6^(270/54) = 6^5 = 188 is a primitive 54th root of unity. // // For polynomials mod (x^27 + 1), we use the negacyclic NTT: // pre-multiply coefficients by powers of psi (a 54th root), // then apply a standard 27-point NTT with omega = psi^2 = 114 // (a primitive 27th root of unity). // // The radix-3 butterfly uses the primitive cube root of unity // w3 = omega^9 = 242, which satisfies 1 + w3 + w3^2 = 0 (mod 271). // GnarlP is the ring modulus for the trinary Hamadryad. const GnarlP = 271 // GnarlN is the ring dimension: degree of x^27 + 1. const GnarlN = 27 // mod271 reduces x into [0, 270]. x must be in [0, 73441). // Uses Barrett reduction: q = (x * 483) >> 17, r = x - q*271. // Magic constant: 483 = floor(2^17 / 271). All arithmetic fits in uint32. // Max intermediate: 73170 * 483 = 35,341,110 < 2^32. // //go:nosplit func mod271(x uint32) uint16 { q := (x * 483) >> 17 r := x - q*271 if r >= 271 { r -= 271 } return uint16(r) } // mod271s reduces a signed int into [0, 270]. Used only in test/rare paths. func mod271s(x int) uint16 { x %= GnarlP if x < 0 { x += GnarlP } return uint16(x) } // powMod271 computes base^exp mod 271 via binary exponentiation. func powMod271(base, exp int) uint16 { result := 1 b := base % GnarlP if b < 0 { b += GnarlP } for e := exp; e > 0; e >>= 1 { if e&1 == 1 { result = (result * b) % GnarlP } b = (b * b) % GnarlP } return uint16(result) } // invMod271 computes the modular inverse of a mod 271. // Uses Fermat's little theorem: a^(-1) = a^(p-2) mod p. func invMod271(a uint16) uint16 { return powMod271(int(a), GnarlP-2) } // digitRev3 reverses the base-3 digits of x with the given number of digits. // For n=27 = 3^3, we use 3 digits. func digitRev3(x, digits int) int { r := 0 for range digits { r = r*3 + x%3 x /= 3 } return r } // Pre-computed tables for the radix-3 NTT, filled by initNTT27Tables. var ( // psiPows27[i] = psi^i mod 271, for i=0..53. // psi = 188 (primitive 54th root of unity). psiPows27 [54]uint16 // psiInvPows27[i] = psi^(-i) mod 271. psiInvPows27 [54]uint16 // invN27 = 27^(-1) mod 271 = 261. invN27 uint16 ntt27TablesReady bool ) // initNTT27Tables populates the radix-3 NTT lookup tables. func initNTT27Tables() { if ntt27TablesReady { return } const psi = 188 // primitive 54th root of unity in Z_271 psiPows27[0] = 1 for i := 1; i < 54; i++ { psiPows27[i] = mod271(uint32(psiPows27[i-1]) * psi) } psiInv := invMod271(psi) psiInvPows27[0] = 1 for i := 1; i < 54; i++ { psiInvPows27[i] = mod271(uint32(psiInvPows27[i-1]) * uint32(psiInv)) } invN27 = invMod271(uint16(GnarlN)) ntt27TablesReady = true } func init() { initNTT27Tables() } // ntt27 computes the forward negacyclic NTT of a length-27 polynomial over Z_271. // // The negacyclic NTT evaluates the polynomial at the roots of (x^27 + 1), // which are psi^(2k+1) for k=0..26. Implementation: // 1. Pre-multiply: a[i] *= psi^i // 2. Radix-3 DIT NTT with omega = psi^2 (primitive 27th root of unity) // // The radix-3 butterfly for indices [j, j+m, j+2m] with twiddle w: // // a0' = a0 + w*a1 + w^2*a2 // a1' = a0 + w*a1*w3 + w^2*a2*w3^2 // a2' = a0 + w*a1*w3^2 + w^2*a2*w3 // // where w3 = omega^(27/3) = omega^9 = 242 is the primitive cube root of unity. func ntt27(a *[GnarlN]uint16) { // Fully-unrolled negacyclic NTT over Z_271, n=27 = 3^3. // Three stages of radix-3 butterflies with precomputed twiddles. // Step 1: pre-multiply by psi^i for negacyclic. // psi = 188, psiPows27 = [1,188,114,23,259,183,258,266,144,243,156,60, // 169,65,25,93,140,33,242,239,217,146,77,113,106,145,160] // a[0] *= 1 (no-op) a[1] = mod271(uint32(a[1]) * 188) a[2] = mod271(uint32(a[2]) * 114) a[3] = mod271(uint32(a[3]) * 23) a[4] = mod271(uint32(a[4]) * 259) a[5] = mod271(uint32(a[5]) * 183) a[6] = mod271(uint32(a[6]) * 258) a[7] = mod271(uint32(a[7]) * 266) a[8] = mod271(uint32(a[8]) * 144) a[9] = mod271(uint32(a[9]) * 243) a[10] = mod271(uint32(a[10]) * 156) a[11] = mod271(uint32(a[11]) * 60) a[12] = mod271(uint32(a[12]) * 169) a[13] = mod271(uint32(a[13]) * 65) a[14] = mod271(uint32(a[14]) * 25) a[15] = mod271(uint32(a[15]) * 93) a[16] = mod271(uint32(a[16]) * 140) a[17] = mod271(uint32(a[17]) * 33) a[18] = mod271(uint32(a[18]) * 242) a[19] = mod271(uint32(a[19]) * 239) a[20] = mod271(uint32(a[20]) * 217) a[21] = mod271(uint32(a[21]) * 146) a[22] = mod271(uint32(a[22]) * 77) a[23] = mod271(uint32(a[23]) * 113) a[24] = mod271(uint32(a[24]) * 106) a[25] = mod271(uint32(a[25]) * 145) a[26] = mod271(uint32(a[26]) * 160) // Step 2: digit-reversal permutation (9 swaps). a[1], a[9] = a[9], a[1] a[2], a[18] = a[18], a[2] a[4], a[12] = a[12], a[4] a[5], a[21] = a[21], a[5] a[7], a[15] = a[15], a[7] a[8], a[24] = a[24], a[8] a[11], a[19] = a[19], a[11] a[14], a[22] = a[22], a[14] a[17], a[25] = a[25], a[17] // Step 3: radix-3 butterfly stages (fully unrolled). // w3 = 242, w3sq = 28. All twiddles precomputed. // --- Stage 0: stride=1, groupSize=3, all tw1=tw2=1 --- // 9 butterflies, each: b0=a0+a1+a2, b1=a0+242*a1+28*a2, b2=a0+28*a1+242*a2 nttBfly1(a, 0, 1, 2) nttBfly1(a, 3, 4, 5) nttBfly1(a, 6, 7, 8) nttBfly1(a, 9, 10, 11) nttBfly1(a, 12, 13, 14) nttBfly1(a, 15, 16, 17) nttBfly1(a, 18, 19, 20) nttBfly1(a, 21, 22, 23) nttBfly1(a, 24, 25, 26) // --- Stage 1: stride=3, groupSize=9, twiddleStep=3 --- // j=0: tw1=1, tw2=1 (same as stage 0 kernel) nttBfly1(a, 0, 3, 6) nttBfly1(a, 9, 12, 15) nttBfly1(a, 18, 21, 24) // j=1: tw1=258, tw2=169 nttBfly(a, 1, 4, 7, 258, 169) nttBfly(a, 10, 13, 16, 258, 169) nttBfly(a, 19, 22, 25, 258, 169) // j=2: tw1=169, tw2=106 nttBfly(a, 2, 5, 8, 169, 106) nttBfly(a, 11, 14, 17, 169, 106) nttBfly(a, 20, 23, 26, 169, 106) // --- Stage 2: stride=9, groupSize=27, twiddleStep=1 --- // j=0: tw1=1, tw2=1 nttBfly1(a, 0, 9, 18) // j=1: tw1=114, tw2=259 nttBfly(a, 1, 10, 19, 114, 259) // j=2: tw1=259, tw2=144 nttBfly(a, 2, 11, 20, 259, 144) // j=3: tw1=258, tw2=169 nttBfly(a, 3, 12, 21, 258, 169) // j=4: tw1=144, tw2=140 nttBfly(a, 4, 13, 22, 144, 140) // j=5: tw1=156, tw2=217 nttBfly(a, 5, 14, 23, 156, 217) // j=6: tw1=169, tw2=106 nttBfly(a, 6, 15, 24, 169, 106) // j=7: tw1=25, tw2=83 nttBfly(a, 7, 16, 25, 25, 83) // j=8: tw1=140, tw2=88 nttBfly(a, 8, 17, 26, 140, 88) } // nttBfly1 performs a radix-3 butterfly with trivial twiddles (tw1=tw2=1). // b0 = a0 + a1 + a2, b1 = a0 + 242*a1 + 28*a2, b2 = a0 + 28*a1 + 242*a2 // //go:nosplit func nttBfly1(a *[27]uint16, i0, i1, i2 int) { v0 := uint32(a[i0]) v1 := uint32(a[i1]) v2 := uint32(a[i2]) a[i0] = mod271(v0 + v1 + v2) a[i1] = mod271(v0 + uint32(mod271(v1*242)) + uint32(mod271(v2*28))) a[i2] = mod271(v0 + uint32(mod271(v1*28)) + uint32(mod271(v2*242))) } // nttBfly performs a radix-3 butterfly with given twiddle factors tw1, tw2. // //go:nosplit func nttBfly(a *[27]uint16, i0, i1, i2 int, tw1, tw2 uint32) { v0 := uint32(a[i0]) a1tw := uint32(mod271(uint32(a[i1]) * tw1)) a2tw := uint32(mod271(uint32(a[i2]) * tw2)) a[i0] = mod271(v0 + a1tw + a2tw) a[i1] = mod271(v0 + uint32(mod271(a1tw*242)) + uint32(mod271(a2tw*28))) a[i2] = mod271(v0 + uint32(mod271(a1tw*28)) + uint32(mod271(a2tw*242))) } // intt27 computes the inverse negacyclic NTT, recovering coefficients. // // Uses the Decimation-In-Frequency (DIF) structure: butterflies top-down // (largest group first), followed by digit-reversal permutation. // This is the natural inverse of the forward DIT NTT. func intt27(a *[GnarlN]uint16) { // Fully-unrolled inverse negacyclic NTT (DIF, top-down). // --- Stage 2: groupSize=27, stride=9, twiddleStep=1 --- inttBfly1(a, 0, 9, 18) inttBfly(a, 1, 10, 19, 126, 158) inttBfly(a, 2, 11, 20, 158, 32) inttBfly(a, 3, 12, 21, 125, 178) inttBfly(a, 4, 13, 22, 32, 211) inttBfly(a, 5, 14, 23, 238, 5) inttBfly(a, 6, 15, 24, 178, 248) inttBfly(a, 7, 16, 25, 206, 160) inttBfly(a, 8, 17, 26, 211, 77) // --- Stage 1: groupSize=9, stride=3, twiddleStep=3 --- inttBfly1(a, 0, 3, 6) inttBfly(a, 1, 4, 7, 125, 178) inttBfly(a, 2, 5, 8, 178, 248) inttBfly1(a, 9, 12, 15) inttBfly(a, 10, 13, 16, 125, 178) inttBfly(a, 11, 14, 17, 178, 248) inttBfly1(a, 18, 21, 24) inttBfly(a, 19, 22, 25, 125, 178) inttBfly(a, 20, 23, 26, 178, 248) // --- Stage 0: groupSize=3, stride=1, twiddleStep=9 (all trivial) --- inttBfly1(a, 0, 1, 2) inttBfly1(a, 3, 4, 5) inttBfly1(a, 6, 7, 8) inttBfly1(a, 9, 10, 11) inttBfly1(a, 12, 13, 14) inttBfly1(a, 15, 16, 17) inttBfly1(a, 18, 19, 20) inttBfly1(a, 21, 22, 23) inttBfly1(a, 24, 25, 26) // Digit-reversal permutation (9 swaps). a[1], a[9] = a[9], a[1] a[2], a[18] = a[18], a[2] a[4], a[12] = a[12], a[4] a[5], a[21] = a[21], a[5] a[7], a[15] = a[15], a[7] a[8], a[24] = a[24], a[8] a[11], a[19] = a[19], a[11] a[14], a[22] = a[22], a[14] a[17], a[25] = a[25], a[17] // Post-multiply: fused invN27 * psiInvPows27[i] for each coefficient. a[0] = mod271(uint32(a[0]) * 261) a[1] = mod271(uint32(a[1]) * 245) a[2] = mod271(uint32(a[2]) * 95) a[3] = mod271(uint32(a[3]) * 247) a[4] = mod271(uint32(a[4]) * 46) a[5] = mod271(uint32(a[5]) * 228) a[6] = mod271(uint32(a[6]) * 105) a[7] = mod271(uint32(a[7]) * 2) a[8] = mod271(uint32(a[8]) * 222) a[9] = mod271(uint32(a[9]) * 252) a[10] = mod271(uint32(a[10]) * 59) a[11] = mod271(uint32(a[11]) * 45) a[12] = mod271(uint32(a[12]) * 117) a[13] = mod271(uint32(a[13]) * 250) a[14] = mod271(uint32(a[14]) * 108) a[15] = mod271(uint32(a[15]) * 64) a[16] = mod271(uint32(a[16]) * 58) a[17] = mod271(uint32(a[17]) * 205) a[18] = mod271(uint32(a[18]) * 262) a[19] = mod271(uint32(a[19]) * 85) a[20] = mod271(uint32(a[20]) * 221) a[21] = mod271(uint32(a[21]) * 141) a[22] = mod271(uint32(a[22]) * 204) a[23] = mod271(uint32(a[23]) * 151) a[24] = mod271(uint32(a[24]) * 230) a[25] = mod271(uint32(a[25]) * 56) a[26] = mod271(uint32(a[26]) * 254) } // inttBfly1 performs an inverse radix-3 DIF butterfly with trivial twiddles (tw1inv=tw2inv=1). // b0 = a0 + a1 + a2, b1 = a0 + 28*a1 + 242*a2, b2 = a0 + 242*a1 + 28*a2 // //go:nosplit func inttBfly1(a *[27]uint16, i0, i1, i2 int) { v0 := uint32(a[i0]) v1 := uint32(a[i1]) v2 := uint32(a[i2]) a[i0] = mod271(v0 + v1 + v2) a[i1] = mod271(v0 + uint32(mod271(v1*28)) + uint32(mod271(v2*242))) a[i2] = mod271(v0 + uint32(mod271(v1*242)) + uint32(mod271(v2*28))) } // inttBfly performs an inverse radix-3 DIF butterfly with given twiddle factors. // After the butterfly, b1 *= tw1inv, b2 *= tw2inv. // //go:nosplit func inttBfly(a *[27]uint16, i0, i1, i2 int, tw1inv, tw2inv uint32) { v0 := uint32(a[i0]) v1 := uint32(a[i1]) v2 := uint32(a[i2]) a[i0] = mod271(v0 + v1 + v2) b1 := mod271(v0 + uint32(mod271(v1*28)) + uint32(mod271(v2*242))) b2 := mod271(v0 + uint32(mod271(v1*242)) + uint32(mod271(v2*28))) a[i1] = mod271(uint32(b1) * tw1inv) a[i2] = mod271(uint32(b2) * tw2inv) }