recognizer.go raw

   1  // Searchable encryption via homomorphic pattern matching (the recognizer).
   2  //
   3  // The recognizer evaluates pattern-matching circuits on encrypted data.
   4  // Neither the data nor the query pattern are revealed to the evaluator.
   5  //
   6  // Architecture:
   7  //
   8  //   Data owner:
   9  //     1. Encrypts each bit of the data: c_i = HEEncrypt(pk, data[i])
  10  //     2. Sends encrypted data to the server.
  11  //
  12  //   Query maker:
  13  //     1. Defines a pattern as a Boolean circuit (AND, XOR, NOT gates).
  14  //     2. Encrypts query parameters: q_j = HEEncrypt(pk, pattern[j])
  15  //     3. Sends encrypted query to the server.
  16  //
  17  //   Server (evaluator):
  18  //     1. Evaluates the circuit homomorphically on encrypted data + query.
  19  //     2. Returns encrypted result bit: Enc(match?) to the query maker.
  20  //     3. Never sees plaintext data, query, or result.
  21  //
  22  //   Query maker:
  23  //     1. Decrypts the result: match = HEDecrypt(sk, result)
  24  //     2. Learns only whether the pattern matched, nothing else.
  25  //
  26  // Circuit structure for byte-level matching:
  27  //
  28  //   "Does position i contain byte value v?"
  29  //   For each bit position b ∈ [0,7]:
  30  //     match_b = XNOR(data[i*8+b], v_b)      // bit equality
  31  //             = NOT(XOR(data[i*8+b], v_b))
  32  //   match_i = AND(match_0, match_1, ..., match_7)   // all 8 bits match
  33  //
  34  // The circuit depth is:
  35  //   - XOR: depth 0 (addition, no noise growth)
  36  //   - NOT: depth 0 (constant addition)
  37  //   - AND: depth 1 per gate (multiplication, quadratic noise growth)
  38  //
  39  // For byte matching: 1 level of AND (8-way AND via tree of 2-ANDs = 3 levels).
  40  // With HE64 parameters (1 multiplication level), we can do 1 AND gate per path,
  41  // which suffices for single-bit pattern tests. For full byte matching,
  42  // we'd need a deeper noise budget or modulus switching.
  43  //
  44  // Practical approach: use the 1-level budget for the critical AND gate,
  45  // and batch the XOR/NOT operations (which are free in noise) to reduce
  46  // the circuit to a single AND depth.
  47  
  48  package ring
  49  
  50  import (
  51  	"crypto/rand"
  52  	"io"
  53  )
  54  
  55  // EncryptedBitVector is a sequence of homomorphically encrypted bits.
  56  type EncryptedBitVector struct {
  57  	Bits   []*HECiphertext
  58  	PK     *KEMPublicKey
  59  	params KEMParams
  60  }
  61  
  62  // EncryptBits encrypts a byte slice as a bit vector.
  63  // Each bit gets its own ciphertext.
  64  func EncryptBits(pk *KEMPublicKey, data []byte) *EncryptedBitVector {
  65  	return EncryptBitsFrom(pk, data, rand.Reader)
  66  }
  67  
  68  // EncryptBitsFrom encrypts using the given randomness source.
  69  func EncryptBitsFrom(pk *KEMPublicKey, data []byte, rng io.Reader) *EncryptedBitVector {
  70  	bits := make([]*HECiphertext, len(data)*8)
  71  	for i, b := range data {
  72  		for j := range 8 {
  73  			bit := int((b >> j) & 1)
  74  			bits[i*8+j] = HEEncryptFrom(pk, bit, rng)
  75  		}
  76  	}
  77  	return &EncryptedBitVector{
  78  		Bits:   bits,
  79  		PK:     pk,
  80  		params: pk.P,
  81  	}
  82  }
  83  
  84  // DecryptBits decrypts an encrypted bit vector back to bytes.
  85  func DecryptBits(sk *KEMSecretKey, ebv *EncryptedBitVector) []byte {
  86  	nBytes := (len(ebv.Bits) + 7) / 8
  87  	result := make([]byte, nBytes)
  88  	for i, ct := range ebv.Bits {
  89  		bit := HEDecrypt(sk, ct)
  90  		if bit != 0 {
  91  			result[i/8] |= 1 << (i % 8)
  92  		}
  93  	}
  94  	return result
  95  }
  96  
  97  // EncryptedPattern represents an encrypted search pattern.
  98  type EncryptedPattern struct {
  99  	// PatternBits are the encrypted pattern bits.
 100  	PatternBits []*HECiphertext
 101  
 102  	// Mask indicates which positions are "don't care" (wildcard).
 103  	// nil means all positions must match.
 104  	Mask []bool
 105  }
 106  
 107  // EncryptPattern encrypts a pattern byte for searching.
 108  // Wildcards can be specified via the mask (true = must match, false = wildcard).
 109  func EncryptPattern(pk *KEMPublicKey, pattern []byte, mask []bool) *EncryptedPattern {
 110  	bits := make([]*HECiphertext, len(pattern)*8)
 111  	for i, b := range pattern {
 112  		for j := range 8 {
 113  			bit := int((b >> j) & 1)
 114  			bits[i*8+j] = HEEncrypt(pk, bit)
 115  		}
 116  	}
 117  
 118  	// Expand mask to bit level.
 119  	var bitMask []bool
 120  	if mask != nil {
 121  		bitMask = make([]bool, len(pattern)*8)
 122  		for i, m := range mask {
 123  			for j := range 8 {
 124  				bitMask[i*8+j] = m
 125  			}
 126  		}
 127  	}
 128  
 129  	return &EncryptedPattern{
 130  		PatternBits: bits,
 131  		Mask:        bitMask,
 132  	}
 133  }
 134  
 135  // MatchBit evaluates whether a single encrypted data bit equals
 136  // a single encrypted pattern bit. Returns Enc(data_bit XNOR pattern_bit).
 137  //
 138  // XNOR(a, b) = NOT(XOR(a, b)) = NOT(a + b) = 1 + a + b (mod 2).
 139  // In the ciphertext domain: HEAdd(HEAdd(enc_a, enc_b), Enc(1))... but
 140  // that's HEAdd(HEAdd(a, b), one). Actually NOT(XOR(a,b)):
 141  // = HENot(HEAdd(a, b)) = HENot(HEXOR(a, b))
 142  //
 143  // This is depth-0 (no multiplication), so it's free in noise budget.
 144  func MatchBit(dataBit, patternBit *HECiphertext) *HECiphertext {
 145  	xored := HEXOR(dataBit, patternBit) // 1 if bits differ
 146  	return HENot(xored)                 // 1 if bits equal
 147  }
 148  
 149  // MatchByte evaluates whether 8 consecutive data bits equal the
 150  // pattern byte. Returns Enc(all_8_bits_match).
 151  //
 152  // This requires AND-ing 8 bit-equality results together.
 153  // With depth-1 budget, we can do ONE AND gate total.
 154  // For 8 bits, we need log2(8) = 3 levels of AND — too deep.
 155  //
 156  // Solution: reduce to a single AND using batch XOR.
 157  // The trick: instead of checking each bit individually,
 158  // compute the Hamming distance homomorphically.
 159  //
 160  // Alternative single-depth approach:
 161  // Σ (data_b XOR pattern_b) = 0 iff all bits match.
 162  // In BGV mod 2: sum of XORs is just XOR of all bit-differences.
 163  // If any bit differs, the sum is 1 (odd count = nonzero).
 164  // But sum mod 2 doesn't detect even numbers of differences.
 165  //
 166  // Better: use the identity for AND of XNOR results.
 167  // For a single pair, just return the XNOR (depth 0).
 168  // The caller decides how many pairs to AND together based on budget.
 169  func MatchByte(dataBits, patternBits []*HECiphertext, rlk *RelinearizationKey) *HECiphertext {
 170  	if len(dataBits) != 8 || len(patternBits) != 8 {
 171  		panic("MatchByte requires exactly 8 bits each")
 172  	}
 173  
 174  	// Compute XNOR for each bit position.
 175  	matches := make([]*HECiphertext, 8)
 176  	for i := range 8 {
 177  		matches[i] = MatchBit(dataBits[i], patternBits[i])
 178  	}
 179  
 180  	// AND tree: depth log2(8) = 3.
 181  	// Level 1: AND pairs → 4 results (depth 1).
 182  	// Level 2: AND pairs → 2 results (depth 2).
 183  	// Level 3: AND pair → 1 result (depth 3).
 184  	//
 185  	// With depth-1 budget, we can only do level 1.
 186  	// Return the AND of the first pair as a partial match indicator.
 187  	//
 188  	// For full byte matching, use MatchByteSingle which reduces
 189  	// to a single multiplication.
 190  	return andTree(matches, rlk)
 191  }
 192  
 193  // MatchByteSingle evaluates byte equality using a single AND gate.
 194  //
 195  // Trick: XOR all 8 bit-differences together, then check if result is 0.
 196  // XOR(d0⊕p0, d1⊕p1, ..., d7⊕p7) = 0 iff ALL bits match... no, it's 0
 197  // iff an EVEN number of bits differ. That's not what we want.
 198  //
 199  // Alternative trick using the OR-NOT equivalence:
 200  // All bits match = NOT(any bit differs)
 201  // Any bit differs = OR(d0⊕p0, d1⊕p1, ..., d7⊕p7)
 202  // OR = NOT(AND(NOT(x), NOT(y), ...))
 203  //
 204  // So: allMatch = NOT(OR(diffs)) = AND(NOT(d0⊕p0), NOT(d1⊕p1), ...)
 205  //             = AND(XNOR(d0,p0), XNOR(d1,p1), ...)
 206  // This is the same 8-way AND, still depth 3.
 207  //
 208  // With depth-1 limitation, the practical approach is:
 209  // Split into two 4-bit groups, AND each group (1 level), then
 210  // the caller can AND the two groups in a second pass with fresh keys.
 211  //
 212  // Or: use the "indicator polynomial" trick:
 213  // f(x) = 1 - x evaluates to 1 at x=0 and 0 at x=1.
 214  // For the sum s = Σ (d_i ⊕ p_i), we want f(s) = 1 iff s = 0.
 215  // But s is the XOR (mod 2), not the integer sum, so this doesn't work directly.
 216  //
 217  // Practical depth-1 solution:
 218  // AND the first pair, returning Enc(bit0_matches AND bit1_matches).
 219  // The server can report partial match results for each pair of bits,
 220  // and the client combines them.
 221  func MatchByteSingle(dataBits, patternBits []*HECiphertext, rlk *RelinearizationKey) []*HECiphertext {
 222  	if len(dataBits) != 8 || len(patternBits) != 8 {
 223  		panic("MatchByteSingle requires exactly 8 bits each")
 224  	}
 225  
 226  	// Compute XNOR (bit equality) for each position.
 227  	xnors := make([]*HECiphertext, 8)
 228  	for i := range 8 {
 229  		xnors[i] = MatchBit(dataBits[i], patternBits[i])
 230  	}
 231  
 232  	// AND adjacent pairs (depth 1).
 233  	results := make([]*HECiphertext, 4)
 234  	for i := range 4 {
 235  		results[i] = HEAND(xnors[2*i], xnors[2*i+1], rlk)
 236  	}
 237  
 238  	return results
 239  }
 240  
 241  // SearchAt evaluates whether encrypted data starting at position pos
 242  // matches the encrypted pattern. Returns per-pair AND results.
 243  func SearchAt(data *EncryptedBitVector, pattern *EncryptedPattern, pos int, rlk *RelinearizationKey) []*HECiphertext {
 244  	patLen := len(pattern.PatternBits) / 8 // pattern length in bytes
 245  	if pos*8+patLen*8 > len(data.Bits) {
 246  		return nil // pattern would extend beyond data
 247  	}
 248  
 249  	// For each byte of the pattern, compute pair-AND results.
 250  	var allResults []*HECiphertext
 251  	for byteIdx := range patLen {
 252  		dataStart := (pos + byteIdx) * 8
 253  		patStart := byteIdx * 8
 254  
 255  		// Skip wildcard bytes.
 256  		if pattern.Mask != nil {
 257  			isWild := true
 258  			for b := range 8 {
 259  				if pattern.Mask[patStart+b] {
 260  					isWild = false
 261  					break
 262  				}
 263  			}
 264  			if isWild {
 265  				continue
 266  			}
 267  		}
 268  
 269  		pairs := MatchByteSingle(
 270  			data.Bits[dataStart:dataStart+8],
 271  			pattern.PatternBits[patStart:patStart+8],
 272  			rlk,
 273  		)
 274  		allResults = append(allResults, pairs...)
 275  	}
 276  
 277  	return allResults
 278  }
 279  
 280  // andTree computes a tree of AND operations.
 281  // If the depth exceeds the noise budget, the result may not decrypt correctly.
 282  func andTree(inputs []*HECiphertext, rlk *RelinearizationKey) *HECiphertext {
 283  	if len(inputs) == 0 {
 284  		return nil
 285  	}
 286  	if len(inputs) == 1 {
 287  		return inputs[0]
 288  	}
 289  
 290  	// Reduce pairwise.
 291  	for len(inputs) > 1 {
 292  		var next []*HECiphertext
 293  		for i := 0; i+1 < len(inputs); i += 2 {
 294  			next = append(next, HEAND(inputs[i], inputs[i+1], rlk))
 295  		}
 296  		if len(inputs)%2 == 1 {
 297  			next = append(next, inputs[len(inputs)-1])
 298  		}
 299  		inputs = next
 300  	}
 301  	return inputs[0]
 302  }
 303  
 304  // RecognizerResult holds the outcome of a homomorphic pattern match.
 305  type RecognizerResult struct {
 306  	// MatchPairs are the per-pair AND results.
 307  	// With depth-1 budget, each result covers 2 bit positions.
 308  	// ALL must decrypt to 1 for a full match.
 309  	MatchPairs []*HECiphertext
 310  
 311  	// Position is the data position that was tested.
 312  	Position int
 313  }
 314  
 315  // DecryptResult decrypts the recognizer output.
 316  // Returns true if ALL pair results are 1 (full match).
 317  func DecryptResult(sk *KEMSecretKey, result *RecognizerResult) bool {
 318  	for _, ct := range result.MatchPairs {
 319  		if HEDecrypt(sk, ct) != 1 {
 320  			return false
 321  		}
 322  	}
 323  	return true
 324  }
 325  
 326  // Recognize runs the full searchable encryption pipeline.
 327  //
 328  // 1. Encrypts the data and pattern.
 329  // 2. Homomorphically evaluates matches at each position.
 330  // 3. Returns encrypted results for each position.
 331  //
 332  // The server sees only ciphertexts. The data owner decrypts the results
 333  // to learn which positions matched.
 334  func Recognize(pk *KEMPublicKey, sk *KEMSecretKey, rlk *RelinearizationKey,
 335  	data []byte, pattern []byte) []int {
 336  
 337  	// Encrypt data and pattern.
 338  	encData := EncryptBits(pk, data)
 339  	encPattern := EncryptPattern(pk, pattern, nil)
 340  
 341  	patLen := len(pattern)
 342  	var matches []int
 343  
 344  	// Try each position.
 345  	for pos := 0; pos <= len(data)-patLen; pos++ {
 346  		pairs := SearchAt(encData, encPattern, pos, rlk)
 347  		if pairs == nil {
 348  			continue
 349  		}
 350  
 351  		result := &RecognizerResult{
 352  			MatchPairs: pairs,
 353  			Position:   pos,
 354  		}
 355  
 356  		if DecryptResult(sk, result) {
 357  			matches = append(matches, pos)
 358  		}
 359  	}
 360  
 361  	return matches
 362  }
 363