// Package negentropy implements the negentropy set reconciliation protocol // for efficient event sync between relays. // // The protocol works by exchanging XOR fingerprints over sorted ranges of // (timestamp, id) pairs. Matching fingerprints mean the range is identical; // mismatched ranges are subdivided recursively until individual items are // exchanged. package negentropy import ( "bytes" "crypto/sha256" "math" "sort" "smesh.lol/pkg/nostr/event" "smesh.lol/pkg/nostr/filter" "smesh.lol/pkg/store" ) // Item is a (timestamp, id) pair for set reconciliation. type Item struct { Timestamp int64 ID []byte // 32 bytes } func compareItems(a, b Item) int { if a.Timestamp != b.Timestamp { if a.Timestamp < b.Timestamp { return -1 } return 1 } return bytes.Compare(a.ID, b.ID) } // ItemsFromEvents extracts sorted items from events. func ItemsFromEvents(events []*event.E) []Item { items := []Item{:len(events)} for i, ev := range events { id := []byte{:32} copy(id, ev.ID) items[i] = Item{Timestamp: ev.CreatedAt, ID: id} } sortItems(items) return items } func sortItems(items []Item) { sort.Slice(items, func(i, j int) bool { return compareItems(items[i], items[j]) < 0 }) } // Fingerprint computes an XOR fingerprint over a range of items. func Fingerprint(items []Item) [32]byte { var fp [32]byte for _, it := range items { for j := 0; j < 32 && j < len(it.ID); j++ { fp[j] ^= it.ID[j] } } return fp } // Diff computes set differences between two sorted item lists. // Returns have (items in local but not remote) and need (items in remote but not local). func Diff(local, remote []Item) (have, need []Item) { i, j := 0, 0 for i < len(local) && j < len(remote) { cmp := compareItems(local[i], remote[j]) if cmp < 0 { have = append(have, local[i]) i++ } else if cmp > 0 { need = append(need, remote[j]) j++ } else { i++ j++ } } for ; i < len(local); i++ { have = append(have, local[i]) } for ; j < len(remote); j++ { need = append(need, remote[j]) } return } // Reconciler performs fingerprint-based reconciliation. type Reconciler struct { items []Item } // NewReconciler creates a reconciler from a sorted item list. func NewReconciler(items []Item) *Reconciler { sortItems(items) return &Reconciler{items: items} } // Range represents a segment with a fingerprint. type Range struct { UpperTimestamp int64 UpperID []byte Fingerprint [32]byte Count int } // Split divides the item set into n ranges with fingerprints. func (r *Reconciler) Split(n int) []Range { if n <= 0 || len(r.items) == 0 { return nil } if n > len(r.items) { n = len(r.items) } segSize := len(r.items) / n ranges := []Range{:0:n} for i := 0; i < n; i++ { lo := i * segSize hi := (i + 1) * segSize if i == n-1 { hi = len(r.items) } upper := r.items[hi-1] ranges = append(ranges, Range{ UpperTimestamp: upper.Timestamp, UpperID: upper.ID, Fingerprint: Fingerprint(r.items[lo:hi]), Count: hi - lo, }) } return ranges } // FindMismatches compares local ranges against remote ranges and // returns indices of ranges that differ. func FindMismatches(local, remote []Range) []int { n := len(local) if len(remote) < n { n = len(remote) } var mismatches []int for i := 0; i < n; i++ { if local[i].Fingerprint != remote[i].Fingerprint { mismatches = append(mismatches, i) } } return mismatches } // Syncer orchestrates sync between local store and a remote item set. type Syncer struct { store *store.Engine } // NewSyncer creates a syncer. func NewSyncer(s *store.Engine) *Syncer { return &Syncer{store: s} } // LocalItems returns all (timestamp, id) pairs from the local store. func (s *Syncer) LocalItems() []Item { f := &filter.F{} events, err := s.store.QueryEvents(f) if err != nil { return nil } return ItemsFromEvents(events) } // FindNeeded compares local items against remote items and returns // IDs of events we need to fetch. func (s *Syncer) FindNeeded(remoteItems []Item) [][]byte { local := s.LocalItems() _, need := Diff(local, remoteItems) ids := [][]byte{:len(need)} for i, it := range need { ids[i] = it.ID } return ids } // FindHave compares local items against remote items and returns // events we have that the remote doesn't. func (s *Syncer) FindHave(remoteItems []Item) []*event.E { local := s.LocalItems() have, _ := Diff(local, remoteItems) var events []*event.E for _, it := range have { ev, err := s.store.GetByID(it.ID) if err == nil && ev != nil { events = append(events, ev) } } return events } func sha256Hash(data []byte) []byte { h := sha256.Sum256(data) return h[:] } // EstimateRanges returns the optimal number of ranges for a given item count. func EstimateRanges(n int) int { if n <= 0 { return 0 } r := int(math.Ceil(math.Sqrt(float64(n)))) if r < 2 { return 2 } if r > 128 { return 128 } return r }