// Package find provides full-text search over stored events using the // word index (wrd). Words are extracted from event content, lowercased, // and hashed into the sorted wrd index at storage time. This package // queries that index to find matching events. package find import ( "sort" "smesh.lol/pkg/nostr/event" "smesh.lol/pkg/store" ) // Finder performs full-text search. type Finder struct { store *store.Engine } // New creates a Finder. func New(s *store.Engine) *Finder { return &Finder{store: s} } // Search finds events matching all words in the query. // Results are sorted newest-first. Limit 0 = no limit. func (f *Finder) Search(query []byte, limit int) []*event.E { words := SplitWords(query) if len(words) == 0 { return nil } // Get serials for each word, intersect. var sets [][]uint64 for _, w := range words { serials := f.store.SearchWord(w) if len(serials) == 0 { return nil // all words must match } sets = append(sets, serials) } // Intersect all serial sets. result := sets[0] for i := 1; i < len(sets); i++ { result = intersect(result, sets[i]) if len(result) == 0 { return nil } } // Fetch events. var events event.S for _, ser := range result { ev, err := f.store.GetBySerial(ser) if err != nil { continue } events = append(events, ev) } sort.Sort(events) // newest first if limit > 0 && len(events) > limit { events = events[:limit] } return events } // SplitWords splits content into lowercase words (>= 3 chars). // Exported so the store can reuse it for indexing. 
//
// A word is a maximal run of ASCII letters and digits; every other byte
// (including each byte of a multi-byte UTF-8 sequence) acts as a separator.
// ASCII upper-case letters are folded to lower case. Each returned word is
// a freshly allocated slice that does not alias content. The result is nil
// when no word qualifies.
func SplitWords(content []byte) [][]byte {
	// Shortest run of word bytes that is kept as a word.
	const minLen = 3

	var (
		words [][]byte
		word  []byte // scratch buffer, reused for every word
	)

	// flush emits the pending word if it is long enough and resets the
	// scratch buffer. The word is cloned because the buffer is reused.
	flush := func() {
		if len(word) >= minLen {
			words = append(words, append([]byte(nil), word...))
		}
		word = word[:0]
	}

	for _, b := range content {
		switch {
		case b >= 'A' && b <= 'Z':
			word = append(word, b+('a'-'A')) // fold to lower case
		case (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9'):
			word = append(word, b)
		default:
			flush()
		}
	}
	flush() // content may end in the middle of a word

	return words
}

// intersect returns the elements of a that also occur in b, preserving the
// order (and any duplicates) of a. The result is nil when nothing is shared.
func intersect(a, b []uint64) []uint64 {
	inB := map[uint64]struct{}{}
	for _, v := range b {
		inB[v] = struct{}{}
	}

	var out []uint64
	for _, v := range a {
		if _, ok := inB[v]; ok {
			out = append(out, v)
		}
	}
	return out
}