1 // Package find provides full-text search over stored events using the
2 // word index (wrd). Words are extracted from event content, lowercased,
3 // and hashed into the sorted wrd index at storage time. This package
4 // queries that index to find matching events.
5 package find
6 7 import (
8 "sort"
9 10 "smesh.lol/pkg/nostr/event"
11 "smesh.lol/pkg/store"
12 )
13 14 // Finder performs full-text search.
15 type Finder struct {
16 store *store.Engine
17 }
18 19 // New creates a Finder.
20 func New(s *store.Engine) *Finder { return &Finder{store: s} }
21 22 // Search finds events matching all words in the query.
23 // Results are sorted newest-first. Limit 0 = no limit.
24 func (f *Finder) Search(query []byte, limit int) []*event.E {
25 words := SplitWords(query)
26 if len(words) == 0 {
27 return nil
28 }
29 30 // Get serials for each word, intersect.
31 var sets [][]uint64
32 for _, w := range words {
33 serials := f.store.SearchWord(w)
34 if len(serials) == 0 {
35 return nil // all words must match
36 }
37 sets = append(sets, serials)
38 }
39 40 // Intersect all serial sets.
41 result := sets[0]
42 for i := 1; i < len(sets); i++ {
43 result = intersect(result, sets[i])
44 if len(result) == 0 {
45 return nil
46 }
47 }
48 49 // Fetch events.
50 var events event.S
51 for _, ser := range result {
52 ev, err := f.store.GetBySerial(ser)
53 if err != nil {
54 continue
55 }
56 events = append(events, ev)
57 }
58 sort.Sort(events) // newest first
59 60 if limit > 0 && len(events) > limit {
61 events = events[:limit]
62 }
63 return events
64 }
65 66 // SplitWords splits content into lowercase words (>= 3 chars).
67 // Exported so the store can reuse it for indexing.
68 func SplitWords(content []byte) [][]byte {
69 var words [][]byte
70 var word []byte
71 for _, b := range content {
72 if b >= 'A' && b <= 'Z' {
73 word = append(word, b+32) // lowercase
74 } else if (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9') {
75 word = append(word, b)
76 } else {
77 if len(word) >= 3 {
78 w := []byte{:len(word)}
79 copy(w, word)
80 words = append(words, w)
81 }
82 word = word[:0]
83 }
84 }
85 if len(word) >= 3 {
86 w := []byte{:len(word)}
87 copy(w, word)
88 words = append(words, w)
89 }
90 return words
91 }
92 93 func intersect(a, b []uint64) []uint64 {
94 set := map[uint64]bool{}
95 for _, v := range b {
96 set[v] = true
97 }
98 var out []uint64
99 for _, v := range a {
100 if set[v] {
101 out = append(out, v)
102 }
103 }
104 return out
105 }
106