find.mx raw

   1  // Package find provides full-text search over stored events using the
   2  // word index (wrd). Words are extracted from event content, lowercased,
   3  // and hashed into the sorted wrd index at storage time. This package
   4  // queries that index to find matching events.
   5  package find
   6  
   7  import (
   8  	"sort"
   9  
  10  	"smesh.lol/pkg/nostr/event"
  11  	"smesh.lol/pkg/store"
  12  )
  13  
  14  // Finder performs full-text search.
  15  type Finder struct {
  16  	store *store.Engine
  17  }
  18  
  19  // New creates a Finder.
  20  func New(s *store.Engine) *Finder { return &Finder{store: s} }
  21  
  22  // Search finds events matching all words in the query.
  23  // Results are sorted newest-first. Limit 0 = no limit.
  24  func (f *Finder) Search(query []byte, limit int) []*event.E {
  25  	words := SplitWords(query)
  26  	if len(words) == 0 {
  27  		return nil
  28  	}
  29  
  30  	// Get serials for each word, intersect.
  31  	var sets [][]uint64
  32  	for _, w := range words {
  33  		serials := f.store.SearchWord(w)
  34  		if len(serials) == 0 {
  35  			return nil // all words must match
  36  		}
  37  		sets = append(sets, serials)
  38  	}
  39  
  40  	// Intersect all serial sets.
  41  	result := sets[0]
  42  	for i := 1; i < len(sets); i++ {
  43  		result = intersect(result, sets[i])
  44  		if len(result) == 0 {
  45  			return nil
  46  		}
  47  	}
  48  
  49  	// Fetch events.
  50  	var events event.S
  51  	for _, ser := range result {
  52  		ev, err := f.store.GetBySerial(ser)
  53  		if err != nil {
  54  			continue
  55  		}
  56  		events = append(events, ev)
  57  	}
  58  	sort.Sort(events) // newest first
  59  
  60  	if limit > 0 && len(events) > limit {
  61  		events = events[:limit]
  62  	}
  63  	return events
  64  }
  65  
  66  // SplitWords splits content into lowercase words (>= 3 chars).
  67  // Exported so the store can reuse it for indexing.
  68  func SplitWords(content []byte) [][]byte {
  69  	var words [][]byte
  70  	var word []byte
  71  	for _, b := range content {
  72  		if b >= 'A' && b <= 'Z' {
  73  			word = append(word, b+32) // lowercase
  74  		} else if (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9') {
  75  			word = append(word, b)
  76  		} else {
  77  			if len(word) >= 3 {
  78  				w := []byte{:len(word)}
  79  				copy(w, word)
  80  				words = append(words, w)
  81  			}
  82  			word = word[:0]
  83  		}
  84  	}
  85  	if len(word) >= 3 {
  86  		w := []byte{:len(word)}
  87  		copy(w, word)
  88  		words = append(words, w)
  89  	}
  90  	return words
  91  }
  92  
  93  func intersect(a, b []uint64) []uint64 {
  94  	set := map[uint64]bool{}
  95  	for _, v := range b {
  96  		set[v] = true
  97  	}
  98  	var out []uint64
  99  	for _, v := range a {
 100  		if set[v] {
 101  			out = append(out, v)
 102  		}
 103  	}
 104  	return out
 105  }
 106