// Package crawl is the standalone multi-relay crawler. // Subscribes to directory events from known relays, discovers new relays, // and publishes collected events to a local relay. package main import ( "fmt" "os" "time" "smesh.lol/pkg/nostr/envelope" "smesh.lol/pkg/nostr/ws" ) var crawlSeeds = []string{ "wss://relay.damus.io", "wss://nos.lol", } const crawlKindsFilter = `[0,3,5,1984,10000,10002,10050]` var crawlLog *os.File func clog(format string, args ...any) { ts := time.Now().Format("15:04:05") fmt.Fprintf(crawlLog, ts+" "+format+"\n", args...) } type relayDB struct { score map[string]int order []string } func newRelayDB() *relayDB { return &relayDB{score: map[string]int{}} } func (db *relayDB) add(url string, weight int) { db.score[url] += weight } func (db *relayDB) sorted() []string { urls := []string{:0:len(db.score)} for u := range db.score { urls = append(urls, u) } for i := 1; i < len(urls); i++ { for j := i; j > 0 && db.score[urls[j]] > db.score[urls[j-1]]; j-- { urls[j], urls[j-1] = urls[j-1], urls[j] } } return urls } func main() { var err error crawlLog, err = os.OpenFile("/tmp/smesh-crawl.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) if err != nil { crawlLog = os.Stderr } localURL := "ws://127.0.0.1:3334" args := os.Args[1:] if len(args) >= 1 { localURL = args[0] } clog("started pid=%d local=%s", os.Getpid(), localURL) db := newRelayDB() for _, s := range crawlSeeds { db.add(s, 100) } pass := 0 for { pass++ clog("=== pass %d, %d relays known ===", pass, len(db.score)) ok := crawlPass(localURL, db) if ok { clog("pass complete, sleeping 5m") time.Sleep(5 * time.Minute) } else { clog("pass failed, retrying in 30s") time.Sleep(30 * time.Second) } } } func crawlPass(localURL string, db *relayDB) bool { relays := db.sorted() if len(relays) == 0 { clog("no relays known") return false } totalEvents := 0 for i, relayURL := range relays { clog("[%d/%d] crawling %s (score %d)", i+1, len(relays), relayURL, db.score[relayURL]) events := crawlRelay(relayURL) if len(events) == 0 { clog(" %s → 0 events", relayURL) time.Sleep(1 * time.Second) continue } clog(" %s → %d events", relayURL, len(events)) for _, raw := range events { crawlExtractRelays(raw, db) } published := crawlPublishBatch(localURL, events) clog(" published %d/%d to local", published, len(events)) totalEvents += published time.Sleep(1 * time.Second) } clog("total %d events from %d relays", totalEvents, len(relays)) return true } func crawlRelay(relayURL string) [][]byte { remote, err := ws.Dial(relayURL) if err != nil { clog(" dial %s FAILED: %v", relayURL, err) return nil } defer remote.Close() reqJSON := []byte(`["REQ","cr",{"kinds":` | crawlKindsFilter | `,"limit":200}]`) if err := remote.WriteText(reqJSON); err != nil { clog(" write REQ to %s failed: %v", relayURL, err) return nil } var events [][]byte for { op, payload, err := remote.ReadMessage() if err != nil { break } if op != ws.OpText { continue } label, rem, _ := envelope.Identify(payload) if label == envelope.EOSELabel { break } if label == envelope.EventLabel { var er envelope.EventResult if _, err := er.Unmarshal(rem); err == nil && er.Event != nil { es := &envelope.EventSubmission{E: er.Event} events = append(events, es.Marshal(nil)) } } } return events } func crawlExtractRelays(raw []byte, db *relayDB) { _, rem, err := envelope.Identify(raw) if err != nil { return } var es envelope.EventSubmission if _, err := es.Unmarshal(rem); err != nil || es.E == nil { return } ev := es.E if (ev.Kind != 10002 && ev.Kind != 10050) || ev.Tags == nil { return } for _, t := range ev.Tags.GetAll([]byte("r")) { if t.Len() >= 2 { url := string(t.Value()) if len(url) > 5 && (hasPrefix(url, "wss://") || hasPrefix(url, "ws://")) { db.add(url, 1) } } } } func crawlPublishBatch(localURL string, events [][]byte) int { local, err := ws.Dial(localURL) if err != nil { clog(" local connect failed: %v", err) return 0 } defer local.Close() for _, evBytes := range events { local.WriteText(evBytes) } count := 0 for count < len(events) { _, _, err := local.ReadMessage() if err != nil { break } count++ } return count } func hasPrefix(s, prefix string) bool { return len(s) >= len(prefix) && s[:len(prefix)] == prefix }