health.go raw
1 //go:build !(js && wasm)
2
3 package database
4
5 import (
6 "bytes"
7 "fmt"
8 "io"
9 "time"
10
11 "github.com/dgraph-io/badger/v4"
12 "next.orly.dev/pkg/lol/chk"
13 "next.orly.dev/pkg/lol/log"
14 "next.orly.dev/pkg/database/indexes"
15 "next.orly.dev/pkg/database/indexes/types"
16 )
17
// HealthReport is the outcome of a single database health check: raw entry
// counts per index prefix, the integrity problems that were detected, and an
// overall score derived from those problems.
type HealthReport struct {
	// ScanStarted is the time at which the scan began.
	ScanStarted time.Time
	// ScanDuration is how long the whole scan took.
	ScanDuration time.Duration

	// CompactEvents is the number of events stored in compact format (cmp).
	CompactEvents int64
	// LegacyEvents is the number of events stored in legacy format (evt).
	LegacyEvents int64
	// SmallEvents is the number of small inline events (sev).
	SmallEvents int64
	// TotalEvents is the total number of events.
	TotalEvents int64
	// SerialIdCount is the number of serial -> event ID mappings (sei).
	SerialIdCount int64

	// PubkeySerials is the number of pks entries (pubkey hash -> serial).
	PubkeySerials int64
	// SerialPubkeys is the number of spk entries (serial -> pubkey).
	SerialPubkeys int64

	// EventPubkeyEdges is the number of epg graph entries.
	EventPubkeyEdges int64
	// PubkeyEventEdges is the number of peg graph entries.
	PubkeyEventEdges int64
	// EventEventEdges is the number of eeg graph entries.
	EventEventEdges int64
	// GraphEventEdges is the number of gee graph entries.
	GraphEventEdges int64

	// KindIndexes is the number of kc- index entries.
	KindIndexes int64
	// PubkeyIndexes is the number of pc- index entries.
	PubkeyIndexes int64
	// TagIndexes is the number of tc- index entries.
	TagIndexes int64
	// WordIndexes is the number of wrd index entries.
	WordIndexes int64
	// IdIndexes is the number of eid index entries.
	IdIndexes int64

	// MissingSerialEventIds counts cmp entries that have no corresponding
	// sei entry.
	MissingSerialEventIds int64
	// OrphanedSerialEventIds counts sei entries that have no corresponding
	// cmp entry.
	OrphanedSerialEventIds int64
	// PubkeySerialMismatches counts pks entries without a matching spk
	// entry, or vice versa.
	PubkeySerialMismatches int64
	// OrphanedIndexes counts index entries that point to non-existent
	// events.
	OrphanedIndexes int64

	// MissingSeiSamples holds a sample of the serials whose sei entry is
	// missing, kept for debugging.
	MissingSeiSamples []uint64

	// HealthScore is the overall score, from 0 to 100.
	HealthScore int
}
60
61 // String returns a human-readable health report.
62 func (r *HealthReport) String() string {
63 var buf bytes.Buffer
64 fmt.Fprintln(&buf, "Database Health Report")
65 fmt.Fprintln(&buf, "======================")
66 fmt.Fprintf(&buf, "Scan duration: %v\n\n", r.ScanDuration)
67
68 fmt.Fprintln(&buf, "Event Storage:")
69 fmt.Fprintf(&buf, " Compact events (cmp): %d\n", r.CompactEvents)
70 fmt.Fprintf(&buf, " Legacy events (evt): %d\n", r.LegacyEvents)
71 fmt.Fprintf(&buf, " Small events (sev): %d\n", r.SmallEvents)
72 fmt.Fprintf(&buf, " Total events: %d\n", r.TotalEvents)
73 fmt.Fprintf(&buf, " Serial->ID maps (sei): %d\n\n", r.SerialIdCount)
74
75 fmt.Fprintln(&buf, "Pubkey Mappings:")
76 fmt.Fprintf(&buf, " Pubkey serials (pks): %d\n", r.PubkeySerials)
77 fmt.Fprintf(&buf, " Serial pubkeys (spk): %d\n\n", r.SerialPubkeys)
78
79 fmt.Fprintln(&buf, "Graph Edges:")
80 fmt.Fprintf(&buf, " Event->Pubkey (epg): %d\n", r.EventPubkeyEdges)
81 fmt.Fprintf(&buf, " Pubkey->Event (peg): %d\n", r.PubkeyEventEdges)
82 fmt.Fprintf(&buf, " Event->Event (eeg): %d\n", r.EventEventEdges)
83 fmt.Fprintf(&buf, " Event<-Event (gee): %d\n\n", r.GraphEventEdges)
84
85 fmt.Fprintln(&buf, "Search Indexes:")
86 fmt.Fprintf(&buf, " Kind indexes (kc-): %d\n", r.KindIndexes)
87 fmt.Fprintf(&buf, " Pubkey indexes (pc-): %d\n", r.PubkeyIndexes)
88 fmt.Fprintf(&buf, " Tag indexes (tc-): %d\n", r.TagIndexes)
89 fmt.Fprintf(&buf, " Word indexes (wrd): %d\n", r.WordIndexes)
90 fmt.Fprintf(&buf, " ID indexes (eid): %d\n\n", r.IdIndexes)
91
92 fmt.Fprintln(&buf, "Issues Found:")
93 fmt.Fprintf(&buf, " Missing sei mappings: %d", r.MissingSerialEventIds)
94 if r.MissingSerialEventIds > 0 {
95 fmt.Fprint(&buf, " (CRITICAL)")
96 }
97 fmt.Fprintln(&buf)
98 fmt.Fprintf(&buf, " Orphaned sei mappings: %d\n", r.OrphanedSerialEventIds)
99 fmt.Fprintf(&buf, " Pubkey serial mismatch: %d\n", r.PubkeySerialMismatches)
100 fmt.Fprintf(&buf, " Orphaned indexes: %d\n\n", r.OrphanedIndexes)
101
102 if len(r.MissingSeiSamples) > 0 {
103 fmt.Fprintln(&buf, "Sample missing sei serials:")
104 for i, s := range r.MissingSeiSamples {
105 if i >= 10 {
106 fmt.Fprintf(&buf, " ... and %d more\n", len(r.MissingSeiSamples)-10)
107 break
108 }
109 fmt.Fprintf(&buf, " - %d\n", s)
110 }
111 fmt.Fprintln(&buf)
112 }
113
114 fmt.Fprintf(&buf, "Health Score: %d/100\n", r.HealthScore)
115
116 if r.HealthScore < 50 {
117 fmt.Fprintln(&buf, "\n⚠️ Database has critical issues. Run 'orly db repair' to fix.")
118 } else if r.HealthScore < 80 {
119 fmt.Fprintln(&buf, "\n⚠️ Database has some issues. Consider running 'orly db repair'.")
120 } else {
121 fmt.Fprintln(&buf, "\n✓ Database is healthy.")
122 }
123
124 return buf.String()
125 }
126
127 // HealthCheck performs a comprehensive health check of the database.
128 // It scans all index prefixes and verifies referential integrity.
129 func (d *D) HealthCheck(progress io.Writer) (report *HealthReport, err error) {
130 report = &HealthReport{
131 ScanStarted: time.Now(),
132 MissingSeiSamples: make([]uint64, 0, 100),
133 }
134
135 if progress != nil {
136 fmt.Fprintln(progress, "Starting database health check...")
137 }
138
139 // Build prefix buffers for all index types
140 cmpPrf := buildPrefix(indexes.CompactEventEnc(nil))
141 seiPrf := buildPrefix(indexes.SerialEventIdEnc(nil))
142 evtPrf := buildPrefix(indexes.EventEnc(nil))
143 sevPrf := buildPrefix(indexes.SmallEventEnc(nil))
144 pksPrf := buildPrefix(indexes.PubkeySerialEnc(nil, nil))
145 spkPrf := buildPrefix(indexes.SerialPubkeyEnc(nil))
146 epgPrf := buildPrefix(indexes.EventPubkeyGraphEnc(nil, nil, nil, nil))
147 pegPrf := buildPrefix(indexes.PubkeyEventGraphEnc(nil, nil, nil, nil))
148 eegPrf := buildPrefix(indexes.EventEventGraphEnc(nil, nil, nil, nil))
149 geePrf := buildPrefix(indexes.GraphEventEventEnc(nil, nil, nil, nil))
150 kcPrf := buildPrefix(indexes.KindEnc(nil, nil, nil))
151 pcPrf := buildPrefix(indexes.PubkeyEnc(nil, nil, nil))
152 tcPrf := buildPrefix(indexes.TagEnc(nil, nil, nil, nil))
153 wrdPrf := buildPrefix(indexes.WordEnc(nil, nil))
154 eidPrf := buildPrefix(indexes.IdEnc(nil, nil))
155
156 // Phase 1: Count all entries with each prefix
157 if progress != nil {
158 fmt.Fprintln(progress, "Phase 1: Counting entries by prefix...")
159 }
160
161 err = d.View(func(txn *badger.Txn) error {
162 // Count compact events
163 report.CompactEvents = countPrefix(txn, cmpPrf)
164 if progress != nil {
165 fmt.Fprintf(progress, " Compact events (cmp): %d\n", report.CompactEvents)
166 }
167
168 // Count serial->eventID mappings
169 report.SerialIdCount = countPrefix(txn, seiPrf)
170 if progress != nil {
171 fmt.Fprintf(progress, " Serial->ID maps (sei): %d\n", report.SerialIdCount)
172 }
173
174 // Count legacy events
175 report.LegacyEvents = countPrefix(txn, evtPrf)
176 report.SmallEvents = countPrefix(txn, sevPrf)
177 report.TotalEvents = report.CompactEvents + report.LegacyEvents + report.SmallEvents
178 if progress != nil {
179 fmt.Fprintf(progress, " Legacy events (evt): %d\n", report.LegacyEvents)
180 fmt.Fprintf(progress, " Small events (sev): %d\n", report.SmallEvents)
181 }
182
183 // Count pubkey serial mappings
184 report.PubkeySerials = countPrefix(txn, pksPrf)
185 report.SerialPubkeys = countPrefix(txn, spkPrf)
186 if progress != nil {
187 fmt.Fprintf(progress, " Pubkey serials (pks): %d, (spk): %d\n", report.PubkeySerials, report.SerialPubkeys)
188 }
189
190 // Count graph edges
191 report.EventPubkeyEdges = countPrefix(txn, epgPrf)
192 report.PubkeyEventEdges = countPrefix(txn, pegPrf)
193 report.EventEventEdges = countPrefix(txn, eegPrf)
194 report.GraphEventEdges = countPrefix(txn, geePrf)
195 if progress != nil {
196 fmt.Fprintf(progress, " Graph edges: epg=%d, peg=%d, eeg=%d, gee=%d\n",
197 report.EventPubkeyEdges, report.PubkeyEventEdges, report.EventEventEdges, report.GraphEventEdges)
198 }
199
200 // Count search indexes
201 report.KindIndexes = countPrefix(txn, kcPrf)
202 report.PubkeyIndexes = countPrefix(txn, pcPrf)
203 report.TagIndexes = countPrefix(txn, tcPrf)
204 report.WordIndexes = countPrefix(txn, wrdPrf)
205 report.IdIndexes = countPrefix(txn, eidPrf)
206 if progress != nil {
207 fmt.Fprintf(progress, " Indexes: kc=%d, pc=%d, tc=%d, wrd=%d, eid=%d\n",
208 report.KindIndexes, report.PubkeyIndexes, report.TagIndexes, report.WordIndexes, report.IdIndexes)
209 }
210
211 return nil
212 })
213 if chk.E(err) {
214 return nil, err
215 }
216
217 // Phase 2: Check cmp->sei integrity (CRITICAL)
218 if progress != nil {
219 fmt.Fprintln(progress, "\nPhase 2: Checking compact event -> serial ID integrity...")
220 }
221
222 err = d.View(func(txn *badger.Txn) error {
223 it := txn.NewIterator(badger.IteratorOptions{Prefix: cmpPrf})
224 defer it.Close()
225
226 checked := int64(0)
227 for it.Rewind(); it.Valid(); it.Next() {
228 // Extract serial from cmp key: prefix (3 bytes) + serial (5 bytes)
229 key := it.Item().Key()
230 if len(key) < 8 {
231 continue
232 }
233
234 // Extract the serial
235 serial := extractSerial(key[3:8])
236
237 // Check if sei entry exists for this serial
238 seiKey := buildSeiKey(serial)
239 _, err := txn.Get(seiKey)
240 if err == badger.ErrKeyNotFound {
241 report.MissingSerialEventIds++
242 if len(report.MissingSeiSamples) < 100 {
243 report.MissingSeiSamples = append(report.MissingSeiSamples, serial)
244 }
245 } else if err != nil {
246 log.W.F("error checking sei for serial %d: %v", serial, err)
247 }
248
249 checked++
250 if progress != nil && checked%100000 == 0 {
251 fmt.Fprintf(progress, " Checked %d compact events, %d missing sei so far...\n",
252 checked, report.MissingSerialEventIds)
253 }
254 }
255
256 if progress != nil {
257 fmt.Fprintf(progress, " Checked %d compact events, found %d missing sei entries\n",
258 checked, report.MissingSerialEventIds)
259 }
260
261 return nil
262 })
263 if chk.E(err) {
264 return nil, err
265 }
266
267 // Phase 3: Check for orphaned sei entries (sei without cmp)
268 if progress != nil {
269 fmt.Fprintln(progress, "\nPhase 3: Checking for orphaned serial ID mappings...")
270 }
271
272 err = d.View(func(txn *badger.Txn) error {
273 it := txn.NewIterator(badger.IteratorOptions{Prefix: seiPrf})
274 defer it.Close()
275
276 checked := int64(0)
277 for it.Rewind(); it.Valid(); it.Next() {
278 key := it.Item().Key()
279 if len(key) < 8 {
280 continue
281 }
282
283 // Extract serial from sei key
284 serial := extractSerial(key[3:8])
285
286 // Check if cmp entry exists for this serial
287 cmpKey := buildCmpKey(serial)
288 _, err := txn.Get(cmpKey)
289 if err == badger.ErrKeyNotFound {
290 // Also check legacy evt format
291 evtKey := buildEvtKey(serial)
292 _, err2 := txn.Get(evtKey)
293 if err2 == badger.ErrKeyNotFound {
294 report.OrphanedSerialEventIds++
295 }
296 } else if err != nil {
297 log.W.F("error checking cmp for serial %d: %v", serial, err)
298 }
299
300 checked++
301 if progress != nil && checked%100000 == 0 {
302 fmt.Fprintf(progress, " Checked %d sei entries, %d orphaned so far...\n",
303 checked, report.OrphanedSerialEventIds)
304 }
305 }
306
307 if progress != nil {
308 fmt.Fprintf(progress, " Checked %d sei entries, found %d orphaned\n",
309 checked, report.OrphanedSerialEventIds)
310 }
311
312 return nil
313 })
314 if chk.E(err) {
315 return nil, err
316 }
317
318 // Phase 4: Check pubkey serial consistency
319 if progress != nil {
320 fmt.Fprintln(progress, "\nPhase 4: Checking pubkey serial consistency...")
321 }
322
323 err = d.View(func(txn *badger.Txn) error {
324 // Check that pks count roughly matches spk count
325 // A small difference is acceptable due to timing, but large differences indicate corruption
326 diff := report.PubkeySerials - report.SerialPubkeys
327 if diff < 0 {
328 diff = -diff
329 }
330 // Allow 1% difference
331 threshold := report.PubkeySerials / 100
332 if threshold < 10 {
333 threshold = 10
334 }
335 if diff > threshold {
336 report.PubkeySerialMismatches = diff
337 if progress != nil {
338 fmt.Fprintf(progress, " Found %d pubkey serial mismatches (pks=%d, spk=%d)\n",
339 diff, report.PubkeySerials, report.SerialPubkeys)
340 }
341 } else if progress != nil {
342 fmt.Fprintln(progress, " Pubkey serial counts are consistent")
343 }
344
345 return nil
346 })
347 if chk.E(err) {
348 return nil, err
349 }
350
351 // Calculate health score
352 report.ScanDuration = time.Since(report.ScanStarted)
353 report.HealthScore = calculateHealthScore(report)
354
355 if progress != nil {
356 fmt.Fprintf(progress, "\nHealth check complete. Score: %d/100\n", report.HealthScore)
357 }
358
359 return report, nil
360 }
361
362 // buildPrefix creates a prefix buffer from an encoder.
363 func buildPrefix(enc *indexes.T) []byte {
364 buf := new(bytes.Buffer)
365 if err := enc.MarshalWrite(buf); err != nil {
366 return nil
367 }
368 // Return only the prefix part (3 bytes)
369 b := buf.Bytes()
370 if len(b) >= 3 {
371 return b[:3]
372 }
373 return b
374 }
375
376 // countPrefix counts the number of entries with the given prefix.
377 func countPrefix(txn *badger.Txn, prefix []byte) int64 {
378 it := txn.NewIterator(badger.IteratorOptions{
379 Prefix: prefix,
380 PrefetchValues: false,
381 })
382 defer it.Close()
383
384 var count int64
385 for it.Rewind(); it.Valid(); it.Next() {
386 count++
387 }
388 return count
389 }
390
// extractSerial decodes a 40-bit big-endian serial from the first five bytes
// of b. It returns 0 when b holds fewer than five bytes; any bytes beyond the
// fifth are ignored.
func extractSerial(b []byte) uint64 {
	if len(b) < 5 {
		return 0
	}
	var serial uint64
	// Fold the bytes most-significant first.
	for _, octet := range b[:5] {
		serial = serial<<8 | uint64(octet)
	}
	return serial
}
402
403 // buildSeiKey builds a sei (serial->eventID) key for the given serial.
404 func buildSeiKey(serial uint64) []byte {
405 ser := new(types.Uint40)
406 ser.Set(serial)
407 buf := new(bytes.Buffer)
408 indexes.SerialEventIdEnc(ser).MarshalWrite(buf)
409 return buf.Bytes()
410 }
411
412 // buildCmpKey builds a cmp (compact event) key for the given serial.
413 func buildCmpKey(serial uint64) []byte {
414 ser := new(types.Uint40)
415 ser.Set(serial)
416 buf := new(bytes.Buffer)
417 indexes.CompactEventEnc(ser).MarshalWrite(buf)
418 return buf.Bytes()
419 }
420
421 // buildEvtKey builds an evt (legacy event) key for the given serial.
422 func buildEvtKey(serial uint64) []byte {
423 ser := new(types.Uint40)
424 ser.Set(serial)
425 buf := new(bytes.Buffer)
426 indexes.EventEnc(ser).MarshalWrite(buf)
427 return buf.Bytes()
428 }
429
430 // calculateHealthScore calculates a health score from 0-100 based on the report.
431 func calculateHealthScore(r *HealthReport) int {
432 score := 100
433
434 // Missing sei is critical - each one costs 1 point, max 50 point penalty
435 if r.MissingSerialEventIds > 0 {
436 penalty := int(r.MissingSerialEventIds)
437 if penalty > 50 {
438 penalty = 50
439 }
440 score -= penalty
441 }
442
443 // Orphaned sei is less critical - each one costs 0.1 points, max 20 point penalty
444 if r.OrphanedSerialEventIds > 0 {
445 penalty := int(r.OrphanedSerialEventIds / 10)
446 if penalty > 20 {
447 penalty = 20
448 }
449 score -= penalty
450 }
451
452 // Pubkey mismatches cost 0.5 points each, max 20 point penalty
453 if r.PubkeySerialMismatches > 0 {
454 penalty := int(r.PubkeySerialMismatches / 2)
455 if penalty > 20 {
456 penalty = 20
457 }
458 score -= penalty
459 }
460
461 // Orphaned indexes cost 0.01 points each, max 10 point penalty
462 if r.OrphanedIndexes > 0 {
463 penalty := int(r.OrphanedIndexes / 100)
464 if penalty > 10 {
465 penalty = 10
466 }
467 score -= penalty
468 }
469
470 if score < 0 {
471 score = 0
472 }
473 return score
474 }
475