health.go raw

   1  //go:build !(js && wasm)
   2  
   3  package database
   4  
   5  import (
   6  	"bytes"
   7  	"fmt"
   8  	"io"
   9  	"time"
  10  
  11  	"github.com/dgraph-io/badger/v4"
  12  	"next.orly.dev/pkg/lol/chk"
  13  	"next.orly.dev/pkg/lol/log"
  14  	"next.orly.dev/pkg/database/indexes"
  15  	"next.orly.dev/pkg/database/indexes/types"
  16  )
  17  
  18  // HealthReport contains the results of a database health check.
  19  type HealthReport struct {
  20  	// Scan metadata
  21  	ScanStarted  time.Time
  22  	ScanDuration time.Duration
  23  
  24  	// Event counts
  25  	CompactEvents  int64 // Events stored in compact format (cmp)
  26  	LegacyEvents   int64 // Events in legacy format (evt)
  27  	SmallEvents    int64 // Small inline events (sev)
  28  	TotalEvents    int64 // Total events
  29  	SerialIdCount  int64 // Serial to EventID mappings (sei)
  30  
  31  	// Pubkey serial counts
  32  	PubkeySerials int64 // pks entries (pubkey hash -> serial)
  33  	SerialPubkeys int64 // spk entries (serial -> pubkey)
  34  
  35  	// Graph edge counts
  36  	EventPubkeyEdges int64 // epg entries
  37  	PubkeyEventEdges int64 // peg entries
  38  	EventEventEdges  int64 // eeg entries
  39  	GraphEventEdges  int64 // gee entries
  40  
  41  	// Index counts
  42  	KindIndexes   int64 // kc- entries
  43  	PubkeyIndexes int64 // pc- entries
  44  	TagIndexes    int64 // tc- entries
  45  	WordIndexes   int64 // wrd entries
  46  	IdIndexes     int64 // eid entries
  47  
  48  	// Issues found
  49  	MissingSerialEventIds  int64 // cmp entries without corresponding sei
  50  	OrphanedSerialEventIds int64 // sei entries without corresponding cmp
  51  	PubkeySerialMismatches int64 // pks without matching spk or vice versa
  52  	OrphanedIndexes        int64 // Index entries pointing to non-existent events
  53  
  54  	// Sample of missing sei serials (for debugging)
  55  	MissingSeiSamples []uint64
  56  
  57  	// Health score (0-100)
  58  	HealthScore int
  59  }
  60  
  61  // String returns a human-readable health report.
  62  func (r *HealthReport) String() string {
  63  	var buf bytes.Buffer
  64  	fmt.Fprintln(&buf, "Database Health Report")
  65  	fmt.Fprintln(&buf, "======================")
  66  	fmt.Fprintf(&buf, "Scan duration: %v\n\n", r.ScanDuration)
  67  
  68  	fmt.Fprintln(&buf, "Event Storage:")
  69  	fmt.Fprintf(&buf, "  Compact events (cmp):    %d\n", r.CompactEvents)
  70  	fmt.Fprintf(&buf, "  Legacy events (evt):     %d\n", r.LegacyEvents)
  71  	fmt.Fprintf(&buf, "  Small events (sev):      %d\n", r.SmallEvents)
  72  	fmt.Fprintf(&buf, "  Total events:            %d\n", r.TotalEvents)
  73  	fmt.Fprintf(&buf, "  Serial->ID maps (sei):   %d\n\n", r.SerialIdCount)
  74  
  75  	fmt.Fprintln(&buf, "Pubkey Mappings:")
  76  	fmt.Fprintf(&buf, "  Pubkey serials (pks):    %d\n", r.PubkeySerials)
  77  	fmt.Fprintf(&buf, "  Serial pubkeys (spk):    %d\n\n", r.SerialPubkeys)
  78  
  79  	fmt.Fprintln(&buf, "Graph Edges:")
  80  	fmt.Fprintf(&buf, "  Event->Pubkey (epg):     %d\n", r.EventPubkeyEdges)
  81  	fmt.Fprintf(&buf, "  Pubkey->Event (peg):     %d\n", r.PubkeyEventEdges)
  82  	fmt.Fprintf(&buf, "  Event->Event (eeg):      %d\n", r.EventEventEdges)
  83  	fmt.Fprintf(&buf, "  Event<-Event (gee):      %d\n\n", r.GraphEventEdges)
  84  
  85  	fmt.Fprintln(&buf, "Search Indexes:")
  86  	fmt.Fprintf(&buf, "  Kind indexes (kc-):      %d\n", r.KindIndexes)
  87  	fmt.Fprintf(&buf, "  Pubkey indexes (pc-):    %d\n", r.PubkeyIndexes)
  88  	fmt.Fprintf(&buf, "  Tag indexes (tc-):       %d\n", r.TagIndexes)
  89  	fmt.Fprintf(&buf, "  Word indexes (wrd):      %d\n", r.WordIndexes)
  90  	fmt.Fprintf(&buf, "  ID indexes (eid):        %d\n\n", r.IdIndexes)
  91  
  92  	fmt.Fprintln(&buf, "Issues Found:")
  93  	fmt.Fprintf(&buf, "  Missing sei mappings:    %d", r.MissingSerialEventIds)
  94  	if r.MissingSerialEventIds > 0 {
  95  		fmt.Fprint(&buf, " (CRITICAL)")
  96  	}
  97  	fmt.Fprintln(&buf)
  98  	fmt.Fprintf(&buf, "  Orphaned sei mappings:   %d\n", r.OrphanedSerialEventIds)
  99  	fmt.Fprintf(&buf, "  Pubkey serial mismatch:  %d\n", r.PubkeySerialMismatches)
 100  	fmt.Fprintf(&buf, "  Orphaned indexes:        %d\n\n", r.OrphanedIndexes)
 101  
 102  	if len(r.MissingSeiSamples) > 0 {
 103  		fmt.Fprintln(&buf, "Sample missing sei serials:")
 104  		for i, s := range r.MissingSeiSamples {
 105  			if i >= 10 {
 106  				fmt.Fprintf(&buf, "  ... and %d more\n", len(r.MissingSeiSamples)-10)
 107  				break
 108  			}
 109  			fmt.Fprintf(&buf, "  - %d\n", s)
 110  		}
 111  		fmt.Fprintln(&buf)
 112  	}
 113  
 114  	fmt.Fprintf(&buf, "Health Score: %d/100\n", r.HealthScore)
 115  
 116  	if r.HealthScore < 50 {
 117  		fmt.Fprintln(&buf, "\n⚠️  Database has critical issues. Run 'orly db repair' to fix.")
 118  	} else if r.HealthScore < 80 {
 119  		fmt.Fprintln(&buf, "\n⚠️  Database has some issues. Consider running 'orly db repair'.")
 120  	} else {
 121  		fmt.Fprintln(&buf, "\n✓ Database is healthy.")
 122  	}
 123  
 124  	return buf.String()
 125  }
 126  
 127  // HealthCheck performs a comprehensive health check of the database.
 128  // It scans all index prefixes and verifies referential integrity.
 129  func (d *D) HealthCheck(progress io.Writer) (report *HealthReport, err error) {
 130  	report = &HealthReport{
 131  		ScanStarted:       time.Now(),
 132  		MissingSeiSamples: make([]uint64, 0, 100),
 133  	}
 134  
 135  	if progress != nil {
 136  		fmt.Fprintln(progress, "Starting database health check...")
 137  	}
 138  
 139  	// Build prefix buffers for all index types
 140  	cmpPrf := buildPrefix(indexes.CompactEventEnc(nil))
 141  	seiPrf := buildPrefix(indexes.SerialEventIdEnc(nil))
 142  	evtPrf := buildPrefix(indexes.EventEnc(nil))
 143  	sevPrf := buildPrefix(indexes.SmallEventEnc(nil))
 144  	pksPrf := buildPrefix(indexes.PubkeySerialEnc(nil, nil))
 145  	spkPrf := buildPrefix(indexes.SerialPubkeyEnc(nil))
 146  	epgPrf := buildPrefix(indexes.EventPubkeyGraphEnc(nil, nil, nil, nil))
 147  	pegPrf := buildPrefix(indexes.PubkeyEventGraphEnc(nil, nil, nil, nil))
 148  	eegPrf := buildPrefix(indexes.EventEventGraphEnc(nil, nil, nil, nil))
 149  	geePrf := buildPrefix(indexes.GraphEventEventEnc(nil, nil, nil, nil))
 150  	kcPrf := buildPrefix(indexes.KindEnc(nil, nil, nil))
 151  	pcPrf := buildPrefix(indexes.PubkeyEnc(nil, nil, nil))
 152  	tcPrf := buildPrefix(indexes.TagEnc(nil, nil, nil, nil))
 153  	wrdPrf := buildPrefix(indexes.WordEnc(nil, nil))
 154  	eidPrf := buildPrefix(indexes.IdEnc(nil, nil))
 155  
 156  	// Phase 1: Count all entries with each prefix
 157  	if progress != nil {
 158  		fmt.Fprintln(progress, "Phase 1: Counting entries by prefix...")
 159  	}
 160  
 161  	err = d.View(func(txn *badger.Txn) error {
 162  		// Count compact events
 163  		report.CompactEvents = countPrefix(txn, cmpPrf)
 164  		if progress != nil {
 165  			fmt.Fprintf(progress, "  Compact events (cmp): %d\n", report.CompactEvents)
 166  		}
 167  
 168  		// Count serial->eventID mappings
 169  		report.SerialIdCount = countPrefix(txn, seiPrf)
 170  		if progress != nil {
 171  			fmt.Fprintf(progress, "  Serial->ID maps (sei): %d\n", report.SerialIdCount)
 172  		}
 173  
 174  		// Count legacy events
 175  		report.LegacyEvents = countPrefix(txn, evtPrf)
 176  		report.SmallEvents = countPrefix(txn, sevPrf)
 177  		report.TotalEvents = report.CompactEvents + report.LegacyEvents + report.SmallEvents
 178  		if progress != nil {
 179  			fmt.Fprintf(progress, "  Legacy events (evt): %d\n", report.LegacyEvents)
 180  			fmt.Fprintf(progress, "  Small events (sev): %d\n", report.SmallEvents)
 181  		}
 182  
 183  		// Count pubkey serial mappings
 184  		report.PubkeySerials = countPrefix(txn, pksPrf)
 185  		report.SerialPubkeys = countPrefix(txn, spkPrf)
 186  		if progress != nil {
 187  			fmt.Fprintf(progress, "  Pubkey serials (pks): %d, (spk): %d\n", report.PubkeySerials, report.SerialPubkeys)
 188  		}
 189  
 190  		// Count graph edges
 191  		report.EventPubkeyEdges = countPrefix(txn, epgPrf)
 192  		report.PubkeyEventEdges = countPrefix(txn, pegPrf)
 193  		report.EventEventEdges = countPrefix(txn, eegPrf)
 194  		report.GraphEventEdges = countPrefix(txn, geePrf)
 195  		if progress != nil {
 196  			fmt.Fprintf(progress, "  Graph edges: epg=%d, peg=%d, eeg=%d, gee=%d\n",
 197  				report.EventPubkeyEdges, report.PubkeyEventEdges, report.EventEventEdges, report.GraphEventEdges)
 198  		}
 199  
 200  		// Count search indexes
 201  		report.KindIndexes = countPrefix(txn, kcPrf)
 202  		report.PubkeyIndexes = countPrefix(txn, pcPrf)
 203  		report.TagIndexes = countPrefix(txn, tcPrf)
 204  		report.WordIndexes = countPrefix(txn, wrdPrf)
 205  		report.IdIndexes = countPrefix(txn, eidPrf)
 206  		if progress != nil {
 207  			fmt.Fprintf(progress, "  Indexes: kc=%d, pc=%d, tc=%d, wrd=%d, eid=%d\n",
 208  				report.KindIndexes, report.PubkeyIndexes, report.TagIndexes, report.WordIndexes, report.IdIndexes)
 209  		}
 210  
 211  		return nil
 212  	})
 213  	if chk.E(err) {
 214  		return nil, err
 215  	}
 216  
 217  	// Phase 2: Check cmp->sei integrity (CRITICAL)
 218  	if progress != nil {
 219  		fmt.Fprintln(progress, "\nPhase 2: Checking compact event -> serial ID integrity...")
 220  	}
 221  
 222  	err = d.View(func(txn *badger.Txn) error {
 223  		it := txn.NewIterator(badger.IteratorOptions{Prefix: cmpPrf})
 224  		defer it.Close()
 225  
 226  		checked := int64(0)
 227  		for it.Rewind(); it.Valid(); it.Next() {
 228  			// Extract serial from cmp key: prefix (3 bytes) + serial (5 bytes)
 229  			key := it.Item().Key()
 230  			if len(key) < 8 {
 231  				continue
 232  			}
 233  
 234  			// Extract the serial
 235  			serial := extractSerial(key[3:8])
 236  
 237  			// Check if sei entry exists for this serial
 238  			seiKey := buildSeiKey(serial)
 239  			_, err := txn.Get(seiKey)
 240  			if err == badger.ErrKeyNotFound {
 241  				report.MissingSerialEventIds++
 242  				if len(report.MissingSeiSamples) < 100 {
 243  					report.MissingSeiSamples = append(report.MissingSeiSamples, serial)
 244  				}
 245  			} else if err != nil {
 246  				log.W.F("error checking sei for serial %d: %v", serial, err)
 247  			}
 248  
 249  			checked++
 250  			if progress != nil && checked%100000 == 0 {
 251  				fmt.Fprintf(progress, "  Checked %d compact events, %d missing sei so far...\n",
 252  					checked, report.MissingSerialEventIds)
 253  			}
 254  		}
 255  
 256  		if progress != nil {
 257  			fmt.Fprintf(progress, "  Checked %d compact events, found %d missing sei entries\n",
 258  				checked, report.MissingSerialEventIds)
 259  		}
 260  
 261  		return nil
 262  	})
 263  	if chk.E(err) {
 264  		return nil, err
 265  	}
 266  
 267  	// Phase 3: Check for orphaned sei entries (sei without cmp)
 268  	if progress != nil {
 269  		fmt.Fprintln(progress, "\nPhase 3: Checking for orphaned serial ID mappings...")
 270  	}
 271  
 272  	err = d.View(func(txn *badger.Txn) error {
 273  		it := txn.NewIterator(badger.IteratorOptions{Prefix: seiPrf})
 274  		defer it.Close()
 275  
 276  		checked := int64(0)
 277  		for it.Rewind(); it.Valid(); it.Next() {
 278  			key := it.Item().Key()
 279  			if len(key) < 8 {
 280  				continue
 281  			}
 282  
 283  			// Extract serial from sei key
 284  			serial := extractSerial(key[3:8])
 285  
 286  			// Check if cmp entry exists for this serial
 287  			cmpKey := buildCmpKey(serial)
 288  			_, err := txn.Get(cmpKey)
 289  			if err == badger.ErrKeyNotFound {
 290  				// Also check legacy evt format
 291  				evtKey := buildEvtKey(serial)
 292  				_, err2 := txn.Get(evtKey)
 293  				if err2 == badger.ErrKeyNotFound {
 294  					report.OrphanedSerialEventIds++
 295  				}
 296  			} else if err != nil {
 297  				log.W.F("error checking cmp for serial %d: %v", serial, err)
 298  			}
 299  
 300  			checked++
 301  			if progress != nil && checked%100000 == 0 {
 302  				fmt.Fprintf(progress, "  Checked %d sei entries, %d orphaned so far...\n",
 303  					checked, report.OrphanedSerialEventIds)
 304  			}
 305  		}
 306  
 307  		if progress != nil {
 308  			fmt.Fprintf(progress, "  Checked %d sei entries, found %d orphaned\n",
 309  				checked, report.OrphanedSerialEventIds)
 310  		}
 311  
 312  		return nil
 313  	})
 314  	if chk.E(err) {
 315  		return nil, err
 316  	}
 317  
 318  	// Phase 4: Check pubkey serial consistency
 319  	if progress != nil {
 320  		fmt.Fprintln(progress, "\nPhase 4: Checking pubkey serial consistency...")
 321  	}
 322  
 323  	err = d.View(func(txn *badger.Txn) error {
 324  		// Check that pks count roughly matches spk count
 325  		// A small difference is acceptable due to timing, but large differences indicate corruption
 326  		diff := report.PubkeySerials - report.SerialPubkeys
 327  		if diff < 0 {
 328  			diff = -diff
 329  		}
 330  		// Allow 1% difference
 331  		threshold := report.PubkeySerials / 100
 332  		if threshold < 10 {
 333  			threshold = 10
 334  		}
 335  		if diff > threshold {
 336  			report.PubkeySerialMismatches = diff
 337  			if progress != nil {
 338  				fmt.Fprintf(progress, "  Found %d pubkey serial mismatches (pks=%d, spk=%d)\n",
 339  					diff, report.PubkeySerials, report.SerialPubkeys)
 340  			}
 341  		} else if progress != nil {
 342  			fmt.Fprintln(progress, "  Pubkey serial counts are consistent")
 343  		}
 344  
 345  		return nil
 346  	})
 347  	if chk.E(err) {
 348  		return nil, err
 349  	}
 350  
 351  	// Calculate health score
 352  	report.ScanDuration = time.Since(report.ScanStarted)
 353  	report.HealthScore = calculateHealthScore(report)
 354  
 355  	if progress != nil {
 356  		fmt.Fprintf(progress, "\nHealth check complete. Score: %d/100\n", report.HealthScore)
 357  	}
 358  
 359  	return report, nil
 360  }
 361  
 362  // buildPrefix creates a prefix buffer from an encoder.
 363  func buildPrefix(enc *indexes.T) []byte {
 364  	buf := new(bytes.Buffer)
 365  	if err := enc.MarshalWrite(buf); err != nil {
 366  		return nil
 367  	}
 368  	// Return only the prefix part (3 bytes)
 369  	b := buf.Bytes()
 370  	if len(b) >= 3 {
 371  		return b[:3]
 372  	}
 373  	return b
 374  }
 375  
 376  // countPrefix counts the number of entries with the given prefix.
 377  func countPrefix(txn *badger.Txn, prefix []byte) int64 {
 378  	it := txn.NewIterator(badger.IteratorOptions{
 379  		Prefix:         prefix,
 380  		PrefetchValues: false,
 381  	})
 382  	defer it.Close()
 383  
 384  	var count int64
 385  	for it.Rewind(); it.Valid(); it.Next() {
 386  		count++
 387  	}
 388  	return count
 389  }
 390  
 391  // extractSerial extracts a 40-bit serial from 5 bytes (big-endian).
 392  func extractSerial(b []byte) uint64 {
 393  	if len(b) < 5 {
 394  		return 0
 395  	}
 396  	return (uint64(b[0]) << 32) |
 397  		(uint64(b[1]) << 24) |
 398  		(uint64(b[2]) << 16) |
 399  		(uint64(b[3]) << 8) |
 400  		uint64(b[4])
 401  }
 402  
 403  // buildSeiKey builds a sei (serial->eventID) key for the given serial.
 404  func buildSeiKey(serial uint64) []byte {
 405  	ser := new(types.Uint40)
 406  	ser.Set(serial)
 407  	buf := new(bytes.Buffer)
 408  	indexes.SerialEventIdEnc(ser).MarshalWrite(buf)
 409  	return buf.Bytes()
 410  }
 411  
 412  // buildCmpKey builds a cmp (compact event) key for the given serial.
 413  func buildCmpKey(serial uint64) []byte {
 414  	ser := new(types.Uint40)
 415  	ser.Set(serial)
 416  	buf := new(bytes.Buffer)
 417  	indexes.CompactEventEnc(ser).MarshalWrite(buf)
 418  	return buf.Bytes()
 419  }
 420  
 421  // buildEvtKey builds an evt (legacy event) key for the given serial.
 422  func buildEvtKey(serial uint64) []byte {
 423  	ser := new(types.Uint40)
 424  	ser.Set(serial)
 425  	buf := new(bytes.Buffer)
 426  	indexes.EventEnc(ser).MarshalWrite(buf)
 427  	return buf.Bytes()
 428  }
 429  
 430  // calculateHealthScore calculates a health score from 0-100 based on the report.
 431  func calculateHealthScore(r *HealthReport) int {
 432  	score := 100
 433  
 434  	// Missing sei is critical - each one costs 1 point, max 50 point penalty
 435  	if r.MissingSerialEventIds > 0 {
 436  		penalty := int(r.MissingSerialEventIds)
 437  		if penalty > 50 {
 438  			penalty = 50
 439  		}
 440  		score -= penalty
 441  	}
 442  
 443  	// Orphaned sei is less critical - each one costs 0.1 points, max 20 point penalty
 444  	if r.OrphanedSerialEventIds > 0 {
 445  		penalty := int(r.OrphanedSerialEventIds / 10)
 446  		if penalty > 20 {
 447  			penalty = 20
 448  		}
 449  		score -= penalty
 450  	}
 451  
 452  	// Pubkey mismatches cost 0.5 points each, max 20 point penalty
 453  	if r.PubkeySerialMismatches > 0 {
 454  		penalty := int(r.PubkeySerialMismatches / 2)
 455  		if penalty > 20 {
 456  			penalty = 20
 457  		}
 458  		score -= penalty
 459  	}
 460  
 461  	// Orphaned indexes cost 0.01 points each, max 10 point penalty
 462  	if r.OrphanedIndexes > 0 {
 463  		penalty := int(r.OrphanedIndexes / 100)
 464  		if penalty > 10 {
 465  			penalty = 10
 466  		}
 467  		score -= penalty
 468  	}
 469  
 470  	if score < 0 {
 471  		score = 0
 472  	}
 473  	return score
 474  }
 475