compact_event.go raw

   1  //go:build !(js && wasm)
   2  
   3  package database
   4  
   5  import (
   6  	"bytes"
   7  	"encoding/binary"
   8  	"errors"
   9  	"io"
  10  
  11  	"next.orly.dev/pkg/nostr/crypto/ec/schnorr"
  12  	"next.orly.dev/pkg/nostr/encoders/event"
  13  	"next.orly.dev/pkg/nostr/encoders/tag"
  14  	"next.orly.dev/pkg/nostr/encoders/varint"
  15  	"next.orly.dev/pkg/lol/chk"
  16  	"next.orly.dev/pkg/database/bufpool"
  17  )
  18  
  19  // CompactEventFormat defines the binary format for compact event storage.
  20  // This format uses 5-byte serial references instead of 32-byte IDs/pubkeys,
  21  // dramatically reducing storage requirements.
  22  //
  23  // Format:
  24  //   - 1 byte:  Version (currently 1)
  25  //   - 5 bytes: Author pubkey serial (reference to spk table)
  26  //   - varint:  CreatedAt timestamp
  27  //   - 2 bytes: Kind (uint16 big-endian)
  28  //   - varint:  Number of tags
//   - For each tag:
//       - varint: Number of elements in tag
//       - For each element:
//           - 1 byte: Element type flag
//               - 0x00 = raw bytes (followed by varint length + data)
//               - 0x01 = pubkey serial reference (followed by 5-byte serial)
//               - 0x02 = event ID serial reference (followed by 5-byte serial)
//               - 0x03 = unknown event ID (followed by 32-byte full ID)
//           - Element data based on type
//   - varint:  Content length
//   - Content bytes
  40  //   - 64 bytes: Signature
  41  //
  42  // Space savings example (event with 3 p-tags, 1 e-tag):
  43  //   - Original: 32 (ID) + 32 (pubkey) + 32*4 (tags) = 192 bytes
  44  //   - Compact:  5 (pubkey serial) + 5*4 (tag serials) = 25 bytes
  45  //   - Savings: 167 bytes per event (87%)
  46  
  47  const (
  48  	CompactFormatVersion = 1
  49  
  50  	// Tag element type flags
  51  	TagElementRaw          = 0x00 // Raw bytes (varint length + data)
  52  	TagElementPubkeySerial = 0x01 // Pubkey serial reference (5 bytes)
  53  	TagElementEventSerial  = 0x02 // Event ID serial reference (5 bytes)
  54  	TagElementEventIdFull  = 0x03 // Full event ID (32 bytes) - for unknown refs
  55  
  56  	// Sanity limits to prevent OOM from corrupt data
  57  	MaxTagsPerEvent       = 10000    // Maximum number of tags in an event
  58  	MaxTagElements        = 100      // Maximum elements in a single tag
  59  	MaxContentLength      = 10 << 20 // 10MB max content
  60  	MaxTagElementLength   = 1 << 20  // 1MB max for a single tag element
  61  )
  62  
  63  var (
  64  	ErrTooManyTags          = errors.New("corrupt data: too many tags")
  65  	ErrTooManyTagElems      = errors.New("corrupt data: too many tag elements")
  66  	ErrContentTooLarge      = errors.New("corrupt data: content too large")
  67  	ErrTagElementTooLong    = errors.New("corrupt data: tag element too long")
  68  	ErrUnknownTagElemType   = errors.New("corrupt data: unknown tag element type")
  69  )
  70  
  71  // SerialResolver is an interface for resolving serials during compact encoding/decoding.
  72  // This allows the encoder/decoder to look up or create serial mappings.
  73  type SerialResolver interface {
  74  	// GetOrCreatePubkeySerial returns the serial for a pubkey, creating one if needed.
  75  	GetOrCreatePubkeySerial(pubkey []byte) (serial uint64, err error)
  76  
  77  	// GetPubkeyBySerial returns the full pubkey for a serial.
  78  	GetPubkeyBySerial(serial uint64) (pubkey []byte, err error)
  79  
  80  	// GetEventSerialById returns the serial for an event ID, or 0 if not found.
  81  	GetEventSerialById(eventId []byte) (serial uint64, found bool, err error)
  82  
  83  	// GetEventIdBySerial returns the full event ID for a serial.
  84  	GetEventIdBySerial(serial uint64) (eventId []byte, err error)
  85  }
  86  
  87  // MarshalCompactEvent encodes an event using compact serial references.
  88  // The resolver is used to look up/create serial mappings for pubkeys and event IDs.
  89  func MarshalCompactEvent(ev *event.E, resolver SerialResolver) (data []byte, err error) {
  90  	buf := bufpool.GetMedium()
  91  	defer bufpool.PutMedium(buf)
  92  
  93  	// Version byte
  94  	buf.WriteByte(CompactFormatVersion)
  95  
  96  	// Author pubkey serial (5 bytes)
  97  	var authorSerial uint64
  98  	if authorSerial, err = resolver.GetOrCreatePubkeySerial(ev.Pubkey); chk.E(err) {
  99  		return nil, err
 100  	}
 101  	writeUint40(buf, authorSerial)
 102  
 103  	// CreatedAt (varint)
 104  	varint.Encode(buf, uint64(ev.CreatedAt))
 105  
 106  	// Kind (2 bytes big-endian)
 107  	binary.Write(buf, binary.BigEndian, ev.Kind)
 108  
 109  	// Tags
 110  	if ev.Tags == nil || ev.Tags.Len() == 0 {
 111  		varint.Encode(buf, 0)
 112  	} else {
 113  		varint.Encode(buf, uint64(ev.Tags.Len()))
 114  		for _, t := range *ev.Tags {
 115  			if err = encodeCompactTag(buf, t, resolver); chk.E(err) {
 116  				return nil, err
 117  			}
 118  		}
 119  	}
 120  
 121  	// Content
 122  	varint.Encode(buf, uint64(len(ev.Content)))
 123  	buf.Write(ev.Content)
 124  
 125  	// Signature (64 bytes)
 126  	buf.Write(ev.Sig)
 127  
 128  	// Copy bytes before returning buffer to pool
 129  	return bufpool.CopyBytes(buf), nil
 130  }
 131  
 132  // encodeCompactTag encodes a single tag with serial references for e/p tags.
 133  func encodeCompactTag(w io.Writer, t *tag.T, resolver SerialResolver) (err error) {
 134  	if t == nil || t.Len() == 0 {
 135  		varint.Encode(w, 0)
 136  		return nil
 137  	}
 138  
 139  	varint.Encode(w, uint64(t.Len()))
 140  
 141  	// Get tag key to determine if we should use serial references
 142  	key := t.Key()
 143  	isPTag := len(key) == 1 && key[0] == 'p'
 144  	isETag := len(key) == 1 && key[0] == 'e'
 145  
 146  	for i, elem := range t.T {
 147  		if i == 0 {
 148  			// First element is always the tag key - store as raw
 149  			writeTagElement(w, TagElementRaw, elem)
 150  			continue
 151  		}
 152  
 153  		if i == 1 {
 154  			// Second element is the value - potentially a serial reference
 155  			if isPTag && len(elem) == 32 {
 156  				// Binary pubkey - look up serial
 157  				serial, serErr := resolver.GetOrCreatePubkeySerial(elem)
 158  				if serErr == nil {
 159  					writeTagElementSerial(w, TagElementPubkeySerial, serial)
 160  					continue
 161  				}
 162  				// Fall through to raw encoding on error
 163  			} else if isPTag && len(elem) == 64 {
 164  				// Hex pubkey - decode and look up serial
 165  				var pubkey []byte
 166  				if pubkey, err = hexDecode(elem); err == nil && len(pubkey) == 32 {
 167  					serial, serErr := resolver.GetOrCreatePubkeySerial(pubkey)
 168  					if serErr == nil {
 169  						writeTagElementSerial(w, TagElementPubkeySerial, serial)
 170  						continue
 171  					}
 172  				}
 173  				// Fall through to raw encoding on error
 174  			} else if isETag && len(elem) == 32 {
 175  				// Binary event ID - look up serial if exists
 176  				serial, found, serErr := resolver.GetEventSerialById(elem)
 177  				if serErr == nil && found {
 178  					writeTagElementSerial(w, TagElementEventSerial, serial)
 179  					continue
 180  				}
 181  				// Event not found - store full ID
 182  				writeTagElement(w, TagElementEventIdFull, elem)
 183  				continue
 184  			} else if isETag && len(elem) == 64 {
 185  				// Hex event ID - decode and look up serial
 186  				var eventId []byte
 187  				if eventId, err = hexDecode(elem); err == nil && len(eventId) == 32 {
 188  					serial, found, serErr := resolver.GetEventSerialById(eventId)
 189  					if serErr == nil && found {
 190  						writeTagElementSerial(w, TagElementEventSerial, serial)
 191  						continue
 192  					}
 193  					// Event not found - store full ID
 194  					writeTagElement(w, TagElementEventIdFull, eventId)
 195  					continue
 196  				}
 197  				// Fall through to raw encoding on error
 198  			}
 199  		}
 200  
 201  		// Default: raw encoding
 202  		writeTagElement(w, TagElementRaw, elem)
 203  	}
 204  
 205  	return nil
 206  }
 207  
 208  // writeTagElement writes a tag element with type flag.
 209  func writeTagElement(w io.Writer, typeFlag byte, data []byte) {
 210  	w.Write([]byte{typeFlag})
 211  	if typeFlag == TagElementEventIdFull {
 212  		// Full event ID - no length prefix, always 32 bytes
 213  		w.Write(data)
 214  	} else {
 215  		// Raw data - length prefix
 216  		varint.Encode(w, uint64(len(data)))
 217  		w.Write(data)
 218  	}
 219  }
 220  
 221  // writeTagElementSerial writes a serial reference tag element.
 222  func writeTagElementSerial(w io.Writer, typeFlag byte, serial uint64) {
 223  	w.Write([]byte{typeFlag})
 224  	writeUint40(w, serial)
 225  }
 226  
 227  // writeUint40 writes a 5-byte big-endian unsigned integer.
 228  func writeUint40(w io.Writer, value uint64) {
 229  	buf := []byte{
 230  		byte((value >> 32) & 0xFF),
 231  		byte((value >> 24) & 0xFF),
 232  		byte((value >> 16) & 0xFF),
 233  		byte((value >> 8) & 0xFF),
 234  		byte(value & 0xFF),
 235  	}
 236  	w.Write(buf)
 237  }
 238  
 239  // readUint40 reads a 5-byte big-endian unsigned integer.
 240  func readUint40(r io.Reader) (value uint64, err error) {
 241  	var buf [5]byte // Fixed array avoids heap escape
 242  	if _, err = io.ReadFull(r, buf[:]); err != nil {
 243  		return 0, err
 244  	}
 245  	value = (uint64(buf[0]) << 32) |
 246  		(uint64(buf[1]) << 24) |
 247  		(uint64(buf[2]) << 16) |
 248  		(uint64(buf[3]) << 8) |
 249  		uint64(buf[4])
 250  	return value, nil
 251  }
 252  
 253  // UnmarshalCompactEvent decodes a compact event back to a full event.E.
 254  // The resolver is used to look up pubkeys and event IDs from serials.
 255  // The eventId parameter is the full 32-byte event ID (from SerialEventId table).
 256  func UnmarshalCompactEvent(data []byte, eventId []byte, resolver SerialResolver) (ev *event.E, err error) {
 257  	// Validate eventId upfront to prevent returning events with zero IDs
 258  	if len(eventId) != 32 {
 259  		return nil, errors.New("invalid eventId: must be exactly 32 bytes")
 260  	}
 261  
 262  	r := bytes.NewReader(data)
 263  	ev = new(event.E)
 264  
 265  	// Version byte
 266  	version, err := r.ReadByte()
 267  	if err != nil {
 268  		return nil, err
 269  	}
 270  	if version != CompactFormatVersion {
 271  		return nil, errors.New("unsupported compact event format version")
 272  	}
 273  
 274  	// Set the event ID (passed separately from SerialEventId lookup)
 275  	ev.ID = make([]byte, 32)
 276  	copy(ev.ID, eventId)
 277  
 278  	// Author pubkey serial (5 bytes) -> full pubkey
 279  	authorSerial, err := readUint40(r)
 280  	if err != nil {
 281  		return nil, err
 282  	}
 283  	if ev.Pubkey, err = resolver.GetPubkeyBySerial(authorSerial); chk.E(err) {
 284  		return nil, err
 285  	}
 286  
 287  	// CreatedAt (varint)
 288  	var ca uint64
 289  	if ca, err = varint.Decode(r); chk.E(err) {
 290  		return nil, err
 291  	}
 292  	ev.CreatedAt = int64(ca)
 293  
 294  	// Kind (2 bytes big-endian)
 295  	if err = binary.Read(r, binary.BigEndian, &ev.Kind); chk.E(err) {
 296  		return nil, err
 297  	}
 298  
 299  	// Tags
 300  	var nTags uint64
 301  	if nTags, err = varint.Decode(r); chk.E(err) {
 302  		return nil, err
 303  	}
 304  	if nTags > MaxTagsPerEvent {
 305  		return nil, ErrTooManyTags // Don't log - caller handles gracefully
 306  	}
 307  	if nTags > 0 {
 308  		ev.Tags = tag.NewSWithCap(int(nTags))
 309  		for i := uint64(0); i < nTags; i++ {
 310  			var t *tag.T
 311  			if t, err = decodeCompactTag(r, resolver); err != nil {
 312  				return nil, err // Don't log corruption errors
 313  			}
 314  			*ev.Tags = append(*ev.Tags, t)
 315  		}
 316  	}
 317  
 318  	// Content
 319  	var contentLen uint64
 320  	if contentLen, err = varint.Decode(r); chk.E(err) {
 321  		return nil, err
 322  	}
 323  	if contentLen > MaxContentLength {
 324  		return nil, ErrContentTooLarge
 325  	}
 326  	ev.Content = make([]byte, contentLen)
 327  	if _, err = io.ReadFull(r, ev.Content); chk.E(err) {
 328  		return nil, err
 329  	}
 330  
 331  	// Signature (64 bytes)
 332  	ev.Sig = make([]byte, schnorr.SignatureSize)
 333  	if _, err = io.ReadFull(r, ev.Sig); chk.E(err) {
 334  		return nil, err
 335  	}
 336  
 337  	return ev, nil
 338  }
 339  
 340  // decodeCompactTag decodes a single tag from compact format.
 341  func decodeCompactTag(r io.Reader, resolver SerialResolver) (t *tag.T, err error) {
 342  	var nElems uint64
 343  	if nElems, err = varint.Decode(r); err != nil {
 344  		return nil, err
 345  	}
 346  	if nElems > MaxTagElements {
 347  		return nil, ErrTooManyTagElems
 348  	}
 349  
 350  	t = tag.NewWithCap(int(nElems))
 351  
 352  	for i := uint64(0); i < nElems; i++ {
 353  		var elem []byte
 354  		if elem, err = decodeTagElement(r, resolver); err != nil {
 355  			return nil, err // Don't log corruption errors
 356  		}
 357  		t.T = append(t.T, elem)
 358  	}
 359  
 360  	return t, nil
 361  }
 362  
 363  // decodeTagElement decodes a single tag element from compact format.
 364  func decodeTagElement(r io.Reader, resolver SerialResolver) (elem []byte, err error) {
 365  	// Read type flag (fixed array avoids heap escape)
 366  	var typeBuf [1]byte
 367  	if _, err = io.ReadFull(r, typeBuf[:]); err != nil {
 368  		return nil, err
 369  	}
 370  	typeFlag := typeBuf[0]
 371  
 372  	switch typeFlag {
 373  	case TagElementRaw:
 374  		// Raw bytes: varint length + data
 375  		var length uint64
 376  		if length, err = varint.Decode(r); err != nil {
 377  			return nil, err
 378  		}
 379  		if length > MaxTagElementLength {
 380  			return nil, ErrTagElementTooLong
 381  		}
 382  		elem = make([]byte, length)
 383  		if _, err = io.ReadFull(r, elem); err != nil {
 384  			return nil, err
 385  		}
 386  		return elem, nil
 387  
 388  	case TagElementPubkeySerial:
 389  		// Pubkey serial: 5 bytes -> lookup full pubkey -> return as 33-byte binary
 390  		serial, err := readUint40(r)
 391  		if err != nil {
 392  			return nil, err
 393  		}
 394  		pubkey, err := resolver.GetPubkeyBySerial(serial)
 395  		if err != nil {
 396  			return nil, err
 397  		}
 398  		// Return as 33-byte binary (32 bytes + null terminator) for tag.Marshal detection
 399  		result := make([]byte, 33)
 400  		copy(result, pubkey)
 401  		result[32] = 0 // null terminator
 402  		return result, nil
 403  
 404  	case TagElementEventSerial:
 405  		// Event serial: 5 bytes -> lookup full event ID -> return as 33-byte binary
 406  		serial, err := readUint40(r)
 407  		if err != nil {
 408  			return nil, err
 409  		}
 410  		eventId, err := resolver.GetEventIdBySerial(serial)
 411  		if err != nil {
 412  			return nil, err
 413  		}
 414  		// Return as 33-byte binary (32 bytes + null terminator) for tag.Marshal detection
 415  		result := make([]byte, 33)
 416  		copy(result, eventId)
 417  		result[32] = 0 // null terminator
 418  		return result, nil
 419  
 420  	case TagElementEventIdFull:
 421  		// Full event ID: 32 bytes (for unknown/forward references)
 422  		// Return as 33-byte binary (32 bytes + null terminator) for tag.Marshal detection
 423  		elem = make([]byte, 33)
 424  		if _, err = io.ReadFull(r, elem[:32]); err != nil {
 425  			return nil, err
 426  		}
 427  		elem[32] = 0 // null terminator
 428  		return elem, nil
 429  
 430  	default:
 431  		return nil, ErrUnknownTagElemType
 432  	}
 433  }
 434  
 435  // hexDecode decodes hex bytes to binary.
 436  // This is a simple implementation - the real one uses the optimized hex package.
 437  func hexDecode(src []byte) (dst []byte, err error) {
 438  	if len(src)%2 != 0 {
 439  		return nil, errors.New("hex string has odd length")
 440  	}
 441  	dst = make([]byte, len(src)/2)
 442  	for i := 0; i < len(dst); i++ {
 443  		a := unhex(src[i*2])
 444  		b := unhex(src[i*2+1])
 445  		if a == 0xFF || b == 0xFF {
 446  			return nil, errors.New("invalid hex character")
 447  		}
 448  		dst[i] = (a << 4) | b
 449  	}
 450  	return dst, nil
 451  }
 452  
 453  func unhex(c byte) byte {
 454  	switch {
 455  	case '0' <= c && c <= '9':
 456  		return c - '0'
 457  	case 'a' <= c && c <= 'f':
 458  		return c - 'a' + 10
 459  	case 'A' <= c && c <= 'F':
 460  		return c - 'A' + 10
 461  	}
 462  	return 0xFF
 463  }
 464