compact_event.go raw
1 //go:build !(js && wasm)
2
3 package database
4
5 import (
6 "bytes"
7 "encoding/binary"
8 "errors"
9 "io"
10
11 "next.orly.dev/pkg/nostr/crypto/ec/schnorr"
12 "next.orly.dev/pkg/nostr/encoders/event"
13 "next.orly.dev/pkg/nostr/encoders/tag"
14 "next.orly.dev/pkg/nostr/encoders/varint"
15 "next.orly.dev/pkg/lol/chk"
16 "next.orly.dev/pkg/database/bufpool"
17 )
18
19 // CompactEventFormat defines the binary format for compact event storage.
20 // This format uses 5-byte serial references instead of 32-byte IDs/pubkeys,
21 // dramatically reducing storage requirements.
22 //
23 // Format:
24 // - 1 byte: Version (currently 1)
25 // - 5 bytes: Author pubkey serial (reference to spk table)
26 // - varint: CreatedAt timestamp
27 // - 2 bytes: Kind (uint16 big-endian)
28 // - varint: Number of tags
29 // - For each tag:
30 // - varint: Number of elements in tag
31 // - For each element:
32 // - 1 byte: Element type flag
33 // - 0x00 = raw bytes (followed by varint length + data)
34 // - 0x01 = pubkey serial reference (followed by 5-byte serial)
35 // - 0x02 = event ID serial reference (followed by 5-byte serial)
36 // - 0x03 = unknown event ID (followed by 32-byte full ID)
37 // - Element data based on type
38 // - varint: Content length
39 // - Content bytes
40 // - 64 bytes: Signature
41 //
42 // Space savings example (event with 3 p-tags, 1 e-tag):
43 // - Original: 32 (ID) + 32 (pubkey) + 32*4 (tags) = 192 bytes
44 // - Compact: 5 (pubkey serial) + 5*4 (tag serials) = 25 bytes
45 // - Savings: 167 bytes per event (87%)
46
// CompactFormatVersion is the version byte written at the start of every
// compact event record and required by the decoder.
const CompactFormatVersion = 1

// Type flags that prefix each encoded tag element and select how the bytes
// following the flag are interpreted.
const (
	// TagElementRaw marks raw bytes: a varint length followed by the data.
	TagElementRaw = 0x00
	// TagElementPubkeySerial marks a 5-byte pubkey serial reference.
	TagElementPubkeySerial = 0x01
	// TagElementEventSerial marks a 5-byte event ID serial reference.
	TagElementEventSerial = 0x02
	// TagElementEventIdFull marks a full 32-byte event ID, used when the
	// referenced event has no known serial.
	TagElementEventIdFull = 0x03
)

// Sanity limits applied while decoding so that corrupt length fields cannot
// trigger huge allocations.
const (
	// MaxTagsPerEvent is the largest tag count accepted for one event.
	MaxTagsPerEvent = 10000
	// MaxTagElements is the largest element count accepted for one tag.
	MaxTagElements = 100
	// MaxContentLength is the largest content size accepted (10 MiB).
	MaxContentLength = 10 * 1024 * 1024
	// MaxTagElementLength is the largest raw tag element accepted (1 MiB).
	MaxTagElementLength = 1024 * 1024
)
62
// Sentinel errors reported by the compact decoder when a record fails a
// sanity check or contains a flag it does not understand.

// ErrTooManyTags is returned when the tag count exceeds MaxTagsPerEvent.
var ErrTooManyTags = errors.New("corrupt data: too many tags")

// ErrTooManyTagElems is returned when a tag's element count exceeds
// MaxTagElements.
var ErrTooManyTagElems = errors.New("corrupt data: too many tag elements")

// ErrContentTooLarge is returned when the content length exceeds
// MaxContentLength.
var ErrContentTooLarge = errors.New("corrupt data: content too large")

// ErrTagElementTooLong is returned when a raw tag element's length exceeds
// MaxTagElementLength.
var ErrTagElementTooLong = errors.New("corrupt data: tag element too long")

// ErrUnknownTagElemType is returned when a tag element carries a type flag
// other than the TagElement* values.
var ErrUnknownTagElemType = errors.New("corrupt data: unknown tag element type")
70
// SerialResolver translates between full 32-byte identifiers and the compact
// serial numbers stored in their place. The compact encoder and decoder
// receive one so they can look up, or create, the mappings they need.
type SerialResolver interface {
	// Lookups used while encoding.

	// GetOrCreatePubkeySerial returns the serial assigned to pubkey,
	// allocating a new one when the pubkey has none yet.
	GetOrCreatePubkeySerial(pubkey []byte) (serial uint64, err error)

	// GetEventSerialById returns the serial assigned to eventId. found is
	// false (and serial is 0) when the event ID has no serial.
	GetEventSerialById(eventId []byte) (serial uint64, found bool, err error)

	// Lookups used while decoding.

	// GetPubkeyBySerial returns the full pubkey that serial refers to.
	GetPubkeyBySerial(serial uint64) (pubkey []byte, err error)

	// GetEventIdBySerial returns the full event ID that serial refers to.
	GetEventIdBySerial(serial uint64) (eventId []byte, err error)
}
86
87 // MarshalCompactEvent encodes an event using compact serial references.
88 // The resolver is used to look up/create serial mappings for pubkeys and event IDs.
89 func MarshalCompactEvent(ev *event.E, resolver SerialResolver) (data []byte, err error) {
90 buf := bufpool.GetMedium()
91 defer bufpool.PutMedium(buf)
92
93 // Version byte
94 buf.WriteByte(CompactFormatVersion)
95
96 // Author pubkey serial (5 bytes)
97 var authorSerial uint64
98 if authorSerial, err = resolver.GetOrCreatePubkeySerial(ev.Pubkey); chk.E(err) {
99 return nil, err
100 }
101 writeUint40(buf, authorSerial)
102
103 // CreatedAt (varint)
104 varint.Encode(buf, uint64(ev.CreatedAt))
105
106 // Kind (2 bytes big-endian)
107 binary.Write(buf, binary.BigEndian, ev.Kind)
108
109 // Tags
110 if ev.Tags == nil || ev.Tags.Len() == 0 {
111 varint.Encode(buf, 0)
112 } else {
113 varint.Encode(buf, uint64(ev.Tags.Len()))
114 for _, t := range *ev.Tags {
115 if err = encodeCompactTag(buf, t, resolver); chk.E(err) {
116 return nil, err
117 }
118 }
119 }
120
121 // Content
122 varint.Encode(buf, uint64(len(ev.Content)))
123 buf.Write(ev.Content)
124
125 // Signature (64 bytes)
126 buf.Write(ev.Sig)
127
128 // Copy bytes before returning buffer to pool
129 return bufpool.CopyBytes(buf), nil
130 }
131
132 // encodeCompactTag encodes a single tag with serial references for e/p tags.
133 func encodeCompactTag(w io.Writer, t *tag.T, resolver SerialResolver) (err error) {
134 if t == nil || t.Len() == 0 {
135 varint.Encode(w, 0)
136 return nil
137 }
138
139 varint.Encode(w, uint64(t.Len()))
140
141 // Get tag key to determine if we should use serial references
142 key := t.Key()
143 isPTag := len(key) == 1 && key[0] == 'p'
144 isETag := len(key) == 1 && key[0] == 'e'
145
146 for i, elem := range t.T {
147 if i == 0 {
148 // First element is always the tag key - store as raw
149 writeTagElement(w, TagElementRaw, elem)
150 continue
151 }
152
153 if i == 1 {
154 // Second element is the value - potentially a serial reference
155 if isPTag && len(elem) == 32 {
156 // Binary pubkey - look up serial
157 serial, serErr := resolver.GetOrCreatePubkeySerial(elem)
158 if serErr == nil {
159 writeTagElementSerial(w, TagElementPubkeySerial, serial)
160 continue
161 }
162 // Fall through to raw encoding on error
163 } else if isPTag && len(elem) == 64 {
164 // Hex pubkey - decode and look up serial
165 var pubkey []byte
166 if pubkey, err = hexDecode(elem); err == nil && len(pubkey) == 32 {
167 serial, serErr := resolver.GetOrCreatePubkeySerial(pubkey)
168 if serErr == nil {
169 writeTagElementSerial(w, TagElementPubkeySerial, serial)
170 continue
171 }
172 }
173 // Fall through to raw encoding on error
174 } else if isETag && len(elem) == 32 {
175 // Binary event ID - look up serial if exists
176 serial, found, serErr := resolver.GetEventSerialById(elem)
177 if serErr == nil && found {
178 writeTagElementSerial(w, TagElementEventSerial, serial)
179 continue
180 }
181 // Event not found - store full ID
182 writeTagElement(w, TagElementEventIdFull, elem)
183 continue
184 } else if isETag && len(elem) == 64 {
185 // Hex event ID - decode and look up serial
186 var eventId []byte
187 if eventId, err = hexDecode(elem); err == nil && len(eventId) == 32 {
188 serial, found, serErr := resolver.GetEventSerialById(eventId)
189 if serErr == nil && found {
190 writeTagElementSerial(w, TagElementEventSerial, serial)
191 continue
192 }
193 // Event not found - store full ID
194 writeTagElement(w, TagElementEventIdFull, eventId)
195 continue
196 }
197 // Fall through to raw encoding on error
198 }
199 }
200
201 // Default: raw encoding
202 writeTagElement(w, TagElementRaw, elem)
203 }
204
205 return nil
206 }
207
208 // writeTagElement writes a tag element with type flag.
209 func writeTagElement(w io.Writer, typeFlag byte, data []byte) {
210 w.Write([]byte{typeFlag})
211 if typeFlag == TagElementEventIdFull {
212 // Full event ID - no length prefix, always 32 bytes
213 w.Write(data)
214 } else {
215 // Raw data - length prefix
216 varint.Encode(w, uint64(len(data)))
217 w.Write(data)
218 }
219 }
220
221 // writeTagElementSerial writes a serial reference tag element.
222 func writeTagElementSerial(w io.Writer, typeFlag byte, serial uint64) {
223 w.Write([]byte{typeFlag})
224 writeUint40(w, serial)
225 }
226
// writeUint40 writes the low 40 bits of value to w as 5 big-endian bytes in a
// single Write call. Bits above the 40th are discarded, and the Write error
// is not checked.
func writeUint40(w io.Writer, value uint64) {
	var out [5]byte
	for i := 0; i < len(out); i++ {
		// Most significant of the five bytes first.
		out[i] = byte(value >> (uint(len(out)-1-i) * 8))
	}
	w.Write(out[:])
}
238
// readUint40 reads exactly 5 bytes from r and returns them interpreted as a
// big-endian unsigned integer. If the bytes cannot all be read, it returns 0
// and the error reported by io.ReadFull.
func readUint40(r io.Reader) (value uint64, err error) {
	var raw [5]byte // fixed-size array, sliced only for the read
	if _, err = io.ReadFull(r, raw[:]); err != nil {
		return 0, err
	}
	// Fold the bytes in, most significant first.
	for _, b := range raw {
		value = value<<8 | uint64(b)
	}
	return value, nil
}
252
253 // UnmarshalCompactEvent decodes a compact event back to a full event.E.
254 // The resolver is used to look up pubkeys and event IDs from serials.
255 // The eventId parameter is the full 32-byte event ID (from SerialEventId table).
256 func UnmarshalCompactEvent(data []byte, eventId []byte, resolver SerialResolver) (ev *event.E, err error) {
257 // Validate eventId upfront to prevent returning events with zero IDs
258 if len(eventId) != 32 {
259 return nil, errors.New("invalid eventId: must be exactly 32 bytes")
260 }
261
262 r := bytes.NewReader(data)
263 ev = new(event.E)
264
265 // Version byte
266 version, err := r.ReadByte()
267 if err != nil {
268 return nil, err
269 }
270 if version != CompactFormatVersion {
271 return nil, errors.New("unsupported compact event format version")
272 }
273
274 // Set the event ID (passed separately from SerialEventId lookup)
275 ev.ID = make([]byte, 32)
276 copy(ev.ID, eventId)
277
278 // Author pubkey serial (5 bytes) -> full pubkey
279 authorSerial, err := readUint40(r)
280 if err != nil {
281 return nil, err
282 }
283 if ev.Pubkey, err = resolver.GetPubkeyBySerial(authorSerial); chk.E(err) {
284 return nil, err
285 }
286
287 // CreatedAt (varint)
288 var ca uint64
289 if ca, err = varint.Decode(r); chk.E(err) {
290 return nil, err
291 }
292 ev.CreatedAt = int64(ca)
293
294 // Kind (2 bytes big-endian)
295 if err = binary.Read(r, binary.BigEndian, &ev.Kind); chk.E(err) {
296 return nil, err
297 }
298
299 // Tags
300 var nTags uint64
301 if nTags, err = varint.Decode(r); chk.E(err) {
302 return nil, err
303 }
304 if nTags > MaxTagsPerEvent {
305 return nil, ErrTooManyTags // Don't log - caller handles gracefully
306 }
307 if nTags > 0 {
308 ev.Tags = tag.NewSWithCap(int(nTags))
309 for i := uint64(0); i < nTags; i++ {
310 var t *tag.T
311 if t, err = decodeCompactTag(r, resolver); err != nil {
312 return nil, err // Don't log corruption errors
313 }
314 *ev.Tags = append(*ev.Tags, t)
315 }
316 }
317
318 // Content
319 var contentLen uint64
320 if contentLen, err = varint.Decode(r); chk.E(err) {
321 return nil, err
322 }
323 if contentLen > MaxContentLength {
324 return nil, ErrContentTooLarge
325 }
326 ev.Content = make([]byte, contentLen)
327 if _, err = io.ReadFull(r, ev.Content); chk.E(err) {
328 return nil, err
329 }
330
331 // Signature (64 bytes)
332 ev.Sig = make([]byte, schnorr.SignatureSize)
333 if _, err = io.ReadFull(r, ev.Sig); chk.E(err) {
334 return nil, err
335 }
336
337 return ev, nil
338 }
339
340 // decodeCompactTag decodes a single tag from compact format.
341 func decodeCompactTag(r io.Reader, resolver SerialResolver) (t *tag.T, err error) {
342 var nElems uint64
343 if nElems, err = varint.Decode(r); err != nil {
344 return nil, err
345 }
346 if nElems > MaxTagElements {
347 return nil, ErrTooManyTagElems
348 }
349
350 t = tag.NewWithCap(int(nElems))
351
352 for i := uint64(0); i < nElems; i++ {
353 var elem []byte
354 if elem, err = decodeTagElement(r, resolver); err != nil {
355 return nil, err // Don't log corruption errors
356 }
357 t.T = append(t.T, elem)
358 }
359
360 return t, nil
361 }
362
363 // decodeTagElement decodes a single tag element from compact format.
364 func decodeTagElement(r io.Reader, resolver SerialResolver) (elem []byte, err error) {
365 // Read type flag (fixed array avoids heap escape)
366 var typeBuf [1]byte
367 if _, err = io.ReadFull(r, typeBuf[:]); err != nil {
368 return nil, err
369 }
370 typeFlag := typeBuf[0]
371
372 switch typeFlag {
373 case TagElementRaw:
374 // Raw bytes: varint length + data
375 var length uint64
376 if length, err = varint.Decode(r); err != nil {
377 return nil, err
378 }
379 if length > MaxTagElementLength {
380 return nil, ErrTagElementTooLong
381 }
382 elem = make([]byte, length)
383 if _, err = io.ReadFull(r, elem); err != nil {
384 return nil, err
385 }
386 return elem, nil
387
388 case TagElementPubkeySerial:
389 // Pubkey serial: 5 bytes -> lookup full pubkey -> return as 33-byte binary
390 serial, err := readUint40(r)
391 if err != nil {
392 return nil, err
393 }
394 pubkey, err := resolver.GetPubkeyBySerial(serial)
395 if err != nil {
396 return nil, err
397 }
398 // Return as 33-byte binary (32 bytes + null terminator) for tag.Marshal detection
399 result := make([]byte, 33)
400 copy(result, pubkey)
401 result[32] = 0 // null terminator
402 return result, nil
403
404 case TagElementEventSerial:
405 // Event serial: 5 bytes -> lookup full event ID -> return as 33-byte binary
406 serial, err := readUint40(r)
407 if err != nil {
408 return nil, err
409 }
410 eventId, err := resolver.GetEventIdBySerial(serial)
411 if err != nil {
412 return nil, err
413 }
414 // Return as 33-byte binary (32 bytes + null terminator) for tag.Marshal detection
415 result := make([]byte, 33)
416 copy(result, eventId)
417 result[32] = 0 // null terminator
418 return result, nil
419
420 case TagElementEventIdFull:
421 // Full event ID: 32 bytes (for unknown/forward references)
422 // Return as 33-byte binary (32 bytes + null terminator) for tag.Marshal detection
423 elem = make([]byte, 33)
424 if _, err = io.ReadFull(r, elem[:32]); err != nil {
425 return nil, err
426 }
427 elem[32] = 0 // null terminator
428 return elem, nil
429
430 default:
431 return nil, ErrUnknownTagElemType
432 }
433 }
434
435 // hexDecode decodes hex bytes to binary.
436 // This is a simple implementation - the real one uses the optimized hex package.
437 func hexDecode(src []byte) (dst []byte, err error) {
438 if len(src)%2 != 0 {
439 return nil, errors.New("hex string has odd length")
440 }
441 dst = make([]byte, len(src)/2)
442 for i := 0; i < len(dst); i++ {
443 a := unhex(src[i*2])
444 b := unhex(src[i*2+1])
445 if a == 0xFF || b == 0xFF {
446 return nil, errors.New("invalid hex character")
447 }
448 dst[i] = (a << 4) | b
449 }
450 return dst, nil
451 }
452
// unhex returns the numeric value (0-15) of the hex digit c, accepting
// '0'-'9', 'a'-'f' and 'A'-'F'. Any other byte yields the sentinel 0xFF.
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	// Setting bit 0x20 folds 'A'-'F' onto 'a'-'f'; no other byte lands in
	// that range.
	if lower := c | 0x20; lower >= 'a' && lower <= 'f' {
		return lower - 'a' + 10
	}
	return 0xFF
}
464