tag.go raw
1 // Package tag provides an implementation of a nostr tag list, an array of
2 // strings with a usually single letter first "key" field, including methods to
3 // compare, marshal/unmarshal and access elements with their proper semantics.
4 package tag
5
6 import (
7 "bytes"
8
9 "next.orly.dev/pkg/nostr/encoders/hex"
10 "next.orly.dev/pkg/nostr/encoders/text"
11 "next.orly.dev/pkg/nostr/utils"
12 "next.orly.dev/pkg/lol/errorf"
13 )
14
// Indices of the conventional positions inside a tag's field list, named so
// that code indexing into T.T reads clearly.
const (
	// Key is the index of the first field, the tag's key.
	Key = 0
	// Value is the index of the second field, the tag's value.
	Value = 1
	// Relay is the index of the third field, the relay position.
	Relay = 2
)
21
// Sizes used by the binary storage optimization for hex-encoded identifiers.
// The encoded lengths are derived from HashLen so the relationship between
// them is explicit.
const (
	// HashLen is the raw length of a hash (pubkey/event ID) in bytes.
	HashLen = 32
	// HexEncodedLen is the length of a hex-encoded hash: two characters per
	// raw byte.
	HexEncodedLen = 2 * HashLen
	// BinaryEncodedLen is the length of a binary-encoded hash: the raw bytes
	// followed by a single null terminator.
	BinaryEncodedLen = HashLen + 1
)
31
// binaryOptimizedTags is the set of single-letter tag keys whose value field
// is eligible for the binary storage optimization: "e" (event references) and
// "p" (pubkey references). Keys that are absent read as false.
var binaryOptimizedTags = map[string]bool{"e": true, "p": true}
37
// T is a single tag: an ordered list of fields held as byte slices. By the
// conventions of this package the field at index Key is the tag's key, the one
// at index Value is its value and the one at index Relay is the relay field.
//
// The value field of "e" and "p" tags may be held in a compact binary form
// (HashLen raw bytes followed by a null byte) instead of hex text; see
// Unmarshal, ValueHex and ValueBinary.
type T struct {
	// T holds the tag's fields in order.
	T [][]byte
}
41
42 func New() *T { return &T{} }
43
44 func NewFromBytesSlice(t ...[]byte) (tt *T) {
45 tt = &T{T: t}
46 return
47 }
48
49 func NewFromAny(t ...any) (tt *T) {
50 tt = &T{}
51 for _, v := range t {
52 switch vv := v.(type) {
53 case []byte:
54 tt.T = append(tt.T, vv)
55 case string:
56 tt.T = append(tt.T, []byte(vv))
57 default:
58 panic("invalid type for tag fields, must be []byte or string")
59 }
60 }
61 return
62 }
63
64 func NewWithCap(c int) *T {
65 return &T{T: make([][]byte, 0, c)}
66 }
67
// Free sets the tag's field list to nil, releasing the tag's reference to its
// fields. The receiver must not be nil.
func (t *T) Free() {
	t.T = nil
}
71
72 func (t *T) Len() int {
73 if t == nil {
74 return 0
75 }
76 return len(t.T)
77 }
78
79 func (t *T) Less(i, j int) bool {
80 return bytes.Compare(t.T[i], t.T[j]) < 0
81 }
82
83 func (t *T) Swap(i, j int) { t.T[i], t.T[j] = t.T[j], t.T[i] }
84
85 // Contains returns true if the provided element is found in the tag slice.
86 func (t *T) Contains(s []byte) (b bool) {
87 for i := range t.T {
88 if utils.FastEqual(t.T[i], s) {
89 return true
90 }
91 }
92 return false
93 }
94
95 // Marshal encodes a tag.T as standard minified JSON array of strings.
96 // Binary-encoded values (e/p tags) are automatically converted back to hex.
97 func (t *T) Marshal(dst []byte) (b []byte) {
98 b = dst
99 // Pre-allocate buffer if nil to reduce reallocations
100 // Estimate: [ + (quoted field + comma) * n + ]
101 // Each field might be escaped, so estimate len(field) * 1.5 + 2 quotes + comma
102 if b == nil && len(t.T) > 0 {
103 estimatedSize := 2 // brackets
104 for i, s := range t.T {
105 fieldLen := len(s)
106 // Binary-encoded fields become hex (33 -> 64 chars)
107 if i == Value && isBinaryEncoded(s) {
108 fieldLen = HexEncodedLen
109 }
110 estimatedSize += fieldLen*3/2 + 4 // escaped field + quotes + comma
111 }
112 b = make([]byte, 0, estimatedSize)
113 }
114 b = append(b, '[')
115 for i, s := range t.T {
116 // Convert binary-encoded value fields back to hex for JSON
117 if i == Value && isBinaryEncoded(s) {
118 hexVal := hex.EncAppend(nil, s[:HashLen])
119 b = text.AppendQuote(b, hexVal, text.NostrEscape)
120 } else {
121 b = text.AppendQuote(b, s, text.NostrEscape)
122 }
123 if i < len(t.T)-1 {
124 b = append(b, ',')
125 }
126 }
127 b = append(b, ']')
128 return
129 }
130
131 // MarshalJSON encodes a tag.T as standard minified JSON array of strings.
132 //
133 // Warning: this will mangle the output if the tag fields contain <, > or &
134 // characters. do not use json.Marshal in the hopes of rendering tags verbatim
135 // in an event as you will have a bad time. Use the json.Marshal function in the
136 // pkg/encoders/json package instead, this has a fork of the json library that
137 // disables html escaping for json.Marshal.
138 func (t *T) MarshalJSON() (b []byte, err error) {
139 b = t.Marshal(nil)
140 return
141 }
142
143 // Unmarshal decodes a standard minified JSON array of strings to a tags.T.
144 // For "e" and "p" tags with 64-character hex values, it converts them to
145 // 33-byte binary format (32 bytes hash + null terminator) for efficiency.
146 func (t *T) Unmarshal(b []byte) (r []byte, err error) {
147 var inQuotes, openedBracket bool
148 var quoteStart int
149 // Pre-allocate slice with estimated capacity to reduce reallocations
150 // Estimate based on typical tag sizes (can grow if needed)
151 t.T = make([][]byte, 0, 4)
152 for i := 0; i < len(b); i++ {
153 if !openedBracket && b[i] == '[' {
154 openedBracket = true
155 } else if !inQuotes {
156 if b[i] == '"' {
157 inQuotes, quoteStart = true, i+1
158 } else if b[i] == ']' {
159 return b[i+1:], err
160 }
161 } else if b[i] == '\\' && i < len(b)-1 {
162 i++
163 } else if b[i] == '"' {
164 inQuotes = false
165 // Copy the quoted substring before unescaping so we don't mutate the
166 // original JSON buffer in-place (which would corrupt subsequent parsing).
167 copyBuf := make([]byte, i-quoteStart)
168 copy(copyBuf, b[quoteStart:i])
169 unescaped := text.NostrUnescape(copyBuf)
170
171 // Optimize e/p tag values by converting hex to binary
172 fieldIdx := len(t.T)
173 if fieldIdx == Value && len(t.T) > 0 && shouldOptimize(
174 t.T[Key], unescaped,
175 ) {
176 // Decode hex to binary format: 32 bytes + null terminator
177 binVal := make([]byte, BinaryEncodedLen)
178 if _, err = hex.DecBytes(
179 binVal[:HashLen], unescaped,
180 ); err == nil {
181 binVal[HashLen] = 0 // null terminator
182 t.T = append(t.T, binVal)
183 } else {
184 // If decode fails, store as-is
185 t.T = append(t.T, unescaped)
186 }
187 } else {
188 t.T = append(t.T, unescaped)
189 }
190 }
191 }
192 if !openedBracket || inQuotes {
193 return nil, errorf.E("tag: failed to parse tag")
194 }
195 return
196 }
197
198 func (t *T) UnmarshalJSON(b []byte) (err error) {
199 _, err = t.Unmarshal(b)
200 return
201 }
202
203 func (t *T) Key() (key []byte) {
204 if len(t.T) > Key {
205 return t.T[Key]
206 }
207 return
208 }
209
210 func (t *T) Value() (key []byte) {
211 if t == nil {
212 return
213 }
214 if len(t.T) > Value {
215 return t.T[Value]
216 }
217 return
218 }
219
220 func (t *T) Relay() (key []byte) {
221 if len(t.T) > Relay {
222 return t.T[Relay]
223 }
224 return
225 }
226
227 // ToSliceOfStrings returns the tag's bytes slices as a slice of strings. This
228 // method provides a convenient way to access the tag's contents in string format.
229 //
230 // # Return Values
231 //
232 // - s ([]string): A slice containing all tag elements converted to strings.
233 //
234 // # Expected Behaviour
235 //
236 // Returns an empty slice if the tag is empty, otherwise returns a new slice with
237 // each byte slice element converted to a string.
238 func (t *T) ToSliceOfStrings() (s []string) {
239 if len(t.T) == 0 {
240 return
241 }
242 // Pre-allocate slice with exact capacity to reduce reallocations
243 s = make([]string, 0, len(t.T))
244 for _, v := range t.T {
245 s = append(s, string(v))
246 }
247 return
248 }
249
250 // isBinaryEncoded checks if a value field is stored in optimized binary format
251 // (32-byte hash + null terminator = 33 bytes total)
252 func isBinaryEncoded(val []byte) bool {
253 return len(val) == BinaryEncodedLen && val[HashLen] == 0
254 }
255
256 // shouldOptimize checks if a tag should use binary encoding optimization
257 func shouldOptimize(key []byte, val []byte) bool {
258 if len(key) != 1 {
259 return false
260 }
261 keyStr := string(key)
262 if !binaryOptimizedTags[keyStr] {
263 return false
264 }
265 // Only optimize if it's a valid 64-character hex string
266 return len(val) == HexEncodedLen && isValidHex(val)
267 }
268
// isValidHex reports whether every byte of b is an ASCII hex digit, in either
// lower or upper case. An empty or nil slice is considered valid.
func isValidHex(b []byte) bool {
	for i := range b {
		switch c := b[i]; {
		case '0' <= c && c <= '9':
		case 'a' <= c && c <= 'f':
		case 'A' <= c && c <= 'F':
		default:
			return false
		}
	}
	return true
}
278
279 // ValueHex returns the value field as hex string. If the value is stored in
280 // binary format, it converts it to hex. Otherwise, it returns the value as-is.
281 func (t *T) ValueHex() []byte {
282 if t == nil || len(t.T) <= Value {
283 return nil
284 }
285 val := t.T[Value]
286 if isBinaryEncoded(val) {
287 // Convert binary back to hex
288 return hex.EncAppend(nil, val[:HashLen])
289 }
290 return val
291 }
292
293 // ValueBinary returns the raw binary value if it's binary-encoded, or nil otherwise.
294 // This is useful for database operations that need the raw hash bytes.
295 func (t *T) ValueBinary() []byte {
296 if t == nil || len(t.T) <= Value {
297 return nil
298 }
299 val := t.T[Value]
300 if isBinaryEncoded(val) {
301 return val[:HashLen]
302 }
303 return nil
304 }
305
306 // Equals compares two tags for equality, handling both binary and hex encodings.
307 // This ensures that ["e", "abc..."] and ["e", <binary>] are equal if they
308 // represent the same hash. This method does NOT allocate memory.
309 func (t *T) Equals(other *T) bool {
310 if t == nil && other == nil {
311 return true
312 }
313 if t == nil || other == nil {
314 return false
315 }
316 if len(t.T) != len(other.T) {
317 return false
318 }
319 for i := range t.T {
320 if i == Value && len(t.T) > Value {
321 // Special handling for value field to compare binary vs hex without allocating
322 tVal := t.T[Value]
323 oVal := other.T[Value]
324
325 tIsBinary := isBinaryEncoded(tVal)
326 oIsBinary := isBinaryEncoded(oVal)
327
328 // Both binary - compare first 32 bytes directly
329 if tIsBinary && oIsBinary {
330 if !bytes.Equal(tVal[:HashLen], oVal[:HashLen]) {
331 return false
332 }
333 } else if tIsBinary || oIsBinary {
334 // One is binary, one is hex - need to compare carefully
335 // Compare the binary one's raw bytes with hex-decoded version of the other
336 var binBytes, hexBytes []byte
337 if tIsBinary {
338 binBytes = tVal[:HashLen]
339 hexBytes = oVal
340 } else {
341 binBytes = oVal[:HashLen]
342 hexBytes = tVal
343 }
344
345 // Decode hex inline without allocation by comparing byte by byte
346 if len(hexBytes) != HexEncodedLen {
347 return false
348 }
349 for j := 0; j < HashLen; j++ {
350 // Convert two hex chars to one byte and compare
351 hi := hexBytes[j*2]
352 lo := hexBytes[j*2+1]
353
354 var hiByte, loByte byte
355 if hi >= '0' && hi <= '9' {
356 hiByte = hi - '0'
357 } else if hi >= 'a' && hi <= 'f' {
358 hiByte = hi - 'a' + 10
359 } else if hi >= 'A' && hi <= 'F' {
360 hiByte = hi - 'A' + 10
361 } else {
362 return false
363 }
364
365 if lo >= '0' && lo <= '9' {
366 loByte = lo - '0'
367 } else if lo >= 'a' && lo <= 'f' {
368 loByte = lo - 'a' + 10
369 } else if lo >= 'A' && lo <= 'F' {
370 loByte = lo - 'A' + 10
371 } else {
372 return false
373 }
374
375 expectedByte := (hiByte << 4) | loByte
376 if binBytes[j] != expectedByte {
377 return false
378 }
379 }
380 } else {
381 // Both are regular (hex or other) - direct comparison
382 if !bytes.Equal(tVal, oVal) {
383 return false
384 }
385 }
386 } else {
387 if !bytes.Equal(t.T[i], other.T[i]) {
388 return false
389 }
390 }
391 }
392 return true
393 }
394