tag.go raw

   1  // Package tag provides an implementation of a nostr tag list, an array of
   2  // strings with a usually single letter first "key" field, including methods to
   3  // compare, marshal/unmarshal and access elements with their proper semantics.
   4  package tag
   5  
   6  import (
   7  	"bytes"
   8  
   9  	"next.orly.dev/pkg/nostr/encoders/hex"
  10  	"next.orly.dev/pkg/nostr/encoders/text"
  11  	"next.orly.dev/pkg/nostr/utils"
  12  	"next.orly.dev/pkg/lol/errorf"
  13  )
  14  
// The tag position meanings, so they are clear when reading. These are the
// indexes into T.T used by the accessors and by the binary value optimization.
const (
	// Key is the index of the first field, the tag's name (for example "e" or
	// "p").
	Key = iota
	// Value is the index of the second field. For "e" and "p" tags this is the
	// field that may be stored in binary form.
	Value
	// Relay is the index of the third field, returned by the Relay accessor.
	Relay
)
  21  
// Binary encoding constants for optimized storage of hex-encoded identifiers.
const (
	// BinaryEncodedLen is the length of a binary-encoded 32-byte hash with its
	// trailing null terminator byte.
	BinaryEncodedLen = 33
	// HexEncodedLen is the length of a hex-encoded 32-byte hash.
	HexEncodedLen = 64
	// HashLen is the raw length of a hash (pubkey/event ID).
	HashLen = 32
)
  31  
// binaryOptimizedTags is the set of tag keys that use the binary encoding
// optimization for their value field. shouldOptimize looks keys up here.
var binaryOptimizedTags = map[string]bool{
	"e": true, // event references
	"p": true, // pubkey references
}
  37  
// T is a single tag: an ordered list of fields held as byte slices. The field
// at index Key is the tag name and the field at index Value is its value.
//
// After Unmarshal, the value of an "e" or "p" tag that was a 64-character hex
// string is held as BinaryEncodedLen bytes (the decoded hash followed by a
// zero byte) rather than as hex text; Marshal converts it back.
type T struct {
	// T holds the tag's fields in order.
	T [][]byte
}
  41  
  42  func New() *T { return &T{} }
  43  
  44  func NewFromBytesSlice(t ...[]byte) (tt *T) {
  45  	tt = &T{T: t}
  46  	return
  47  }
  48  
  49  func NewFromAny(t ...any) (tt *T) {
  50  	tt = &T{}
  51  	for _, v := range t {
  52  		switch vv := v.(type) {
  53  		case []byte:
  54  			tt.T = append(tt.T, vv)
  55  		case string:
  56  			tt.T = append(tt.T, []byte(vv))
  57  		default:
  58  			panic("invalid type for tag fields, must be []byte or string")
  59  		}
  60  	}
  61  	return
  62  }
  63  
  64  func NewWithCap(c int) *T {
  65  	return &T{T: make([][]byte, 0, c)}
  66  }
  67  
  68  func (t *T) Free() {
  69  	t.T = nil
  70  }
  71  
  72  func (t *T) Len() int {
  73  	if t == nil {
  74  		return 0
  75  	}
  76  	return len(t.T)
  77  }
  78  
  79  func (t *T) Less(i, j int) bool {
  80  	return bytes.Compare(t.T[i], t.T[j]) < 0
  81  }
  82  
  83  func (t *T) Swap(i, j int) { t.T[i], t.T[j] = t.T[j], t.T[i] }
  84  
  85  // Contains returns true if the provided element is found in the tag slice.
  86  func (t *T) Contains(s []byte) (b bool) {
  87  	for i := range t.T {
  88  		if utils.FastEqual(t.T[i], s) {
  89  			return true
  90  		}
  91  	}
  92  	return false
  93  }
  94  
  95  // Marshal encodes a tag.T as standard minified JSON array of strings.
  96  // Binary-encoded values (e/p tags) are automatically converted back to hex.
  97  func (t *T) Marshal(dst []byte) (b []byte) {
  98  	b = dst
  99  	// Pre-allocate buffer if nil to reduce reallocations
 100  	// Estimate: [ + (quoted field + comma) * n + ]
 101  	// Each field might be escaped, so estimate len(field) * 1.5 + 2 quotes + comma
 102  	if b == nil && len(t.T) > 0 {
 103  		estimatedSize := 2 // brackets
 104  		for i, s := range t.T {
 105  			fieldLen := len(s)
 106  			// Binary-encoded fields become hex (33 -> 64 chars)
 107  			if i == Value && isBinaryEncoded(s) {
 108  				fieldLen = HexEncodedLen
 109  			}
 110  			estimatedSize += fieldLen*3/2 + 4 // escaped field + quotes + comma
 111  		}
 112  		b = make([]byte, 0, estimatedSize)
 113  	}
 114  	b = append(b, '[')
 115  	for i, s := range t.T {
 116  		// Convert binary-encoded value fields back to hex for JSON
 117  		if i == Value && isBinaryEncoded(s) {
 118  			hexVal := hex.EncAppend(nil, s[:HashLen])
 119  			b = text.AppendQuote(b, hexVal, text.NostrEscape)
 120  		} else {
 121  			b = text.AppendQuote(b, s, text.NostrEscape)
 122  		}
 123  		if i < len(t.T)-1 {
 124  			b = append(b, ',')
 125  		}
 126  	}
 127  	b = append(b, ']')
 128  	return
 129  }
 130  
 131  // MarshalJSON encodes a tag.T as standard minified JSON array of strings.
 132  //
 133  // Warning: this will mangle the output if the tag fields contain <, > or &
 134  // characters. do not use json.Marshal in the hopes of rendering tags verbatim
 135  // in an event as you will have a bad time. Use the json.Marshal function in the
 136  // pkg/encoders/json package instead, this has a fork of the json library that
 137  // disables html escaping for json.Marshal.
 138  func (t *T) MarshalJSON() (b []byte, err error) {
 139  	b = t.Marshal(nil)
 140  	return
 141  }
 142  
 143  // Unmarshal decodes a standard minified JSON array of strings to a tags.T.
 144  // For "e" and "p" tags with 64-character hex values, it converts them to
 145  // 33-byte binary format (32 bytes hash + null terminator) for efficiency.
 146  func (t *T) Unmarshal(b []byte) (r []byte, err error) {
 147  	var inQuotes, openedBracket bool
 148  	var quoteStart int
 149  	// Pre-allocate slice with estimated capacity to reduce reallocations
 150  	// Estimate based on typical tag sizes (can grow if needed)
 151  	t.T = make([][]byte, 0, 4)
 152  	for i := 0; i < len(b); i++ {
 153  		if !openedBracket && b[i] == '[' {
 154  			openedBracket = true
 155  		} else if !inQuotes {
 156  			if b[i] == '"' {
 157  				inQuotes, quoteStart = true, i+1
 158  			} else if b[i] == ']' {
 159  				return b[i+1:], err
 160  			}
 161  		} else if b[i] == '\\' && i < len(b)-1 {
 162  			i++
 163  		} else if b[i] == '"' {
 164  			inQuotes = false
 165  			// Copy the quoted substring before unescaping so we don't mutate the
 166  			// original JSON buffer in-place (which would corrupt subsequent parsing).
 167  			copyBuf := make([]byte, i-quoteStart)
 168  			copy(copyBuf, b[quoteStart:i])
 169  			unescaped := text.NostrUnescape(copyBuf)
 170  
 171  			// Optimize e/p tag values by converting hex to binary
 172  			fieldIdx := len(t.T)
 173  			if fieldIdx == Value && len(t.T) > 0 && shouldOptimize(
 174  				t.T[Key], unescaped,
 175  			) {
 176  				// Decode hex to binary format: 32 bytes + null terminator
 177  				binVal := make([]byte, BinaryEncodedLen)
 178  				if _, err = hex.DecBytes(
 179  					binVal[:HashLen], unescaped,
 180  				); err == nil {
 181  					binVal[HashLen] = 0 // null terminator
 182  					t.T = append(t.T, binVal)
 183  				} else {
 184  					// If decode fails, store as-is
 185  					t.T = append(t.T, unescaped)
 186  				}
 187  			} else {
 188  				t.T = append(t.T, unescaped)
 189  			}
 190  		}
 191  	}
 192  	if !openedBracket || inQuotes {
 193  		return nil, errorf.E("tag: failed to parse tag")
 194  	}
 195  	return
 196  }
 197  
 198  func (t *T) UnmarshalJSON(b []byte) (err error) {
 199  	_, err = t.Unmarshal(b)
 200  	return
 201  }
 202  
 203  func (t *T) Key() (key []byte) {
 204  	if len(t.T) > Key {
 205  		return t.T[Key]
 206  	}
 207  	return
 208  }
 209  
 210  func (t *T) Value() (key []byte) {
 211  	if t == nil {
 212  		return
 213  	}
 214  	if len(t.T) > Value {
 215  		return t.T[Value]
 216  	}
 217  	return
 218  }
 219  
 220  func (t *T) Relay() (key []byte) {
 221  	if len(t.T) > Relay {
 222  		return t.T[Relay]
 223  	}
 224  	return
 225  }
 226  
 227  // ToSliceOfStrings returns the tag's bytes slices as a slice of strings. This
 228  // method provides a convenient way to access the tag's contents in string format.
 229  //
 230  // # Return Values
 231  //
 232  // - s ([]string): A slice containing all tag elements converted to strings.
 233  //
 234  // # Expected Behaviour
 235  //
 236  // Returns an empty slice if the tag is empty, otherwise returns a new slice with
 237  // each byte slice element converted to a string.
 238  func (t *T) ToSliceOfStrings() (s []string) {
 239  	if len(t.T) == 0 {
 240  		return
 241  	}
 242  	// Pre-allocate slice with exact capacity to reduce reallocations
 243  	s = make([]string, 0, len(t.T))
 244  	for _, v := range t.T {
 245  		s = append(s, string(v))
 246  	}
 247  	return
 248  }
 249  
 250  // isBinaryEncoded checks if a value field is stored in optimized binary format
 251  // (32-byte hash + null terminator = 33 bytes total)
 252  func isBinaryEncoded(val []byte) bool {
 253  	return len(val) == BinaryEncodedLen && val[HashLen] == 0
 254  }
 255  
 256  // shouldOptimize checks if a tag should use binary encoding optimization
 257  func shouldOptimize(key []byte, val []byte) bool {
 258  	if len(key) != 1 {
 259  		return false
 260  	}
 261  	keyStr := string(key)
 262  	if !binaryOptimizedTags[keyStr] {
 263  		return false
 264  	}
 265  	// Only optimize if it's a valid 64-character hex string
 266  	return len(val) == HexEncodedLen && isValidHex(val)
 267  }
 268  
 269  // isValidHex checks if all bytes are valid hex characters
 270  func isValidHex(b []byte) bool {
 271  	for _, c := range b {
 272  		if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
 273  			return false
 274  		}
 275  	}
 276  	return true
 277  }
 278  
 279  // ValueHex returns the value field as hex string. If the value is stored in
 280  // binary format, it converts it to hex. Otherwise, it returns the value as-is.
 281  func (t *T) ValueHex() []byte {
 282  	if t == nil || len(t.T) <= Value {
 283  		return nil
 284  	}
 285  	val := t.T[Value]
 286  	if isBinaryEncoded(val) {
 287  		// Convert binary back to hex
 288  		return hex.EncAppend(nil, val[:HashLen])
 289  	}
 290  	return val
 291  }
 292  
 293  // ValueBinary returns the raw binary value if it's binary-encoded, or nil otherwise.
 294  // This is useful for database operations that need the raw hash bytes.
 295  func (t *T) ValueBinary() []byte {
 296  	if t == nil || len(t.T) <= Value {
 297  		return nil
 298  	}
 299  	val := t.T[Value]
 300  	if isBinaryEncoded(val) {
 301  		return val[:HashLen]
 302  	}
 303  	return nil
 304  }
 305  
 306  // Equals compares two tags for equality, handling both binary and hex encodings.
 307  // This ensures that ["e", "abc..."] and ["e", <binary>] are equal if they
 308  // represent the same hash. This method does NOT allocate memory.
 309  func (t *T) Equals(other *T) bool {
 310  	if t == nil && other == nil {
 311  		return true
 312  	}
 313  	if t == nil || other == nil {
 314  		return false
 315  	}
 316  	if len(t.T) != len(other.T) {
 317  		return false
 318  	}
 319  	for i := range t.T {
 320  		if i == Value && len(t.T) > Value {
 321  			// Special handling for value field to compare binary vs hex without allocating
 322  			tVal := t.T[Value]
 323  			oVal := other.T[Value]
 324  
 325  			tIsBinary := isBinaryEncoded(tVal)
 326  			oIsBinary := isBinaryEncoded(oVal)
 327  
 328  			// Both binary - compare first 32 bytes directly
 329  			if tIsBinary && oIsBinary {
 330  				if !bytes.Equal(tVal[:HashLen], oVal[:HashLen]) {
 331  					return false
 332  				}
 333  			} else if tIsBinary || oIsBinary {
 334  				// One is binary, one is hex - need to compare carefully
 335  				// Compare the binary one's raw bytes with hex-decoded version of the other
 336  				var binBytes, hexBytes []byte
 337  				if tIsBinary {
 338  					binBytes = tVal[:HashLen]
 339  					hexBytes = oVal
 340  				} else {
 341  					binBytes = oVal[:HashLen]
 342  					hexBytes = tVal
 343  				}
 344  
 345  				// Decode hex inline without allocation by comparing byte by byte
 346  				if len(hexBytes) != HexEncodedLen {
 347  					return false
 348  				}
 349  				for j := 0; j < HashLen; j++ {
 350  					// Convert two hex chars to one byte and compare
 351  					hi := hexBytes[j*2]
 352  					lo := hexBytes[j*2+1]
 353  
 354  					var hiByte, loByte byte
 355  					if hi >= '0' && hi <= '9' {
 356  						hiByte = hi - '0'
 357  					} else if hi >= 'a' && hi <= 'f' {
 358  						hiByte = hi - 'a' + 10
 359  					} else if hi >= 'A' && hi <= 'F' {
 360  						hiByte = hi - 'A' + 10
 361  					} else {
 362  						return false
 363  					}
 364  
 365  					if lo >= '0' && lo <= '9' {
 366  						loByte = lo - '0'
 367  					} else if lo >= 'a' && lo <= 'f' {
 368  						loByte = lo - 'a' + 10
 369  					} else if lo >= 'A' && lo <= 'F' {
 370  						loByte = lo - 'A' + 10
 371  					} else {
 372  						return false
 373  					}
 374  
 375  					expectedByte := (hiByte << 4) | loByte
 376  					if binBytes[j] != expectedByte {
 377  						return false
 378  					}
 379  				}
 380  			} else {
 381  				// Both are regular (hex or other) - direct comparison
 382  				if !bytes.Equal(tVal, oVal) {
 383  					return false
 384  				}
 385  			}
 386  		} else {
 387  			if !bytes.Equal(t.T[i], other.T[i]) {
 388  				return false
 389  			}
 390  		}
 391  	}
 392  	return true
 393  }
 394