// get-indexes-from-filter.go
   1  package database
   2  
   3  import (
   4  	"bytes"
   5  	"math"
   6  	"sort"
   7  
   8  	"next.orly.dev/pkg/lol/chk"
   9  	"next.orly.dev/pkg/lol/errorf"
  10  	"next.orly.dev/pkg/lol/log"
  11  	"next.orly.dev/pkg/database/indexes"
  12  	types2 "next.orly.dev/pkg/database/indexes/types"
  13  	"next.orly.dev/pkg/nostr/encoders/filter"
  14  	"next.orly.dev/pkg/nostr/encoders/tag"
  15  )
  16  
// Range describes a span of encoded index keys to iterate over: Start is the
// first key of the span and End the last. For exact-match lookups (such as
// event-ID indexes) Start and End are identical; for time-bounded lookups the
// two keys differ only in their created_at component.
type Range struct {
	Start, End []byte
}
  20  
  21  // IsHexString checks if the byte slice contains only hex characters
  22  func IsHexString(data []byte) (isHex bool) {
  23  	if len(data)%2 != 0 {
  24  		return false
  25  	}
  26  	for _, b := range data {
  27  		if !((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
  28  			return false
  29  		}
  30  	}
  31  	return true
  32  }
  33  
// NormalizeTagValueForHash normalizes a tag value for consistent hashing.
// For 'e' and 'p' tags, the nostr library stores values in binary format (32 bytes),
// but filters from clients come with hex strings (64 chars). This function ensures
// that filter values are converted to binary to match the stored index format.
//
// key is the single-letter tag key (e.g. 'e' or 'p'); valueBytes is the raw
// value as received in the filter. The returned slice is whatever
// NormalizeTagValue produces for that pair.
//
// This function delegates to NormalizeTagValue from filter_utils.go for consistency.
func NormalizeTagValueForHash(key byte, valueBytes []byte) []byte {
	return NormalizeTagValue(key, valueBytes)
}
  43  
  44  // CreateIdHashFromData creates an IdHash from data that could be hex or binary
  45  func CreateIdHashFromData(data []byte) (i *types2.IdHash, err error) {
  46  	i = new(types2.IdHash)
  47  
  48  	// Skip empty data to avoid noisy errors
  49  	if len(data) == 0 {
  50  		err = errorf.E("CreateIdHashFromData: empty ID provided")
  51  		return
  52  	}
  53  
  54  	// If data looks like hex string and has the right length for hex-encoded
  55  	// sha256
  56  	if len(data) == 64 {
  57  		if err = i.FromIdHex(string(data)); chk.E(err) {
  58  			err = nil
  59  		} else {
  60  			return
  61  		}
  62  	}
  63  	// Assume it's binary data
  64  	if err = i.FromId(data); chk.E(err) {
  65  		return
  66  	}
  67  	return
  68  }
  69  
  70  // CreatePubHashFromData creates a PubHash from data that could be hex or binary
  71  func CreatePubHashFromData(data []byte) (p *types2.PubHash, err error) {
  72  	p = new(types2.PubHash)
  73  
  74  	// If data looks like hex string and has the right length for hex-encoded
  75  	// pubkey
  76  	if len(data) == 64 {
  77  		if err = p.FromPubkeyHex(string(data)); chk.E(err) {
  78  			err = nil
  79  		} else {
  80  			return
  81  		}
  82  	} else {
  83  		// Assume it's binary data
  84  		if err = p.FromPubkey(data); chk.E(err) {
  85  			return
  86  		}
  87  	}
  88  	return
  89  }
  90  
  91  // GetIndexesFromFilter returns encoded indexes based on the given filter.
  92  //
  93  // An error is returned if any input values are invalid during encoding.
  94  //
  95  // The indexes are designed so that only one table needs to be iterated, being a
  96  // complete set of combinations of all fields in the event, thus there is no
  97  // need to decode events until they are to be delivered.
  98  func GetIndexesFromFilter(f *filter.F) (idxs []Range, err error) {
  99  	// ID eid
 100  	//
 101  	// If there is any Ids in the filter, none of the other fields matter. It
 102  	// should be an error, but convention just ignores it.
 103  	if f.Ids.Len() > 0 {
 104  		for _, id := range f.Ids.T {
 105  			// Skip empty IDs - some filters have empty ID values
 106  			if len(id) == 0 {
 107  				log.D.F("GetIndexesFromFilter: skipping empty ID in filter (ids=%d)", f.Ids.Len())
 108  				continue
 109  			}
 110  			if err = func() (err error) {
 111  				var i *types2.IdHash
 112  				if i, err = CreateIdHashFromData(id); chk.E(err) {
 113  					return
 114  				}
 115  				buf := new(bytes.Buffer)
 116  				// Create an index prefix without the serial number
 117  				idx := indexes.IdEnc(i, nil)
 118  				if err = idx.MarshalWrite(buf); chk.E(err) {
 119  					return
 120  				}
 121  				b := buf.Bytes()
 122  				// For ID filters, both start and end indexes are the same (exact match)
 123  				r := Range{b, b}
 124  				idxs = append(idxs, r)
 125  				return
 126  			}(); chk.E(err) {
 127  				return
 128  			}
 129  		}
 130  		return
 131  	}
 132  
 133  	// Word search: if Search field is present, generate word index ranges
 134  	if len(f.Search) > 0 {
 135  		for _, h := range TokenHashes(f.Search) {
 136  			w := new(types2.Word)
 137  			w.FromWord(h)
 138  			buf := new(bytes.Buffer)
 139  			idx := indexes.WordEnc(w, nil)
 140  			if err = idx.MarshalWrite(buf); chk.E(err) {
 141  				return
 142  			}
 143  			b := buf.Bytes()
 144  			end := make([]byte, len(b))
 145  			copy(end, b)
 146  			for i := 0; i < 5; i++ { // match any serial
 147  				end = append(end, 0xff)
 148  			}
 149  			idxs = append(idxs, Range{b, end})
 150  		}
 151  		return
 152  	}
 153  
 154  	caStart := new(types2.Uint64)
 155  	caEnd := new(types2.Uint64)
 156  
 157  	// Set the start of range (Since or default to zero)
 158  	if f.Since != nil && f.Since.V != 0 {
 159  		caStart.Set(uint64(f.Since.V))
 160  	} else {
 161  		caStart.Set(uint64(0))
 162  	}
 163  
 164  	// Set the end of range (Until or default to math.MaxInt64)
 165  	if f.Until != nil && f.Until.V != 0 {
 166  		caEnd.Set(uint64(f.Until.V))
 167  	} else {
 168  		caEnd.Set(uint64(math.MaxInt64))
 169  	}
 170  
 171  	// Filter out special tags that shouldn't affect index selection
 172  	var filteredTags *tag.S
 173  	var pTags *tag.S // Separate collection for p-tags that can use graph index
 174  	if f.Tags != nil && f.Tags.Len() > 0 {
 175  		filteredTags = tag.NewSWithCap(f.Tags.Len())
 176  		pTags = tag.NewS()
 177  		for _, t := range *f.Tags {
 178  			// Skip the special "show_all_versions" tag
 179  			if bytes.Equal(t.Key(), []byte("show_all_versions")) {
 180  				continue
 181  			}
 182  			// Collect p-tags separately for potential graph optimization
 183  			keyBytes := t.Key()
 184  			if (len(keyBytes) == 1 && keyBytes[0] == 'p') ||
 185  			   (len(keyBytes) == 2 && keyBytes[0] == '#' && keyBytes[1] == 'p') {
 186  				pTags.Append(t)
 187  			}
 188  			filteredTags.Append(t)
 189  		}
 190  		// sort the filtered tags so they are in iteration order (reverse)
 191  		if filteredTags.Len() > 0 {
 192  			sort.Sort(filteredTags)
 193  		}
 194  	}
 195  
 196  	// Note: P-tag graph optimization is handled in query-for-ptag-graph.go
 197  	// when appropriate (requires database context for serial lookup)
 198  
 199  	// TagKindPubkey tkp
 200  	if f.Kinds != nil && f.Kinds.Len() > 0 && f.Authors != nil && f.Authors.Len() > 0 && filteredTags != nil && filteredTags.Len() > 0 {
 201  		for _, k := range f.Kinds.ToUint16() {
 202  			for _, author := range f.Authors.T {
 203  				for _, t := range *filteredTags {
 204  					// accept single-letter keys like "e" or filter-style keys like "#e"
 205  					if t.Len() >= 2 && (len(t.Key()) == 1 || (len(t.Key()) == 2 && t.Key()[0] == '#')) {
 206  						kind := new(types2.Uint16)
 207  						kind.Set(k)
 208  						var p *types2.PubHash
 209  						if p, err = CreatePubHashFromData(author); chk.E(err) {
 210  							return
 211  						}
 212  						keyBytes := t.Key()
 213  						key := new(types2.Letter)
 214  						// If the tag key starts with '#', use the second character as the key
 215  						var keyByte byte
 216  						if len(keyBytes) == 2 && keyBytes[0] == '#' {
 217  							keyByte = keyBytes[1]
 218  						} else {
 219  							keyByte = keyBytes[0]
 220  						}
 221  						key.Set(keyByte)
 222  						for _, valueBytes := range t.T[1:] {
 223  							// Normalize e/p tag values from hex to binary for consistent hashing
 224  							normalizedValue := NormalizeTagValueForHash(keyByte, valueBytes)
 225  							valueHash := new(types2.Ident)
 226  							valueHash.FromIdent(normalizedValue)
 227  							start, end := new(bytes.Buffer), new(bytes.Buffer)
 228  							idxS := indexes.TagKindPubkeyEnc(
 229  								key, valueHash, kind, p, caStart, nil,
 230  							)
 231  							if err = idxS.MarshalWrite(start); chk.E(err) {
 232  								return
 233  							}
 234  							idxE := indexes.TagKindPubkeyEnc(
 235  								key, valueHash, kind, p, caEnd, nil,
 236  							)
 237  							if err = idxE.MarshalWrite(end); chk.E(err) {
 238  								return
 239  							}
 240  							idxs = append(
 241  								idxs, Range{
 242  									start.Bytes(), end.Bytes(),
 243  								},
 244  							)
 245  						}
 246  					}
 247  				}
 248  			}
 249  		}
 250  		return
 251  	}
 252  
 253  	// TagKind tkc
 254  	if f.Kinds != nil && f.Kinds.Len() > 0 && filteredTags != nil && filteredTags.Len() > 0 {
 255  		for _, k := range f.Kinds.ToUint16() {
 256  			for _, t := range *filteredTags {
 257  				if t.Len() >= 2 && (len(t.Key()) == 1 || (len(t.Key()) == 2 && t.Key()[0] == '#')) {
 258  					kind := new(types2.Uint16)
 259  					kind.Set(k)
 260  					keyBytes := t.Key()
 261  					key := new(types2.Letter)
 262  					// If the tag key starts with '#', use the second character as the key
 263  					var keyByte byte
 264  					if len(keyBytes) == 2 && keyBytes[0] == '#' {
 265  						keyByte = keyBytes[1]
 266  					} else {
 267  						keyByte = keyBytes[0]
 268  					}
 269  					key.Set(keyByte)
 270  					for _, valueBytes := range t.T[1:] {
 271  						// Normalize e/p tag values from hex to binary for consistent hashing
 272  						normalizedValue := NormalizeTagValueForHash(keyByte, valueBytes)
 273  						valueHash := new(types2.Ident)
 274  						valueHash.FromIdent(normalizedValue)
 275  						start, end := new(bytes.Buffer), new(bytes.Buffer)
 276  						idxS := indexes.TagKindEnc(
 277  							key, valueHash, kind, caStart, nil,
 278  						)
 279  						if err = idxS.MarshalWrite(start); chk.E(err) {
 280  							return
 281  						}
 282  						idxE := indexes.TagKindEnc(
 283  							key, valueHash, kind, caEnd, nil,
 284  						)
 285  						if err = idxE.MarshalWrite(end); chk.E(err) {
 286  							return
 287  						}
 288  						idxs = append(
 289  							idxs, Range{
 290  								start.Bytes(), end.Bytes(),
 291  							},
 292  						)
 293  					}
 294  				}
 295  			}
 296  		}
 297  		return
 298  	}
 299  
 300  	// TagPubkey tpc
 301  	if f.Authors != nil && f.Authors.Len() > 0 && filteredTags != nil && filteredTags.Len() > 0 {
 302  		for _, author := range f.Authors.T {
 303  			for _, t := range *filteredTags {
 304  				if t.Len() >= 2 && (len(t.Key()) == 1 || (len(t.Key()) == 2 && t.Key()[0] == '#')) {
 305  					var p *types2.PubHash
 306  					log.I.S(author)
 307  					if p, err = CreatePubHashFromData(author); chk.E(err) {
 308  						return
 309  					}
 310  					keyBytes := t.Key()
 311  					key := new(types2.Letter)
 312  					// If the tag key starts with '#', use the second character as the key
 313  					var keyByte byte
 314  					if len(keyBytes) == 2 && keyBytes[0] == '#' {
 315  						keyByte = keyBytes[1]
 316  					} else {
 317  						keyByte = keyBytes[0]
 318  					}
 319  					key.Set(keyByte)
 320  					for _, valueBytes := range t.T[1:] {
 321  						// Normalize e/p tag values from hex to binary for consistent hashing
 322  						normalizedValue := NormalizeTagValueForHash(keyByte, valueBytes)
 323  						valueHash := new(types2.Ident)
 324  						valueHash.FromIdent(normalizedValue)
 325  						start, end := new(bytes.Buffer), new(bytes.Buffer)
 326  						idxS := indexes.TagPubkeyEnc(
 327  							key, valueHash, p, caStart, nil,
 328  						)
 329  						if err = idxS.MarshalWrite(start); chk.E(err) {
 330  							return
 331  						}
 332  						idxE := indexes.TagPubkeyEnc(
 333  							key, valueHash, p, caEnd, nil,
 334  						)
 335  						if err = idxE.MarshalWrite(end); chk.E(err) {
 336  							return
 337  						}
 338  						idxs = append(
 339  							idxs, Range{start.Bytes(), end.Bytes()},
 340  						)
 341  					}
 342  				}
 343  			}
 344  		}
 345  		return
 346  	}
 347  
 348  	// Tag tc-
 349  	if filteredTags != nil && filteredTags.Len() > 0 && (f.Authors == nil || f.Authors.Len() == 0) && (f.Kinds == nil || f.Kinds.Len() == 0) {
 350  		for _, t := range *filteredTags {
 351  			if t.Len() >= 2 && (len(t.Key()) == 1 || (len(t.Key()) == 2 && t.Key()[0] == '#')) {
 352  				keyBytes := t.Key()
 353  				key := new(types2.Letter)
 354  				// If the tag key starts with '#', use the second character as the key
 355  				var keyByte byte
 356  				if len(keyBytes) == 2 && keyBytes[0] == '#' {
 357  					keyByte = keyBytes[1]
 358  				} else {
 359  					keyByte = keyBytes[0]
 360  				}
 361  				key.Set(keyByte)
 362  				for _, valueBytes := range t.T[1:] {
 363  					// Normalize e/p tag values from hex to binary for consistent hashing
 364  					normalizedValue := NormalizeTagValueForHash(keyByte, valueBytes)
 365  					valueHash := new(types2.Ident)
 366  					valueHash.FromIdent(normalizedValue)
 367  					start, end := new(bytes.Buffer), new(bytes.Buffer)
 368  					idxS := indexes.TagEnc(key, valueHash, caStart, nil)
 369  					if err = idxS.MarshalWrite(start); chk.E(err) {
 370  						return
 371  					}
 372  					idxE := indexes.TagEnc(key, valueHash, caEnd, nil)
 373  					if err = idxE.MarshalWrite(end); chk.E(err) {
 374  						return
 375  					}
 376  					idxs = append(
 377  						idxs, Range{start.Bytes(), end.Bytes()},
 378  					)
 379  				}
 380  			}
 381  		}
 382  		return
 383  	}
 384  
 385  	// KindPubkey kpc
 386  	if f.Kinds != nil && f.Kinds.Len() > 0 && f.Authors != nil && f.Authors.Len() > 0 {
 387  		for _, k := range f.Kinds.ToUint16() {
 388  			for _, author := range f.Authors.T {
 389  				kind := new(types2.Uint16)
 390  				kind.Set(k)
 391  				var p *types2.PubHash
 392  				if p, err = CreatePubHashFromData(author); chk.E(err) {
 393  					return
 394  				}
 395  				start, end := new(bytes.Buffer), new(bytes.Buffer)
 396  				idxS := indexes.KindPubkeyEnc(kind, p, caStart, nil)
 397  				if err = idxS.MarshalWrite(start); chk.E(err) {
 398  					return
 399  				}
 400  				idxE := indexes.KindPubkeyEnc(kind, p, caEnd, nil)
 401  				if err = idxE.MarshalWrite(end); chk.E(err) {
 402  					return
 403  				}
 404  				idxs = append(
 405  					idxs, Range{start.Bytes(), end.Bytes()},
 406  				)
 407  			}
 408  		}
 409  		return
 410  	}
 411  
 412  	// Kind kc-
 413  	if f.Kinds != nil && f.Kinds.Len() > 0 && (f.Authors == nil || f.Authors.Len() == 0) && (filteredTags == nil || filteredTags.Len() == 0) {
 414  		for _, k := range f.Kinds.ToUint16() {
 415  			kind := new(types2.Uint16)
 416  			kind.Set(k)
 417  			start, end := new(bytes.Buffer), new(bytes.Buffer)
 418  			idxS := indexes.KindEnc(kind, caStart, nil)
 419  			if err = idxS.MarshalWrite(start); chk.E(err) {
 420  				return
 421  			}
 422  			idxE := indexes.KindEnc(kind, caEnd, nil)
 423  			if err = idxE.MarshalWrite(end); chk.E(err) {
 424  				return
 425  			}
 426  			idxs = append(
 427  				idxs, Range{start.Bytes(), end.Bytes()},
 428  			)
 429  		}
 430  		return
 431  	}
 432  
 433  	// Pubkey pc-
 434  	if f.Authors != nil && f.Authors.Len() > 0 {
 435  		for _, author := range f.Authors.T {
 436  			var p *types2.PubHash
 437  			if p, err = CreatePubHashFromData(author); chk.E(err) {
 438  				return
 439  			}
 440  			start, end := new(bytes.Buffer), new(bytes.Buffer)
 441  			idxS := indexes.PubkeyEnc(p, caStart, nil)
 442  			if err = idxS.MarshalWrite(start); chk.E(err) {
 443  				return
 444  			}
 445  			idxE := indexes.PubkeyEnc(p, caEnd, nil)
 446  			if err = idxE.MarshalWrite(end); chk.E(err) {
 447  				return
 448  			}
 449  			idxs = append(
 450  				idxs, Range{start.Bytes(), end.Bytes()},
 451  			)
 452  		}
 453  		return
 454  	}
 455  
 456  	// CreatedAt c--
 457  	start, end := new(bytes.Buffer), new(bytes.Buffer)
 458  	idxS := indexes.CreatedAtEnc(caStart, nil)
 459  	if err = idxS.MarshalWrite(start); chk.E(err) {
 460  		return
 461  	}
 462  	idxE := indexes.CreatedAtEnc(caEnd, nil)
 463  	if err = idxE.MarshalWrite(end); chk.E(err) {
 464  		return
 465  	}
 466  	idxs = append(
 467  		idxs, Range{start.Bytes(), end.Bytes()},
 468  	)
 469  	return
 470  }
 471