tree.mx raw

   1  package iskra
   2  
   3  import "git.smesh.lol/iskradb/lattice"
   4  
   5  const (
   6  	L3Size       = 4 << 20
   7  	MaxNodesInL3 = L3Size / 64
   8  )
   9  
  10  type Tree struct {
  11  	db         *lattice.Tree
  12  
  13  	// Per-record metadata indexed by record index.
  14  	// Populated atomically with db.Insert in Tree.Insert.
  15  	RecMeta    []MetaEntry
  16  
  17  	// Content pools.
  18  	StringPool []byte    // overflow for form names > 23 bytes
  19  	TokenPool  []uint32  // tokenized content
  20  	Dict       *Dict
  21  
  22  	// Performance data.
  23  	CostMap map[uint32]CostEntry // recIdx → cost
  24  }
  25  
  26  func NewTree(db *lattice.Tree) *Tree {
  27  	return &Tree{
  28  		db:         db,
  29  		RecMeta:    []MetaEntry{},
  30  		StringPool: []byte{:0:4096},
  31  		TokenPool:  []uint32{:0:4096},
  32  		Dict:       NewDict(),
  33  	}
  34  }
  35  
  36  // Insert adds a segment to the appropriate branch, populating RecMeta[recIdx]
  37  // atomically before returning. recIdx may not be strictly monotone (iskradb
  38  // reuses free-list slots), so RecMeta is grown to cover recIdx in place.
  39  func (t *Tree) Insert(branch lattice.Branch, key lattice.Key, form string, kind NodeKind, stage uint8) uint32 {
  40  	var rec lattice.Record
  41  	SetFormOnRecord(&rec, form, &t.StringPool)
  42  	rec.Branch = uint8(branch)
  43  	recIdx := t.db.InsertRec(branch, key, rec)
  44  	for uint32(len(t.RecMeta)) <= recIdx {
  45  		t.RecMeta = append(t.RecMeta, MetaEntry{})
  46  	}
  47  	t.RecMeta[recIdx] = MetaEntry{Kind: kind, StageTag: stage}
  48  	return recIdx
  49  }
  50  
  51  // Walk searches for key in branch's B-tree. Returns (nodeIdx, pos, found).
  52  func (t *Tree) Walk(branch lattice.Branch, key lattice.Key) (uint32, int, bool) {
  53  	return t.db.Walk(branch, key)
  54  }
  55  
  56  // Lookup returns the Record and its recIdx for key in branch, or nil/NullRec.
  57  func (t *Tree) Lookup(branch lattice.Branch, key lattice.Key) (*lattice.Record, uint32) {
  58  	rec := t.db.Lookup(branch, key)
  59  	if rec == nil {
  60  		return nil, lattice.NullRec
  61  	}
  62  	recIdx := t.db.LookupRecIdx(branch, key)
  63  	return rec, recIdx
  64  }
  65  
  66  // LookupRecIdx returns just the record index for key in branch.
  67  func (t *Tree) LookupRecIdx(branch lattice.Branch, key lattice.Key) uint32 {
  68  	return t.db.LookupRecIdx(branch, key)
  69  }
  70  
  71  // FinalizeLinks wires Record.Link[2] cross-branch connections after all
  72  // inserts are complete. Branch is derived from Kind (not StageTag).
  73  func (t *Tree) FinalizeLinks() {
  74  	for recIdx := range t.RecMeta {
  75  		meta := &t.RecMeta[recIdx]
  76  		b := KindToBranch(meta.Kind)
  77  		key, ok := t.db.RecKey[uint32(recIdx)]
  78  		if !ok {
  79  			continue
  80  		}
  81  		stage := KeyStage(key)
  82  		hash := KeyHash(key)
  83  		others := otherBranches(b)
  84  		rec := t.db.GetRecord(uint32(recIdx))
  85  		for i, other := range others {
  86  			okey := MakeCodeKey(stage, hash)
  87  			ori := t.db.LookupRecIdx(other, okey)
  88  			if ori != lattice.NullRec {
  89  				rec.Link[i] = ori
  90  			}
  91  		}
  92  	}
  93  }
  94  
  95  // GetContent returns the decoded token content for the record at recIdx.
  96  func (t *Tree) GetContent(recIdx uint32) []byte {
  97  	if int(recIdx) >= len(t.RecMeta) {
  98  		return nil
  99  	}
 100  	meta := &t.RecMeta[recIdx]
 101  	off := meta.ContentOffset()
 102  	length := meta.ContentLen()
 103  	if length == 0 {
 104  		return nil
 105  	}
 106  	end := off + length
 107  	if int(end) > len(t.TokenPool) {
 108  		return nil
 109  	}
 110  	return t.Dict.Decode(t.TokenPool[off:end])
 111  }
 112  
 113  // SetContent tokenizes data and stores it in the TokenPool, updating
 114  // RecMeta[recIdx].ContentOffset/ContentLen.
 115  func (t *Tree) SetContent(recIdx uint32, data []byte) {
 116  	if len(data) == 0 || int(recIdx) >= len(t.RecMeta) {
 117  		return
 118  	}
 119  	tokens := Tokenize(data)
 120  	off := uint32(len(t.TokenPool))
 121  	for _, tok := range tokens {
 122  		idx := t.Dict.Add(tok.Text, tok.Class)
 123  		t.TokenPool = append(t.TokenPool, idx)
 124  	}
 125  	count := uint32(len(t.TokenPool)) - off
 126  	t.RecMeta[recIdx].SetContentRef(off, count)
 127  }
 128  
 129  // GetTokenSeq returns the raw token index slice for recIdx.
 130  func (t *Tree) GetTokenSeq(recIdx uint32) []uint32 {
 131  	if int(recIdx) >= len(t.RecMeta) {
 132  		return nil
 133  	}
 134  	meta := &t.RecMeta[recIdx]
 135  	off := meta.ContentOffset()
 136  	length := meta.ContentLen()
 137  	if length == 0 {
 138  		return nil
 139  	}
 140  	end := off + length
 141  	if int(end) > len(t.TokenPool) {
 142  		return nil
 143  	}
 144  	return t.TokenPool[off:end]
 145  }
 146  
 147  func (t *Tree) NodeCount() int  { return t.db.NodeCount() }
 148  func (t *Tree) EntryCount() int { return t.db.RecordCount() }
 149  func (t *Tree) NeedsRepack() bool {
 150  	return t.db.NodeCount()*64 > L3Size
 151  }
 152  
 153  // MetaAt returns a pointer to the MetaEntry for recIdx.
 154  func (t *Tree) MetaAt(recIdx uint32) *MetaEntry {
 155  	if int(recIdx) >= len(t.RecMeta) {
 156  		return nil
 157  	}
 158  	return &t.RecMeta[recIdx]
 159  }
 160  
 161  // FormAt returns the surface form for the record at recIdx.
 162  func (t *Tree) FormAt(recIdx uint32) string {
 163  	rec := t.db.GetRecord(recIdx)
 164  	if rec == nil {
 165  		return ""
 166  	}
 167  	return FormFromRecord(rec, t.StringPool)
 168  }
 169  
 170  // StageLink is a cross-stage reference for the same named entity.
 171  type StageLink struct {
 172  	RecIdx uint32
 173  	Stage  uint8
 174  }
 175  
 176  // StageLinksFor returns cross-stage RecIdx references for the same entity
 177  // as recIdx. Cross-stage adjacency is key-implicit: same hash, different
 178  // stage prefix, same branch.
 179  func (t *Tree) StageLinksFor(recIdx uint32) []StageLink {
 180  	key, ok := t.db.RecKey[recIdx]
 181  	if !ok {
 182  		return nil
 183  	}
 184  	hash := KeyHash(key)
 185  	srcStage := KeyStage(key)
 186  	branch := KindToBranch(t.RecMeta[recIdx].Kind)
 187  	var links []StageLink
 188  	for _, stage := range []uint8{StageSRC, StageAST, StageIR, StageASM, StageBIN} {
 189  		if stage == srcStage {
 190  			continue
 191  		}
 192  		ri := t.db.LookupRecIdx(branch, MakeCodeKey(stage, hash))
 193  		if ri != NullLatticeRec {
 194  			links = append(links, StageLink{RecIdx: ri, Stage: stage})
 195  		}
 196  	}
 197  	return links
 198  }
 199  
 200  // FindByName returns all recIdxs whose surface form equals name.
 201  func (t *Tree) FindByName(name string) []uint32 {
 202  	var out []uint32
 203  	for i := range t.RecMeta {
 204  		if t.FormAt(uint32(i)) == name {
 205  			out = append(out, uint32(i))
 206  		}
 207  	}
 208  	return out
 209  }
 210  
 211  // KeyForRecord returns the key stored in the iskradb RecKey map for recIdx.
 212  func (t *Tree) KeyForRecord(recIdx uint32) (lattice.Key, bool) {
 213  	key, ok := t.db.RecKey[recIdx]
 214  	return key, ok
 215  }
 216  
 217  // NewInMemoryTree creates a tree with no disk backing.
 218  func NewInMemoryTree(cap int) *Tree {
 219  	return NewTree(lattice.NewTree(cap))
 220  }
 221  
 222  // LookupLexByMeta is a compatibility shim: recIdx == metaIdx == lexIdx now.
 223  func (t *Tree) LookupLexByMeta(recIdx uint32) (uint32, bool) {
 224  	if int(recIdx) >= len(t.RecMeta) {
 225  		return 0, false
 226  	}
 227  	return recIdx, true
 228  }
 229  
 230  // AdjLinks is a compatibility shim for code that used to call t.AdjLinks(meta).
 231  // Returns cross-stage links for the record at recIdx.
 232  func (t *Tree) AdjLinks(meta *MetaEntry) []StageLink {
 233  	// Find recIdx from meta pointer.
 234  	for i := range t.RecMeta {
 235  		if &t.RecMeta[i] == meta {
 236  			return t.StageLinksFor(uint32(i))
 237  		}
 238  	}
 239  	return nil
 240  }
 241  
 242  // AddAdj is a no-op compatibility shim. Cross-stage adjacency is now
 243  // key-implicit (same hash, different stage prefix); no explicit links needed.
 244  func (t *Tree) AddAdj(srcMeta, tgtMeta uint32) {}
 245  
 246  // FinalizeAdj is a compatibility alias for FinalizeLinks.
 247  func (t *Tree) FinalizeAdj() { t.FinalizeLinks() }
 248  
 249  // WalkAll searches all three branches for key. Returns the first hit.
 250  func (t *Tree) WalkAll(key lattice.Key) (uint32, int, bool) {
 251  	for b := 0; b < 3; b++ {
 252  		ni, pos, found := t.db.Walk(lattice.Branch(b), key)
 253  		if found {
 254  			return ni, pos, true
 255  		}
 256  	}
 257  	return 0, 0, false
 258  }
 259  
 260  // LookupMeta searches all branches for key and returns its MetaEntry.
 261  func (t *Tree) LookupMeta(key lattice.Key) *MetaEntry {
 262  	for b := 0; b < 3; b++ {
 263  		ri := t.db.LookupRecIdx(lattice.Branch(b), key)
 264  		if ri != NullLatticeRec && int(ri) < len(t.RecMeta) {
 265  			return &t.RecMeta[ri]
 266  		}
 267  	}
 268  	return nil
 269  }
 270  
 271  // LookupRecIdxAny searches all branches for key and returns its recIdx.
 272  func (t *Tree) LookupRecIdxAny(key lattice.Key) uint32 {
 273  	for b := 0; b < 3; b++ {
 274  		ri := t.db.LookupRecIdx(lattice.Branch(b), key)
 275  		if ri != NullLatticeRec {
 276  			return ri
 277  		}
 278  	}
 279  	return NullLatticeRec
 280  }
 281  
 282  // recIdxOfMeta finds the index of a MetaEntry pointer in RecMeta.
 283  func (t *Tree) recIdxOfMeta(meta *MetaEntry) uint32 {
 284  	for i := range t.RecMeta {
 285  		if &t.RecMeta[i] == meta {
 286  			return uint32(i)
 287  		}
 288  	}
 289  	return NullLatticeRec
 290  }
 291  
 292  // GetContentM is a compatibility shim for code that passes a *MetaEntry.
 293  func (t *Tree) GetContentM(meta *MetaEntry) []byte {
 294  	ri := t.recIdxOfMeta(meta)
 295  	if ri == NullLatticeRec {
 296  		return nil
 297  	}
 298  	return t.GetContent(ri)
 299  }
 300  
 301  // GetTokenSeqM is a compatibility shim for code that passes a *MetaEntry.
 302  func (t *Tree) GetTokenSeqM(meta *MetaEntry) []uint32 {
 303  	ri := t.recIdxOfMeta(meta)
 304  	if ri == NullLatticeRec {
 305  		return nil
 306  	}
 307  	return t.GetTokenSeq(ri)
 308  }
 309  
 310  // NodeBytes returns the approximate memory used by nodes.
 311  func (t *Tree) NodeBytes() int { return t.db.NodeCount() * 64 }
 312