package iskra import "git.smesh.lol/iskradb/lattice" const ( L3Size = 4 << 20 MaxNodesInL3 = L3Size / 64 ) type Tree struct { db *lattice.Tree // Per-record metadata indexed by record index. // Populated atomically with db.Insert in Tree.Insert. RecMeta []MetaEntry // Content pools. StringPool []byte // overflow for form names > 23 bytes TokenPool []uint32 // tokenized content Dict *Dict // Performance data. CostMap map[uint32]CostEntry // recIdx → cost } func NewTree(db *lattice.Tree) *Tree { return &Tree{ db: db, RecMeta: []MetaEntry{}, StringPool: []byte{:0:4096}, TokenPool: []uint32{:0:4096}, Dict: NewDict(), } } // Insert adds a segment to the appropriate branch, populating RecMeta[recIdx] // atomically before returning. recIdx may not be strictly monotone (iskradb // reuses free-list slots), so RecMeta is grown to cover recIdx in place. func (t *Tree) Insert(branch lattice.Branch, key lattice.Key, form string, kind NodeKind, stage uint8) uint32 { var rec lattice.Record SetFormOnRecord(&rec, form, &t.StringPool) rec.Branch = uint8(branch) recIdx := t.db.InsertRec(branch, key, rec) for uint32(len(t.RecMeta)) <= recIdx { t.RecMeta = append(t.RecMeta, MetaEntry{}) } t.RecMeta[recIdx] = MetaEntry{Kind: kind, StageTag: stage} return recIdx } // Walk searches for key in branch's B-tree. Returns (nodeIdx, pos, found). func (t *Tree) Walk(branch lattice.Branch, key lattice.Key) (uint32, int, bool) { return t.db.Walk(branch, key) } // Lookup returns the Record and its recIdx for key in branch, or nil/NullRec. func (t *Tree) Lookup(branch lattice.Branch, key lattice.Key) (*lattice.Record, uint32) { rec := t.db.Lookup(branch, key) if rec == nil { return nil, lattice.NullRec } recIdx := t.db.LookupRecIdx(branch, key) return rec, recIdx } // LookupRecIdx returns just the record index for key in branch. func (t *Tree) LookupRecIdx(branch lattice.Branch, key lattice.Key) uint32 { return t.db.LookupRecIdx(branch, key) } // FinalizeLinks wires Record.Link[2] cross-branch connections after all // inserts are complete. Branch is derived from Kind (not StageTag). func (t *Tree) FinalizeLinks() { for recIdx := range t.RecMeta { meta := &t.RecMeta[recIdx] b := KindToBranch(meta.Kind) key, ok := t.db.RecKey[uint32(recIdx)] if !ok { continue } stage := KeyStage(key) hash := KeyHash(key) others := otherBranches(b) rec := t.db.GetRecord(uint32(recIdx)) for i, other := range others { okey := MakeCodeKey(stage, hash) ori := t.db.LookupRecIdx(other, okey) if ori != lattice.NullRec { rec.Link[i] = ori } } } } // GetContent returns the decoded token content for the record at recIdx. func (t *Tree) GetContent(recIdx uint32) []byte { if int(recIdx) >= len(t.RecMeta) { return nil } meta := &t.RecMeta[recIdx] off := meta.ContentOffset() length := meta.ContentLen() if length == 0 { return nil } end := off + length if int(end) > len(t.TokenPool) { return nil } return t.Dict.Decode(t.TokenPool[off:end]) } // SetContent tokenizes data and stores it in the TokenPool, updating // RecMeta[recIdx].ContentOffset/ContentLen. func (t *Tree) SetContent(recIdx uint32, data []byte) { if len(data) == 0 || int(recIdx) >= len(t.RecMeta) { return } tokens := Tokenize(data) off := uint32(len(t.TokenPool)) for _, tok := range tokens { idx := t.Dict.Add(tok.Text, tok.Class) t.TokenPool = append(t.TokenPool, idx) } count := uint32(len(t.TokenPool)) - off t.RecMeta[recIdx].SetContentRef(off, count) } // GetTokenSeq returns the raw token index slice for recIdx. func (t *Tree) GetTokenSeq(recIdx uint32) []uint32 { if int(recIdx) >= len(t.RecMeta) { return nil } meta := &t.RecMeta[recIdx] off := meta.ContentOffset() length := meta.ContentLen() if length == 0 { return nil } end := off + length if int(end) > len(t.TokenPool) { return nil } return t.TokenPool[off:end] } func (t *Tree) NodeCount() int { return t.db.NodeCount() } func (t *Tree) EntryCount() int { return t.db.RecordCount() } func (t *Tree) NeedsRepack() bool { return t.db.NodeCount()*64 > L3Size } // MetaAt returns a pointer to the MetaEntry for recIdx. func (t *Tree) MetaAt(recIdx uint32) *MetaEntry { if int(recIdx) >= len(t.RecMeta) { return nil } return &t.RecMeta[recIdx] } // FormAt returns the surface form for the record at recIdx. func (t *Tree) FormAt(recIdx uint32) string { rec := t.db.GetRecord(recIdx) if rec == nil { return "" } return FormFromRecord(rec, t.StringPool) } // StageLink is a cross-stage reference for the same named entity. type StageLink struct { RecIdx uint32 Stage uint8 } // StageLinksFor returns cross-stage RecIdx references for the same entity // as recIdx. Cross-stage adjacency is key-implicit: same hash, different // stage prefix, same branch. func (t *Tree) StageLinksFor(recIdx uint32) []StageLink { key, ok := t.db.RecKey[recIdx] if !ok { return nil } hash := KeyHash(key) srcStage := KeyStage(key) branch := KindToBranch(t.RecMeta[recIdx].Kind) var links []StageLink for _, stage := range []uint8{StageSRC, StageAST, StageIR, StageASM, StageBIN} { if stage == srcStage { continue } ri := t.db.LookupRecIdx(branch, MakeCodeKey(stage, hash)) if ri != NullLatticeRec { links = append(links, StageLink{RecIdx: ri, Stage: stage}) } } return links } // FindByName returns all recIdxs whose surface form equals name. func (t *Tree) FindByName(name string) []uint32 { var out []uint32 for i := range t.RecMeta { if t.FormAt(uint32(i)) == name { out = append(out, uint32(i)) } } return out } // KeyForRecord returns the key stored in the iskradb RecKey map for recIdx. func (t *Tree) KeyForRecord(recIdx uint32) (lattice.Key, bool) { key, ok := t.db.RecKey[recIdx] return key, ok } // NewInMemoryTree creates a tree with no disk backing. func NewInMemoryTree(cap int) *Tree { return NewTree(lattice.NewTree(cap)) } // LookupLexByMeta is a compatibility shim: recIdx == metaIdx == lexIdx now. func (t *Tree) LookupLexByMeta(recIdx uint32) (uint32, bool) { if int(recIdx) >= len(t.RecMeta) { return 0, false } return recIdx, true } // AdjLinks is a compatibility shim for code that used to call t.AdjLinks(meta). // Returns cross-stage links for the record at recIdx. func (t *Tree) AdjLinks(meta *MetaEntry) []StageLink { // Find recIdx from meta pointer. for i := range t.RecMeta { if &t.RecMeta[i] == meta { return t.StageLinksFor(uint32(i)) } } return nil } // AddAdj is a no-op compatibility shim. Cross-stage adjacency is now // key-implicit (same hash, different stage prefix); no explicit links needed. func (t *Tree) AddAdj(srcMeta, tgtMeta uint32) {} // FinalizeAdj is a compatibility alias for FinalizeLinks. func (t *Tree) FinalizeAdj() { t.FinalizeLinks() } // WalkAll searches all three branches for key. Returns the first hit. func (t *Tree) WalkAll(key lattice.Key) (uint32, int, bool) { for b := 0; b < 3; b++ { ni, pos, found := t.db.Walk(lattice.Branch(b), key) if found { return ni, pos, true } } return 0, 0, false } // LookupMeta searches all branches for key and returns its MetaEntry. func (t *Tree) LookupMeta(key lattice.Key) *MetaEntry { for b := 0; b < 3; b++ { ri := t.db.LookupRecIdx(lattice.Branch(b), key) if ri != NullLatticeRec && int(ri) < len(t.RecMeta) { return &t.RecMeta[ri] } } return nil } // LookupRecIdxAny searches all branches for key and returns its recIdx. func (t *Tree) LookupRecIdxAny(key lattice.Key) uint32 { for b := 0; b < 3; b++ { ri := t.db.LookupRecIdx(lattice.Branch(b), key) if ri != NullLatticeRec { return ri } } return NullLatticeRec } // recIdxOfMeta finds the index of a MetaEntry pointer in RecMeta. func (t *Tree) recIdxOfMeta(meta *MetaEntry) uint32 { for i := range t.RecMeta { if &t.RecMeta[i] == meta { return uint32(i) } } return NullLatticeRec } // GetContentM is a compatibility shim for code that passes a *MetaEntry. func (t *Tree) GetContentM(meta *MetaEntry) []byte { ri := t.recIdxOfMeta(meta) if ri == NullLatticeRec { return nil } return t.GetContent(ri) } // GetTokenSeqM is a compatibility shim for code that passes a *MetaEntry. func (t *Tree) GetTokenSeqM(meta *MetaEntry) []uint32 { ri := t.recIdxOfMeta(meta) if ri == NullLatticeRec { return nil } return t.GetTokenSeq(ri) } // NodeBytes returns the approximate memory used by nodes. func (t *Tree) NodeBytes() int { return t.db.NodeCount() * 64 }