tree.mx raw
1 package iskra
2
3 import "git.smesh.lol/iskradb/lattice"
4
5 const (
6 L3Size = 4 << 20
7 MaxNodesInL3 = L3Size / 64
8 )
9
10 type Tree struct {
11 db *lattice.Tree
12
13 // Per-record metadata indexed by record index.
14 // Populated atomically with db.Insert in Tree.Insert.
15 RecMeta []MetaEntry
16
17 // Content pools.
18 StringPool []byte // overflow for form names > 23 bytes
19 TokenPool []uint32 // tokenized content
20 Dict *Dict
21
22 // Performance data.
23 CostMap map[uint32]CostEntry // recIdx → cost
24 }
25
26 func NewTree(db *lattice.Tree) *Tree {
27 return &Tree{
28 db: db,
29 RecMeta: []MetaEntry{},
30 StringPool: []byte{:0:4096},
31 TokenPool: []uint32{:0:4096},
32 Dict: NewDict(),
33 }
34 }
35
36 // Insert adds a segment to the appropriate branch, populating RecMeta[recIdx]
37 // atomically before returning. recIdx may not be strictly monotone (iskradb
38 // reuses free-list slots), so RecMeta is grown to cover recIdx in place.
39 func (t *Tree) Insert(branch lattice.Branch, key lattice.Key, form string, kind NodeKind, stage uint8) uint32 {
40 var rec lattice.Record
41 SetFormOnRecord(&rec, form, &t.StringPool)
42 rec.Branch = uint8(branch)
43 recIdx := t.db.InsertRec(branch, key, rec)
44 for uint32(len(t.RecMeta)) <= recIdx {
45 t.RecMeta = append(t.RecMeta, MetaEntry{})
46 }
47 t.RecMeta[recIdx] = MetaEntry{Kind: kind, StageTag: stage}
48 return recIdx
49 }
50
51 // Walk searches for key in branch's B-tree. Returns (nodeIdx, pos, found).
52 func (t *Tree) Walk(branch lattice.Branch, key lattice.Key) (uint32, int, bool) {
53 return t.db.Walk(branch, key)
54 }
55
56 // Lookup returns the Record and its recIdx for key in branch, or nil/NullRec.
57 func (t *Tree) Lookup(branch lattice.Branch, key lattice.Key) (*lattice.Record, uint32) {
58 rec := t.db.Lookup(branch, key)
59 if rec == nil {
60 return nil, lattice.NullRec
61 }
62 recIdx := t.db.LookupRecIdx(branch, key)
63 return rec, recIdx
64 }
65
66 // LookupRecIdx returns just the record index for key in branch.
67 func (t *Tree) LookupRecIdx(branch lattice.Branch, key lattice.Key) uint32 {
68 return t.db.LookupRecIdx(branch, key)
69 }
70
71 // FinalizeLinks wires Record.Link[2] cross-branch connections after all
72 // inserts are complete. Branch is derived from Kind (not StageTag).
73 func (t *Tree) FinalizeLinks() {
74 for recIdx := range t.RecMeta {
75 meta := &t.RecMeta[recIdx]
76 b := KindToBranch(meta.Kind)
77 key, ok := t.db.RecKey[uint32(recIdx)]
78 if !ok {
79 continue
80 }
81 stage := KeyStage(key)
82 hash := KeyHash(key)
83 others := otherBranches(b)
84 rec := t.db.GetRecord(uint32(recIdx))
85 for i, other := range others {
86 okey := MakeCodeKey(stage, hash)
87 ori := t.db.LookupRecIdx(other, okey)
88 if ori != lattice.NullRec {
89 rec.Link[i] = ori
90 }
91 }
92 }
93 }
94
95 // GetContent returns the decoded token content for the record at recIdx.
96 func (t *Tree) GetContent(recIdx uint32) []byte {
97 if int(recIdx) >= len(t.RecMeta) {
98 return nil
99 }
100 meta := &t.RecMeta[recIdx]
101 off := meta.ContentOffset()
102 length := meta.ContentLen()
103 if length == 0 {
104 return nil
105 }
106 end := off + length
107 if int(end) > len(t.TokenPool) {
108 return nil
109 }
110 return t.Dict.Decode(t.TokenPool[off:end])
111 }
112
113 // SetContent tokenizes data and stores it in the TokenPool, updating
114 // RecMeta[recIdx].ContentOffset/ContentLen.
115 func (t *Tree) SetContent(recIdx uint32, data []byte) {
116 if len(data) == 0 || int(recIdx) >= len(t.RecMeta) {
117 return
118 }
119 tokens := Tokenize(data)
120 off := uint32(len(t.TokenPool))
121 for _, tok := range tokens {
122 idx := t.Dict.Add(tok.Text, tok.Class)
123 t.TokenPool = append(t.TokenPool, idx)
124 }
125 count := uint32(len(t.TokenPool)) - off
126 t.RecMeta[recIdx].SetContentRef(off, count)
127 }
128
129 // GetTokenSeq returns the raw token index slice for recIdx.
130 func (t *Tree) GetTokenSeq(recIdx uint32) []uint32 {
131 if int(recIdx) >= len(t.RecMeta) {
132 return nil
133 }
134 meta := &t.RecMeta[recIdx]
135 off := meta.ContentOffset()
136 length := meta.ContentLen()
137 if length == 0 {
138 return nil
139 }
140 end := off + length
141 if int(end) > len(t.TokenPool) {
142 return nil
143 }
144 return t.TokenPool[off:end]
145 }
146
147 func (t *Tree) NodeCount() int { return t.db.NodeCount() }
148 func (t *Tree) EntryCount() int { return t.db.RecordCount() }
149 func (t *Tree) NeedsRepack() bool {
150 return t.db.NodeCount()*64 > L3Size
151 }
152
153 // MetaAt returns a pointer to the MetaEntry for recIdx.
154 func (t *Tree) MetaAt(recIdx uint32) *MetaEntry {
155 if int(recIdx) >= len(t.RecMeta) {
156 return nil
157 }
158 return &t.RecMeta[recIdx]
159 }
160
161 // FormAt returns the surface form for the record at recIdx.
162 func (t *Tree) FormAt(recIdx uint32) string {
163 rec := t.db.GetRecord(recIdx)
164 if rec == nil {
165 return ""
166 }
167 return FormFromRecord(rec, t.StringPool)
168 }
169
170 // StageLink is a cross-stage reference for the same named entity.
171 type StageLink struct {
172 RecIdx uint32
173 Stage uint8
174 }
175
176 // StageLinksFor returns cross-stage RecIdx references for the same entity
177 // as recIdx. Cross-stage adjacency is key-implicit: same hash, different
178 // stage prefix, same branch.
179 func (t *Tree) StageLinksFor(recIdx uint32) []StageLink {
180 key, ok := t.db.RecKey[recIdx]
181 if !ok {
182 return nil
183 }
184 hash := KeyHash(key)
185 srcStage := KeyStage(key)
186 branch := KindToBranch(t.RecMeta[recIdx].Kind)
187 var links []StageLink
188 for _, stage := range []uint8{StageSRC, StageAST, StageIR, StageASM, StageBIN} {
189 if stage == srcStage {
190 continue
191 }
192 ri := t.db.LookupRecIdx(branch, MakeCodeKey(stage, hash))
193 if ri != NullLatticeRec {
194 links = append(links, StageLink{RecIdx: ri, Stage: stage})
195 }
196 }
197 return links
198 }
199
200 // FindByName returns all recIdxs whose surface form equals name.
201 func (t *Tree) FindByName(name string) []uint32 {
202 var out []uint32
203 for i := range t.RecMeta {
204 if t.FormAt(uint32(i)) == name {
205 out = append(out, uint32(i))
206 }
207 }
208 return out
209 }
210
211 // KeyForRecord returns the key stored in the iskradb RecKey map for recIdx.
212 func (t *Tree) KeyForRecord(recIdx uint32) (lattice.Key, bool) {
213 key, ok := t.db.RecKey[recIdx]
214 return key, ok
215 }
216
217 // NewInMemoryTree creates a tree with no disk backing.
218 func NewInMemoryTree(cap int) *Tree {
219 return NewTree(lattice.NewTree(cap))
220 }
221
222 // LookupLexByMeta is a compatibility shim: recIdx == metaIdx == lexIdx now.
223 func (t *Tree) LookupLexByMeta(recIdx uint32) (uint32, bool) {
224 if int(recIdx) >= len(t.RecMeta) {
225 return 0, false
226 }
227 return recIdx, true
228 }
229
230 // AdjLinks is a compatibility shim for code that used to call t.AdjLinks(meta).
231 // Returns cross-stage links for the record at recIdx.
232 func (t *Tree) AdjLinks(meta *MetaEntry) []StageLink {
233 // Find recIdx from meta pointer.
234 for i := range t.RecMeta {
235 if &t.RecMeta[i] == meta {
236 return t.StageLinksFor(uint32(i))
237 }
238 }
239 return nil
240 }
241
242 // AddAdj is a no-op compatibility shim. Cross-stage adjacency is now
243 // key-implicit (same hash, different stage prefix); no explicit links needed.
244 func (t *Tree) AddAdj(srcMeta, tgtMeta uint32) {}
245
246 // FinalizeAdj is a compatibility alias for FinalizeLinks.
247 func (t *Tree) FinalizeAdj() { t.FinalizeLinks() }
248
249 // WalkAll searches all three branches for key. Returns the first hit.
250 func (t *Tree) WalkAll(key lattice.Key) (uint32, int, bool) {
251 for b := 0; b < 3; b++ {
252 ni, pos, found := t.db.Walk(lattice.Branch(b), key)
253 if found {
254 return ni, pos, true
255 }
256 }
257 return 0, 0, false
258 }
259
260 // LookupMeta searches all branches for key and returns its MetaEntry.
261 func (t *Tree) LookupMeta(key lattice.Key) *MetaEntry {
262 for b := 0; b < 3; b++ {
263 ri := t.db.LookupRecIdx(lattice.Branch(b), key)
264 if ri != NullLatticeRec && int(ri) < len(t.RecMeta) {
265 return &t.RecMeta[ri]
266 }
267 }
268 return nil
269 }
270
271 // LookupRecIdxAny searches all branches for key and returns its recIdx.
272 func (t *Tree) LookupRecIdxAny(key lattice.Key) uint32 {
273 for b := 0; b < 3; b++ {
274 ri := t.db.LookupRecIdx(lattice.Branch(b), key)
275 if ri != NullLatticeRec {
276 return ri
277 }
278 }
279 return NullLatticeRec
280 }
281
282 // recIdxOfMeta finds the index of a MetaEntry pointer in RecMeta.
283 func (t *Tree) recIdxOfMeta(meta *MetaEntry) uint32 {
284 for i := range t.RecMeta {
285 if &t.RecMeta[i] == meta {
286 return uint32(i)
287 }
288 }
289 return NullLatticeRec
290 }
291
292 // GetContentM is a compatibility shim for code that passes a *MetaEntry.
293 func (t *Tree) GetContentM(meta *MetaEntry) []byte {
294 ri := t.recIdxOfMeta(meta)
295 if ri == NullLatticeRec {
296 return nil
297 }
298 return t.GetContent(ri)
299 }
300
301 // GetTokenSeqM is a compatibility shim for code that passes a *MetaEntry.
302 func (t *Tree) GetTokenSeqM(meta *MetaEntry) []uint32 {
303 ri := t.recIdxOfMeta(meta)
304 if ri == NullLatticeRec {
305 return nil
306 }
307 return t.GetTokenSeq(ri)
308 }
309
310 // NodeBytes returns the approximate memory used by nodes.
311 func (t *Tree) NodeBytes() int { return t.db.NodeCount() * 64 }
312