package iskra import "git.smesh.lol/iskradb/lattice" // Reference types - what a reference from src to target means. const ( RefNone uint8 = 0 // not a reference (regular bigram) RefContains uint8 = 1 // src contains target (directory/package/scope) RefRequires uint8 = 2 // src requires target (type constraint/dependency) RefProduces uint8 = 3 // src produces target (return type/output) RefOperatesOn uint8 = 4 // src operates on target (parameter/argument) RefIsA uint8 = 5 // src is a target (subtype/inheritance/hypernym) RefBinds uint8 = 6 // src modifies relationship between neighbors RefContext uint8 = 7 // src is disambiguated by target (path segment) ) // RefCoordBits encodes RefKind into the reserved coord bits [15:13]. // This ensures reference records hash to different keys than sequential bigrams // for the same word pair, landing them at different (deeper) lattice positions. const RefCoordShift = 13 func RefCoord(base uint64, kind uint8) uint64 { return base | (uint64(kind&0x7) << RefCoordShift) } func CoordRefKind(coord uint64) uint8 { return uint8((coord >> RefCoordShift) & 0x7) } // IngestRef records a typed reference from src to target. // Uses the same storage as bigrams (Bcooccur branch, BigramIdx) but with // RefKind set on the MetaEntry. The coord gets RefKind bits set so the key // hashes differently from sequential bigrams of the same pair. func IngestRef(t *Tree, domain uint8, coord uint64, src, target string, kind uint8) uint32 { refCoord := RefCoord(coord, kind) key := BigramWalkKey(domain, refCoord, src, target) ri := t.LookupRecIdx(lattice.Bcooccur, key) if ri != lattice.NullRec { if t.metaHas(ri) { t.metaInc(ri) } return ri } form := src | "|" | target var rec lattice.Record t.setFormOnRec(&rec, form) rec.Branch = uint8(lattice.Bcooccur) recIdx := t.db.InsertRec(lattice.Bcooccur, key, rec) t.metaSet(recIdx, MetaEntry{Count: 1, StageTag: domain, RefKind: kind}) if !t.BulkMode { if t.BigramIdx == nil { t.BigramIdx = map[string][]uint32{} } t.BigramIdx[src] = append(t.BigramIdx[src], recIdx) } return recIdx } // RefEntry is a single resolved reference from a source symbol. type RefEntry struct { RecIdx uint32 Target string Kind uint8 Weight uint32 } // GetRefs returns all typed references originating from src at the given domain. // Filters BigramIdx entries by RefKind != 0. func GetRefs(t *Tree, domain uint8, src string) []RefEntry { idxEntries := t.BigramIdx[src] if len(idxEntries) == 0 { return nil } var refs []RefEntry prefix := src | "|" for _, ri := range idxEntries { if int32(ri) >= len(t.RecMeta) { continue } meta := &t.RecMeta[ri] if meta.RefKind == RefNone { continue } if meta.StageTag != domain { continue } rec := t.db.GetRecord(ri) if rec == nil { continue } form := FormFromRecord(rec, t.StringPool) if len(form) <= len(prefix) { continue } target := form[len(prefix):] refs = append(refs, RefEntry{ RecIdx: ri, Target: target, Kind: meta.RefKind, Weight: meta.Count, }) } return refs } // GetRefsOfKind returns references of a specific type from src. func GetRefsOfKind(t *Tree, domain uint8, src string, kind uint8) []RefEntry { all := GetRefs(t, domain, src) var filtered []RefEntry for _, r := range all { if r.Kind == kind { filtered = append(filtered, r) } } return filtered } // GetChildren returns all symbols contained by src (RefContains). func GetChildren(t *Tree, domain uint8, src string) []string { refs := GetRefsOfKind(t, domain, src, RefContains) out := []string{:0:len(refs)} for _, r := range refs { out = append(out, r.Target) } return out } // GetContext returns the disambiguation chain for src (RefContext). // Ordered by descending weight - most relevant context first. func GetContext(t *Tree, domain uint8, src string) []string { refs := GetRefsOfKind(t, domain, src, RefContext) sortRefsByWeight(refs) out := []string{:0:len(refs)} for _, r := range refs { out = append(out, r.Target) } return out } // IngestDefn stores a definition as an ordered sequence of typed references. // Each entry in the definition becomes a reference from src to the entry's target. // The order is encoded by coord: base coord + position offset in reserved bits [11:8]. func IngestDefn(t *Tree, domain uint8, coord uint64, src string, defn []DefnEntry) { for i, d := range defn { posCoord := coord | (uint64(i&0xF) << 8) IngestRef(t, domain, posCoord, src, d.Target, d.Kind) } } // DefnEntry is one step in a definition sequence. type DefnEntry struct { Target string Kind uint8 } // ResolveDefn collects the full definition of src by gathering all references // from it and ordering by weight (strongest associations first). // This is the "frozen walk" - the reified compositional structure. func ResolveDefn(t *Tree, domain uint8, src string) []RefEntry { refs := GetRefs(t, domain, src) sortRefsByWeight(refs) return refs } // FindByContext disambiguates src by walking a context chain. // Returns the recIdx of the most specific match, or NullLatticeRec if none. // Context is tried from most specific (last element) to least specific (first). // // Example: FindByContext(t, 0x01, "run", []string{"computing", "execution"}) // looks for "run" in context "execution", falling back to "run" in context "computing". func FindByContext(t *Tree, domain uint8, src string, context []string) uint32 { for i := len(context) - 1; i >= 0; i-- { coord := RefCoord(0, RefContext) key := BigramWalkKey(domain, coord, src, context[i]) ri := t.LookupRecIdx(lattice.Bcooccur, key) if ri != lattice.NullRec { return ri } } return NullLatticeRec } // WalkOutward follows references from src, one level at a time, up to maxDepth. // Returns the full tree of reachable symbols with their relationship types. // This is the expensive operation - cost proportional to ambiguity depth. func WalkOutward(t *Tree, domain uint8, src string, maxDepth int32) []RefLayer { layers := []RefLayer{:0:maxDepth} current := []string{src} seen := map[string]bool{src: true} for depth := 0; depth < maxDepth; depth++ { var layer RefLayer for _, word := range current { refs := GetRefs(t, domain, word) for _, r := range refs { if seen[r.Target] { continue } seen[r.Target] = true layer.Entries = append(layer.Entries, r) } } if len(layer.Entries) == 0 { break } layers = append(layers, layer) current = current[:0] for _, e := range layer.Entries { current = append(current, e.Target) } } return layers } // RefLayer is one depth level in an outward walk. type RefLayer struct { Entries []RefEntry } func sortRefsByWeight(refs []RefEntry) { for i := 1; i < len(refs); i++ { j := i for j > 0 && refs[j].Weight > refs[j-1].Weight { refs[j], refs[j-1] = refs[j-1], refs[j] j-- } } }