ref.mx raw

   1  package iskra
   2  
   3  import "git.smesh.lol/iskradb/lattice"
   4  
   5  // Reference types - what a reference from src to target means.
   6  const (
   7  	RefNone       uint8 = 0 // not a reference (regular bigram)
   8  	RefContains   uint8 = 1 // src contains target (directory/package/scope)
   9  	RefRequires   uint8 = 2 // src requires target (type constraint/dependency)
  10  	RefProduces   uint8 = 3 // src produces target (return type/output)
  11  	RefOperatesOn uint8 = 4 // src operates on target (parameter/argument)
  12  	RefIsA        uint8 = 5 // src is a target (subtype/inheritance/hypernym)
  13  	RefBinds      uint8 = 6 // src modifies relationship between neighbors
  14  	RefContext    uint8 = 7 // src is disambiguated by target (path segment)
  15  )
  16  
  17  // RefCoordBits encodes RefKind into the reserved coord bits [15:13].
  18  // This ensures reference records hash to different keys than sequential bigrams
  19  // for the same word pair, landing them at different (deeper) lattice positions.
  20  const RefCoordShift = 13
  21  
  22  func RefCoord(base uint64, kind uint8) uint64 {
  23  	return base | (uint64(kind&0x7) << RefCoordShift)
  24  }
  25  
  26  func CoordRefKind(coord uint64) uint8 {
  27  	return uint8((coord >> RefCoordShift) & 0x7)
  28  }
  29  
  30  // IngestRef records a typed reference from src to target.
  31  // Uses the same storage as bigrams (Bcooccur branch, BigramIdx) but with
  32  // RefKind set on the MetaEntry. The coord gets RefKind bits set so the key
  33  // hashes differently from sequential bigrams of the same pair.
  34  func IngestRef(t *Tree, domain uint8, coord uint64, src, target string, kind uint8) uint32 {
  35  	refCoord := RefCoord(coord, kind)
  36  	key := BigramWalkKey(domain, refCoord, src, target)
  37  	ri := t.LookupRecIdx(lattice.Bcooccur, key)
  38  	if ri != lattice.NullRec {
  39  		if t.metaHas(ri) {
  40  			t.metaInc(ri)
  41  		}
  42  		return ri
  43  	}
  44  	form := src | "|" | target
  45  	var rec lattice.Record
  46  	t.setFormOnRec(&rec, form)
  47  	rec.Branch = uint8(lattice.Bcooccur)
  48  	recIdx := t.db.InsertRec(lattice.Bcooccur, key, rec)
  49  	t.metaSet(recIdx, MetaEntry{Count: 1, StageTag: domain, RefKind: kind})
  50  	if !t.BulkMode {
  51  		if t.BigramIdx == nil {
  52  			t.BigramIdx = map[string][]uint32{}
  53  		}
  54  		t.BigramIdx[src] = append(t.BigramIdx[src], recIdx)
  55  	}
  56  	return recIdx
  57  }
  58  
  59  // RefEntry is a single resolved reference from a source symbol.
  60  type RefEntry struct {
  61  	RecIdx uint32
  62  	Target string
  63  	Kind   uint8
  64  	Weight uint32
  65  }
  66  
  67  // GetRefs returns all typed references originating from src at the given domain.
  68  // Filters BigramIdx entries by RefKind != 0.
  69  func GetRefs(t *Tree, domain uint8, src string) []RefEntry {
  70  	idxEntries := t.BigramIdx[src]
  71  	if len(idxEntries) == 0 {
  72  		return nil
  73  	}
  74  	var refs []RefEntry
  75  	prefix := src | "|"
  76  	for _, ri := range idxEntries {
  77  		if int32(ri) >= len(t.RecMeta) {
  78  			continue
  79  		}
  80  		meta := &t.RecMeta[ri]
  81  		if meta.RefKind == RefNone {
  82  			continue
  83  		}
  84  		if meta.StageTag != domain {
  85  			continue
  86  		}
  87  		rec := t.db.GetRecord(ri)
  88  		if rec == nil {
  89  			continue
  90  		}
  91  		form := FormFromRecord(rec, t.StringPool)
  92  		if len(form) <= len(prefix) {
  93  			continue
  94  		}
  95  		target := form[len(prefix):]
  96  		refs = append(refs, RefEntry{
  97  			RecIdx: ri,
  98  			Target: target,
  99  			Kind:   meta.RefKind,
 100  			Weight: meta.Count,
 101  		})
 102  	}
 103  	return refs
 104  }
 105  
 106  // GetRefsOfKind returns references of a specific type from src.
 107  func GetRefsOfKind(t *Tree, domain uint8, src string, kind uint8) []RefEntry {
 108  	all := GetRefs(t, domain, src)
 109  	var filtered []RefEntry
 110  	for _, r := range all {
 111  		if r.Kind == kind {
 112  			filtered = append(filtered, r)
 113  		}
 114  	}
 115  	return filtered
 116  }
 117  
 118  // GetChildren returns all symbols contained by src (RefContains).
 119  func GetChildren(t *Tree, domain uint8, src string) []string {
 120  	refs := GetRefsOfKind(t, domain, src, RefContains)
 121  	out := []string{:0:len(refs)}
 122  	for _, r := range refs {
 123  		out = append(out, r.Target)
 124  	}
 125  	return out
 126  }
 127  
 128  // GetContext returns the disambiguation chain for src (RefContext).
 129  // Ordered by descending weight - most relevant context first.
 130  func GetContext(t *Tree, domain uint8, src string) []string {
 131  	refs := GetRefsOfKind(t, domain, src, RefContext)
 132  	sortRefsByWeight(refs)
 133  	out := []string{:0:len(refs)}
 134  	for _, r := range refs {
 135  		out = append(out, r.Target)
 136  	}
 137  	return out
 138  }
 139  
 140  // IngestDefn stores a definition as an ordered sequence of typed references.
 141  // Each entry in the definition becomes a reference from src to the entry's target.
 142  // The order is encoded by coord: base coord + position offset in reserved bits [11:8].
 143  func IngestDefn(t *Tree, domain uint8, coord uint64, src string, defn []DefnEntry) {
 144  	for i, d := range defn {
 145  		posCoord := coord | (uint64(i&0xF) << 8)
 146  		IngestRef(t, domain, posCoord, src, d.Target, d.Kind)
 147  	}
 148  }
 149  
 150  // DefnEntry is one step in a definition sequence.
 151  type DefnEntry struct {
 152  	Target string
 153  	Kind   uint8
 154  }
 155  
 156  // ResolveDefn collects the full definition of src by gathering all references
 157  // from it and ordering by weight (strongest associations first).
 158  // This is the "frozen walk" - the reified compositional structure.
 159  func ResolveDefn(t *Tree, domain uint8, src string) []RefEntry {
 160  	refs := GetRefs(t, domain, src)
 161  	sortRefsByWeight(refs)
 162  	return refs
 163  }
 164  
 165  // FindByContext disambiguates src by walking a context chain.
 166  // Returns the recIdx of the most specific match, or NullLatticeRec if none.
 167  // Context is tried from most specific (last element) to least specific (first).
 168  //
 169  // Example: FindByContext(t, 0x01, "run", []string{"computing", "execution"})
 170  // looks for "run" in context "execution", falling back to "run" in context "computing".
 171  func FindByContext(t *Tree, domain uint8, src string, context []string) uint32 {
 172  	for i := len(context) - 1; i >= 0; i-- {
 173  		coord := RefCoord(0, RefContext)
 174  		key := BigramWalkKey(domain, coord, src, context[i])
 175  		ri := t.LookupRecIdx(lattice.Bcooccur, key)
 176  		if ri != lattice.NullRec {
 177  			return ri
 178  		}
 179  	}
 180  	return NullLatticeRec
 181  }
 182  
 183  // WalkOutward follows references from src, one level at a time, up to maxDepth.
 184  // Returns the full tree of reachable symbols with their relationship types.
 185  // This is the expensive operation - cost proportional to ambiguity depth.
 186  func WalkOutward(t *Tree, domain uint8, src string, maxDepth int32) []RefLayer {
 187  	layers := []RefLayer{:0:maxDepth}
 188  	current := []string{src}
 189  	seen := map[string]bool{src: true}
 190  
 191  	for depth := 0; depth < maxDepth; depth++ {
 192  		var layer RefLayer
 193  		for _, word := range current {
 194  			refs := GetRefs(t, domain, word)
 195  			for _, r := range refs {
 196  				if seen[r.Target] {
 197  					continue
 198  				}
 199  				seen[r.Target] = true
 200  				layer.Entries = append(layer.Entries, r)
 201  			}
 202  		}
 203  		if len(layer.Entries) == 0 {
 204  			break
 205  		}
 206  		layers = append(layers, layer)
 207  		current = current[:0]
 208  		for _, e := range layer.Entries {
 209  			current = append(current, e.Target)
 210  		}
 211  	}
 212  	return layers
 213  }
 214  
 215  // RefLayer is one depth level in an outward walk.
 216  type RefLayer struct {
 217  	Entries []RefEntry
 218  }
 219  
 220  func sortRefsByWeight(refs []RefEntry) {
 221  	for i := 1; i < len(refs); i++ {
 222  		j := i
 223  		for j > 0 && refs[j].Weight > refs[j-1].Weight {
 224  			refs[j], refs[j-1] = refs[j-1], refs[j]
 225  			j--
 226  		}
 227  	}
 228  }
 229