symbols.mx raw
1 package iskra
2
3 import (
4 "bytes"
5 "slices"
6 )
7
8 type Symbol struct {
9 Name string
10 Depth int32
11 Kind uint8
12 }
13
14 const (
15 SymDecl uint8 = 1
16 SymRef uint8 = 2
17 SymParam uint8 = 3
18 )
19
20 type SymbolTable struct {
21 Symbols []Symbol
22 Counts map[string]int32
23 ParamTypes []string
24 ResultTypes []string
25 RecvType string
26 RefSeqHash uint64
27 }
28
29 // Binding represents a single variable's geometric footprint:
30 // where it was declared, and the depth of each reference to it.
31 type Binding struct {
32 DeclDepth int32
33 RefDepths []int32
34 }
35
36 // BindingGraph captures the full binding structure of a function.
37 // Each declaration creates a binding; each reference resolves to
38 // the nearest enclosing declaration of that name.
39 type BindingGraph struct {
40 Bindings []Binding
41 // Externals are symbols referenced but never declared in this scope
42 // (package-level constants, functions from other packages, etc).
43 Externals []ExternalRef
44 }
45
46 type ExternalRef struct {
47 Name string
48 RefCount int32
49 Depths []int32
50 }
51
52 // BindingSignature is the geometric fingerprint with type tags.
53 // Two functions are isomorphic if their signatures match.
54 type BindingSignature struct {
55 Locals []LocalSig
56 Externs []ExternSig
57 ParamTypes []string
58 ResultTypes []string
59 RecvType string
60 ExternNames []string
61 RefSeqHash uint64
62 }
63
64 type LocalSig struct {
65 DeclDepth int32
66 RefDepths []int32
67 }
68
69 type ExternSig struct {
70 RefCount int32
71 Depths []int32
72 }
73
74 func ExtractSymbols(astDump string) *SymbolTable {
75 st := &SymbolTable{
76 Counts: map[string]int32{},
77 }
78 lines := bytes.Split([]byte(astDump), []byte("\n"))
79 inParams := false
80 inResults := false
81 sectionDepth := int32(-1)
82 paramOrd := map[string]int32{}
83 resultOrd := map[string]int32{}
84 localOrd := map[string]int32{}
85 var refSeq []byte
86
87 for _, line := range lines {
88 if len(line) == 0 {
89 continue
90 }
91 d := countIndent(line)
92 trimmed := bytes.TrimSpace(line)
93
94 if (inParams || inResults) && d <= sectionDepth {
95 inParams = false
96 inResults = false
97 }
98
99 if bytes.HasPrefix(trimmed, []byte("FuncDecl ")) {
100 name := string(trimmed[9:])
101 if idx := bytes.IndexByte(trimmed[9:], ' '); idx >= 0 {
102 name = string(trimmed[9 : 9+idx])
103 }
104 addSym(st, name, d, SymDecl)
105 if ri := bytes.Index(trimmed, []byte("recv=")); ri >= 0 {
106 rt := trimmed[ri+5:]
107 if si := bytes.IndexByte(rt, ' '); si >= 0 {
108 rt = rt[:si]
109 }
110 st.RecvType = string(rt)
111 }
112 } else if bytes.HasPrefix(trimmed, []byte("Params")) {
113 inParams = true
114 inResults = false
115 sectionDepth = d
116 } else if bytes.HasPrefix(trimmed, []byte("Results")) {
117 inResults = true
118 inParams = false
119 sectionDepth = d
120 } else if inParams && d > sectionDepth {
121 typeName := extractTypeFromParamLine(trimmed)
122 if typeName != "" {
123 st.ParamTypes = append(st.ParamTypes, typeName)
124 }
125 names := extractParamNames(trimmed)
126 for _, n := range names {
127 paramOrd[n] = int32(len(paramOrd))
128 }
129 parseParamLine(st, trimmed, d)
130 } else if inResults && d > sectionDepth {
131 typeName := extractTypeFromParamLine(trimmed)
132 if typeName != "" {
133 st.ResultTypes = append(st.ResultTypes, typeName)
134 }
135 if isParamLine(trimmed) {
136 names := extractParamNames(trimmed)
137 for _, n := range names {
138 resultOrd[n] = int32(len(resultOrd))
139 }
140 parseParamLine(st, trimmed, d)
141 }
142 } else if bytes.HasPrefix(trimmed, []byte("Assign ")) {
143 rest := trimmed[7:]
144 spIdx := bytes.IndexByte(rest, ' ')
145 if spIdx > 0 {
146 lhs := rest[:spIdx]
147 parts := bytes.Split(lhs, []byte(","))
148 for _, p := range parts {
149 p = bytes.TrimSpace(p)
150 nm := string(p)
151 if len(nm) > 0 {
152 if _, ok := localOrd[nm]; !ok {
153 if _, ok2 := paramOrd[nm]; !ok2 {
154 if _, ok3 := resultOrd[nm]; !ok3 {
155 localOrd[nm] = int32(len(localOrd))
156 }
157 }
158 }
159 }
160 }
161 }
162 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
163 parseAssign(st, trimmed, d)
164 } else if bytes.HasPrefix(trimmed, []byte("Return")) {
165 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
166 parseRefs(st, trimmed, d)
167 } else if bytes.HasPrefix(trimmed, []byte("If")) {
168 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
169 parseRefs(st, trimmed, d)
170 } else if bytes.HasPrefix(trimmed, []byte("For")) {
171 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
172 parseRefs(st, trimmed, d)
173 } else if bytes.HasPrefix(trimmed, []byte("Range ")) {
174 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
175 parseRefs(st, trimmed, d)
176 } else if bytes.HasPrefix(trimmed, []byte("Expr ")) {
177 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
178 parseRefs(st, trimmed, d)
179 } else if bytes.HasPrefix(trimmed, []byte("Case")) {
180 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
181 parseRefs(st, trimmed, d)
182 } else if bytes.HasPrefix(trimmed, []byte("Switch")) {
183 refSeq = appendRefFingerprint(refSeq, trimmed, paramOrd, resultOrd, localOrd)
184 parseRefs(st, trimmed, d)
185 } else if bytes.HasPrefix(trimmed, []byte("Value ")) {
186 parseParamLine(st, trimmed[6:], d)
187 } else if isParamLine(trimmed) {
188 parseParamLine(st, trimmed, d)
189 }
190 }
191 if len(refSeq) > 0 && !allSeparators(refSeq) {
192 st.RefSeqHash = fnvHash64(refSeq)
193 }
194 return st
195 }
196
197 func allSeparators(b []byte) bool {
198 for _, c := range b {
199 if c != '|' {
200 return false
201 }
202 }
203 return true
204 }
205
206 func extractParamNames(line []byte) []string {
207 idx := bytes.IndexByte(line, ' ')
208 if idx <= 0 {
209 return nil
210 }
211 namesPart := line[:idx]
212 parts := bytes.Split(namesPart, []byte(","))
213 var names []string
214 for _, p := range parts {
215 p = bytes.TrimSpace(p)
216 if len(p) > 0 && p[0] >= 'a' && p[0] <= 'z' {
217 names = append(names, string(p))
218 }
219 }
220 return names
221 }
222
223 func appendRefFingerprint(seq, line []byte, paramOrd, resultOrd, localOrd map[string]int32) []byte {
224 start := bytes.IndexByte(line, '[')
225 if start < 0 {
226 seq = append(seq, '|')
227 return seq
228 }
229 end := bytes.IndexByte(line[start:], ']')
230 if end < 0 {
231 seq = append(seq, '|')
232 return seq
233 }
234 refs := line[start+1 : start+end]
235 parts := bytes.Split(refs, []byte(","))
236 for i, p := range parts {
237 p = bytes.TrimSpace(p)
238 if i > 0 {
239 seq = append(seq, ',')
240 }
241 name := string(p)
242 if ord, ok := paramOrd[name]; ok {
243 seq = append(seq, 'P')
244 seq = append(seq, []byte(intToStr(int(ord)))...)
245 } else if ord, ok := resultOrd[name]; ok {
246 seq = append(seq, 'R')
247 seq = append(seq, []byte(intToStr(int(ord)))...)
248 } else if ord, ok := localOrd[name]; ok {
249 seq = append(seq, 'L')
250 seq = append(seq, []byte(intToStr(int(ord)))...)
251 } else {
252 seq = append(seq, p...)
253 }
254 }
255 seq = append(seq, '|')
256 return seq
257 }
258
259 func fnvHash64(data []byte) uint64 {
260 h := uint64(0xcbf29ce484222325)
261 for _, b := range data {
262 h ^= uint64(b)
263 h *= 0x100000001b3
264 }
265 return h
266 }
267
268 func extractTypeFromParamLine(line []byte) string {
269 // Param: "name type" or "name,name type" or result: "name type" or just "type"
270 // e.g. "r rune", "s,prefix []byte", "f float64", "bool", "error"
271 idx := bytes.IndexByte(line, ' ')
272 if idx < 0 {
273 return string(line) // no space = bare type (result)
274 }
275 // Check if what's before the space could be a type name (starts with [, *, map, etc.)
276 before := line[:idx]
277 if before[0] == '[' || before[0] == '*' || bytes.HasPrefix(before, []byte("map")) || bytes.HasPrefix(before, []byte("chan")) || bytes.HasPrefix(before, []byte("func")) {
278 return string(line) // entire thing is a type
279 }
280 return string(line[idx+1:])
281 }
282
283 // ExtractBindings builds a BindingGraph from a SymbolTable.
284 // It resolves each reference to the nearest prior declaration of
285 // the same name at equal or lesser depth (lexical scoping).
286 func ExtractBindings(st *SymbolTable) *BindingGraph {
287 bg := &BindingGraph{}
288
289 // Index: name → binding index (for locals).
290 localIdx := map[string]int32{}
291 // Track external refs by name.
292 extIdx := map[string]int32{}
293
294 // First pass: register all declarations.
295 for _, sym := range st.Symbols {
296 if sym.Kind == SymDecl || sym.Kind == SymParam {
297 idx := int32(len(bg.Bindings))
298 bg.Bindings = append(bg.Bindings, Binding{
299 DeclDepth: sym.Depth,
300 })
301 localIdx[sym.Name] = idx
302 }
303 }
304
305 // Second pass: resolve references.
306 for _, sym := range st.Symbols {
307 if sym.Kind != SymRef {
308 continue
309 }
310 if idx, ok := localIdx[sym.Name]; ok {
311 bg.Bindings[idx].RefDepths = append(bg.Bindings[idx].RefDepths, sym.Depth)
312 } else {
313 if ei, ok := extIdx[sym.Name]; ok {
314 bg.Externals[ei].RefCount++
315 bg.Externals[ei].Depths = append(bg.Externals[ei].Depths, sym.Depth)
316 } else {
317 extIdx[sym.Name] = int32(len(bg.Externals))
318 bg.Externals = append(bg.Externals, ExternalRef{
319 Name: sym.Name,
320 RefCount: 1,
321 Depths: []int32{sym.Depth},
322 })
323 }
324 }
325 }
326
327 return bg
328 }
329
330 // Signature erases names and produces a sorted geometric fingerprint.
331 func (bg *BindingGraph) Signature() *BindingSignature {
332 sig := &BindingSignature{}
333
334 for _, b := range bg.Bindings {
335 depths := []int32{:0:len(b.RefDepths)}
336 depths = append(depths, b.RefDepths...)
337 slices.SortFunc(depths, cmpInt32)
338 sig.Locals = append(sig.Locals, LocalSig{
339 DeclDepth: b.DeclDepth,
340 RefDepths: depths,
341 })
342 }
343
344 for _, e := range bg.Externals {
345 depths := []int32{:0:len(e.Depths)}
346 depths = append(depths, e.Depths...)
347 slices.SortFunc(depths, cmpInt32)
348 sig.Externs = append(sig.Externs, ExternSig{
349 RefCount: e.RefCount,
350 Depths: depths,
351 })
352 }
353
354 slices.SortFunc(sig.Locals, cmpLocalSig)
355 slices.SortFunc(sig.Externs, cmpExternSig)
356 return sig
357 }
358
359 func SignatureWithTypes(st *SymbolTable) *BindingSignature {
360 bg := ExtractBindings(st)
361 sig := bg.Signature()
362 sig.ParamTypes = append(sig.ParamTypes, st.ParamTypes...)
363 sig.ResultTypes = append(sig.ResultTypes, st.ResultTypes...)
364 sig.RecvType = st.RecvType
365 for _, e := range bg.Externals {
366 sig.ExternNames = append(sig.ExternNames, e.Name)
367 }
368 slices.Sort(sig.ExternNames)
369 sig.RefSeqHash = st.RefSeqHash
370 return sig
371 }
372
373 func (a *BindingSignature) IsIsomorphic(b *BindingSignature) bool {
374 if len(a.Locals) != len(b.Locals) {
375 return false
376 }
377 if len(a.Externs) != len(b.Externs) {
378 return false
379 }
380 for i := range a.Locals {
381 if a.Locals[i].DeclDepth != b.Locals[i].DeclDepth {
382 return false
383 }
384 if !int32sEqual(a.Locals[i].RefDepths, b.Locals[i].RefDepths) {
385 return false
386 }
387 }
388 for i := range a.Externs {
389 if a.Externs[i].RefCount != b.Externs[i].RefCount {
390 return false
391 }
392 if !int32sEqual(a.Externs[i].Depths, b.Externs[i].Depths) {
393 return false
394 }
395 }
396 if !stringsEqual(a.ParamTypes, b.ParamTypes) {
397 return false
398 }
399 if !stringsEqual(a.ResultTypes, b.ResultTypes) {
400 return false
401 }
402 if a.RecvType != b.RecvType {
403 return false
404 }
405 if !stringsEqual(a.ExternNames, b.ExternNames) {
406 return false
407 }
408 if a.RefSeqHash != 0 && b.RefSeqHash != 0 && a.RefSeqHash != b.RefSeqHash {
409 return false
410 }
411 return true
412 }
413
414 func stringsEqual(a, b []string) bool {
415 if len(a) != len(b) {
416 return false
417 }
418 for i := range a {
419 if a[i] != b[i] {
420 return false
421 }
422 }
423 return true
424 }
425
426 // GeometricDistance measures how far two signatures diverge.
427 // Returns 0 for perfect isomorphism, higher for greater divergence.
428 // The score counts: missing/extra bindings + depth mismatches + ref count diffs.
429 func GeometricDistance(a, b *BindingSignature) int32 {
430 dist := int32(0)
431
432 // Compare locals.
433 la := len(a.Locals)
434 lb := len(b.Locals)
435 minL := la
436 if lb < minL {
437 minL = lb
438 }
439 for i := 0; i < minL; i++ {
440 if a.Locals[i].DeclDepth != b.Locals[i].DeclDepth {
441 dist++
442 }
443 dist += int32SliceDistance(a.Locals[i].RefDepths, b.Locals[i].RefDepths)
444 }
445 // Penalty for unmatched bindings.
446 if la > lb {
447 dist += int32(la - lb)
448 } else {
449 dist += int32(lb - la)
450 }
451
452 // Compare externals.
453 ea := len(a.Externs)
454 eb := len(b.Externs)
455 minE := ea
456 if eb < minE {
457 minE = eb
458 }
459 for i := 0; i < minE; i++ {
460 d := a.Externs[i].RefCount - b.Externs[i].RefCount
461 if d < 0 {
462 d = -d
463 }
464 dist += d
465 dist += int32SliceDistance(a.Externs[i].Depths, b.Externs[i].Depths)
466 }
467 if ea > eb {
468 dist += int32(ea - eb)
469 } else {
470 dist += int32(eb - ea)
471 }
472
473 return dist
474 }
475
476 func int32SliceDistance(a, b []int32) int32 {
477 dist := int32(0)
478 la := len(a)
479 lb := len(b)
480 minN := la
481 if lb < minN {
482 minN = lb
483 }
484 for i := 0; i < minN; i++ {
485 d := a[i] - b[i]
486 if d < 0 {
487 d = -d
488 }
489 dist += d
490 }
491 if la > lb {
492 dist += int32(la - lb)
493 } else {
494 dist += int32(lb - la)
495 }
496 return dist
497 }
498
499 func int32sEqual(a, b []int32) bool {
500 if len(a) != len(b) {
501 return false
502 }
503 for i := range a {
504 if a[i] != b[i] {
505 return false
506 }
507 }
508 return true
509 }
510
511 func cmpInt32(a, b int32) int {
512 if a < b {
513 return -1
514 }
515 if a > b {
516 return 1
517 }
518 return 0
519 }
520
521 func cmpLocalSig(a, b LocalSig) int {
522 if a.DeclDepth != b.DeclDepth {
523 if a.DeclDepth < b.DeclDepth {
524 return -1
525 }
526 return 1
527 }
528 la := int32(len(a.RefDepths))
529 lb := int32(len(b.RefDepths))
530 if la != lb {
531 if la < lb {
532 return -1
533 }
534 return 1
535 }
536 for i := int32(0); i < la; i++ {
537 if a.RefDepths[i] != b.RefDepths[i] {
538 if a.RefDepths[i] < b.RefDepths[i] {
539 return -1
540 }
541 return 1
542 }
543 }
544 return 0
545 }
546
547 func cmpExternSig(a, b ExternSig) int {
548 if a.RefCount != b.RefCount {
549 if a.RefCount < b.RefCount {
550 return -1
551 }
552 return 1
553 }
554 la := int32(len(a.Depths))
555 lb := int32(len(b.Depths))
556 if la != lb {
557 if la < lb {
558 return -1
559 }
560 return 1
561 }
562 for i := int32(0); i < la; i++ {
563 if a.Depths[i] != b.Depths[i] {
564 if a.Depths[i] < b.Depths[i] {
565 return -1
566 }
567 return 1
568 }
569 }
570 return 0
571 }
572
573 // SignatureHash24 produces a 24-bit hash of a binding signature for fast
574 // rejection during signature lookup. Collisions are expected but rare enough
575 // (1 in 16M) to make the full IsIsomorphic check infrequent.
576 func SignatureHash24(sig *BindingSignature) uint32 {
577 h := uint32(0x811c9dc5) // FNV-1a offset basis
578 mix := func(b byte) {
579 h ^= uint32(b)
580 h *= 0x01000193
581 }
582 mix(byte(len(sig.Locals)))
583 for _, l := range sig.Locals {
584 mix(byte(l.DeclDepth))
585 mix(byte(l.DeclDepth >> 8))
586 mix(byte(len(l.RefDepths)))
587 for _, d := range l.RefDepths {
588 mix(byte(d))
589 mix(byte(d >> 8))
590 }
591 }
592 mix(byte(len(sig.Externs)))
593 for _, e := range sig.Externs {
594 mix(byte(e.RefCount))
595 mix(byte(e.RefCount >> 8))
596 for _, d := range e.Depths {
597 mix(byte(d))
598 mix(byte(d >> 8))
599 }
600 }
601 for _, c := range []byte(sig.RecvType) {
602 mix(c)
603 }
604 for _, name := range sig.ExternNames {
605 for _, c := range []byte(name) {
606 mix(c)
607 }
608 mix(0xFF)
609 }
610 rsb := uint64ToBytes(sig.RefSeqHash)
611 for _, c := range rsb {
612 mix(c)
613 }
614 return h & 0xFFFFFF
615 }
616
617 func uint64ToBytes(v uint64) [8]byte {
618 var b [8]byte
619 b[0] = byte(v)
620 b[1] = byte(v >> 8)
621 b[2] = byte(v >> 16)
622 b[3] = byte(v >> 24)
623 b[4] = byte(v >> 32)
624 b[5] = byte(v >> 40)
625 b[6] = byte(v >> 48)
626 b[7] = byte(v >> 56)
627 return b
628 }
629
630 // --- Flat name-based comparison (kept for backward compat) ---
631
632 type SymbolMatch struct {
633 TotalA int32
634 TotalB int32
635 Matched int32
636 OnlyInA []string
637 OnlyInB []string
638 CountDiffs []string
639 }
640
641 func CompareSymbols(a, b *SymbolTable) *SymbolMatch {
642 m := &SymbolMatch{}
643 namesA := map[string]bool{}
644 namesB := map[string]bool{}
645 for name := range a.Counts {
646 namesA[name] = true
647 m.TotalA++
648 }
649 for name := range b.Counts {
650 namesB[name] = true
651 m.TotalB++
652 }
653 for name := range namesA {
654 if !namesB[name] {
655 m.OnlyInA = append(m.OnlyInA, name)
656 } else {
657 m.Matched++
658 if a.Counts[name] != b.Counts[name] {
659 m.CountDiffs = append(m.CountDiffs, name)
660 }
661 }
662 }
663 for name := range namesB {
664 if !namesA[name] {
665 m.OnlyInB = append(m.OnlyInB, name)
666 }
667 }
668 return m
669 }
670
671 func (m *SymbolMatch) IsEquivalent() bool {
672 return len(m.OnlyInA) == 0 && len(m.OnlyInB) == 0
673 }
674
675 func (m *SymbolMatch) Score() int32 {
676 if m.TotalA == 0 && m.TotalB == 0 {
677 return 100
678 }
679 total := m.TotalA
680 if m.TotalB > total {
681 total = m.TotalB
682 }
683 if total == 0 {
684 return 0
685 }
686 return (m.Matched * 100) / total
687 }
688
689 // --- Helpers ---
690
691 func countIndent(line []byte) int32 {
692 n := int32(0)
693 i := int32(0)
694 for i+1 < int32(len(line)) && line[i] == ' ' && line[i+1] == ' ' {
695 n++
696 i += 2
697 }
698 return n
699 }
700
701 func isParamLine(trimmed []byte) bool {
702 if len(trimmed) == 0 {
703 return false
704 }
705 if trimmed[0] >= 'a' && trimmed[0] <= 'z' {
706 return bytes.IndexByte(trimmed, ' ') > 0
707 }
708 return false
709 }
710
711 func parseParamLine(st *SymbolTable, trimmed []byte, depth int32) {
712 idx := bytes.IndexByte(trimmed, ' ')
713 if idx <= 0 {
714 return
715 }
716 names := trimmed[:idx]
717 parts := bytes.Split(names, []byte(","))
718 for _, p := range parts {
719 p = bytes.TrimSpace(p)
720 if len(p) > 0 {
721 addSym(st, string(p), depth, SymParam)
722 }
723 }
724 }
725
726 func parseAssign(st *SymbolTable, trimmed []byte, depth int32) {
727 rest := trimmed[7:]
728 spIdx := bytes.IndexByte(rest, ' ')
729 if spIdx <= 0 {
730 return
731 }
732 lhs := rest[:spIdx]
733 parts := bytes.Split(lhs, []byte(","))
734 for _, p := range parts {
735 p = bytes.TrimSpace(p)
736 if len(p) > 0 {
737 addSym(st, string(p), depth, SymDecl)
738 }
739 }
740 parseRefs(st, trimmed, depth)
741 }
742
743 func parseRefs(st *SymbolTable, trimmed []byte, depth int32) {
744 start := bytes.IndexByte(trimmed, '[')
745 if start < 0 {
746 return
747 }
748 end := bytes.IndexByte(trimmed[start:], ']')
749 if end < 0 {
750 return
751 }
752 refs := trimmed[start+1 : start+end]
753 parts := bytes.Split(refs, []byte(","))
754 for _, p := range parts {
755 p = bytes.TrimSpace(p)
756 if len(p) > 0 {
757 addSym(st, string(p), depth, SymRef)
758 }
759 }
760 }
761
762 func addSym(st *SymbolTable, name string, depth int32, kind uint8) {
763 st.Symbols = append(st.Symbols, Symbol{Name: name, Depth: depth, Kind: kind})
764 st.Counts[name] = st.Counts[name] + 1
765 }
766