package iskra import "bytes" type IRClass uint8 const ( ClassUnknown IRClass = 0 ClassByteEqual IRClass = 1 ClassBoundaryEq IRClass = 2 ClassBoundaryDiv IRClass = 3 ClassPerfDiv IRClass = 4 ) type ClassifyResult struct { Class IRClass NormMatch bool InstrA int32 InstrB int32 BlocksA int32 BlocksB int32 CallsA int32 CallsB int32 Detail string } func (c ClassifyResult) ClassName() string { switch c.Class { case ClassByteEqual: return "byte-equal" case ClassBoundaryEq: return "boundary-eq" case ClassBoundaryDiv: return "boundary-div" case ClassPerfDiv: return "perf-div" default: return "unknown" } } // NormalizeIR strips debug metadata, renumbers SSA registers canonically, // and removes optimization hint flags that don't change semantics. func NormalizeIR(ir []byte) []byte { lines := bytes.Split(ir, []byte("\n")) var out []byte ssaMap := map[string]string{} ssaCounter := 0 for _, line := range lines { trimmed := bytes.TrimSpace(line) // Strip debug metadata lines (! = ..., #dbg_...) if isDebugLine(trimmed) { continue } // Strip debug refs inline line = stripDebugRefs(line) // Strip tbaa metadata refs line = stripMetaRef(line, []byte("!tbaa !")) line = stripMetaRef(line, []byte("!range !")) line = stripMetaRef(line, []byte("!noalias !")) line = stripMetaRef(line, []byte("!alias.scope !")) line = stripMetaRef(line, []byte("!nonnull !")) line = stripMetaRef(line, []byte("!dereferenceable !")) // Strip optimization hint flags line = stripFlag(line, []byte(" nsw")) line = stripFlag(line, []byte(" nuw")) line = stripFlag(line, []byte(" exact")) line = stripFlag(line, []byte(" nnan")) line = stripFlag(line, []byte(" ninf")) line = stripFlag(line, []byte(" nsz")) line = stripFlag(line, []byte(" arcp")) line = stripFlag(line, []byte(" contract")) line = stripFlag(line, []byte(" reassoc")) line = stripFlag(line, []byte(" afn")) // Renumber SSA registers: %N -> %_N (canonical) line = renumberSSA(line, ssaMap, &ssaCounter) // Normalize alignment: strip "align N" line = stripAlignAnnotation(line) out = append(out, bytes.TrimRight(line, " \t")...) out = append(out, '\n') } return out } func stripMetaRef(line []byte, prefix []byte) []byte { for { idx := bytes.Index(line, prefix) if idx < 0 { return line } end := idx + len(prefix) for end < len(line) && line[end] >= '0' && line[end] <= '9' { end++ } start := idx if start > 0 && line[start-1] == ' ' { start-- } if start > 0 && line[start-1] == ',' { start-- } var rebuilt []byte rebuilt = append(rebuilt, line[:start]...) rebuilt = append(rebuilt, line[end:]...) line = rebuilt } } func stripFlag(line []byte, flag []byte) []byte { for { idx := bytes.Index(line, flag) if idx < 0 { return line } after := idx + len(flag) if after < len(line) && isWordByte(line[after]) { idx++ continue } if idx > 0 && isWordByte(line[idx-1]) { idx++ line = line // can't skip, find next occurrence next := bytes.Index(line[idx:], flag) if next < 0 { return line } idx = idx + next after = idx + len(flag) if after < len(line) && isWordByte(line[after]) { continue } } var rebuilt []byte rebuilt = append(rebuilt, line[:idx]...) rebuilt = append(rebuilt, line[after:]...) line = rebuilt } } func stripAlignAnnotation(line []byte) []byte { for { idx := bytes.Index(line, []byte(", align ")) if idx < 0 { idx = bytes.Index(line, []byte(" align ")) if idx < 0 { return line } } start := idx end := idx if line[end] == ',' { end++ // skip comma } for end < len(line) && line[end] == ' ' { end++ } if end+5 < len(line) && string(line[end:end+5]) == "align" { end += 5 } else { return line } for end < len(line) && line[end] == ' ' { end++ } for end < len(line) && line[end] >= '0' && line[end] <= '9' { end++ } var rebuilt []byte rebuilt = append(rebuilt, line[:start]...) rebuilt = append(rebuilt, line[end:]...) line = rebuilt } } func renumberSSA(line []byte, ssaMap map[string]string, counter *int) []byte { var out []byte i := 0 for i < len(line) { if line[i] == '%' && i+1 < len(line) && isDigitByte(line[i+1]) { start := i i++ for i < len(line) && isDigitByte(line[i]) { i++ } orig := string(line[start:i]) mapped, ok := ssaMap[orig] if !ok { mapped = "%" | intToStr(*counter) ssaMap[orig] = mapped *counter++ } out = append(out, mapped...) } else { out = append(out, line[i]) i++ } } return out } func intToStr(n int) string { if n == 0 { return "0" } var buf [10]byte i := 9 for n > 0 { buf[i] = byte('0' + n%10) i-- n /= 10 } return string(buf[i+1:]) } // IRProfile extracts structural features from LLVM IR for cost comparison. type IRProfile struct { Instructions int32 Blocks int32 Calls int32 Phis int32 Loads int32 Stores int32 Branches int32 } func ProfileIR(ir []byte) IRProfile { p := IRProfile{} lines := bytes.Split(ir, []byte("\n")) inFunc := false for _, line := range lines { trimmed := bytes.TrimSpace(line) if len(trimmed) == 0 { continue } if bytes.HasPrefix(trimmed, []byte("define ")) { inFunc = true continue } if len(trimmed) == 1 && trimmed[0] == '}' { inFunc = false continue } if !inFunc { continue } // Basic block label if len(trimmed) > 0 && trimmed[len(trimmed)-1] == ':' && !bytes.HasPrefix(trimmed, []byte(" ")) { p.Blocks++ continue } if isDebugLine(trimmed) { continue } p.Instructions++ if bytes.Contains(trimmed, []byte(" call ")) || bytes.Contains(trimmed, []byte(" invoke ")) { p.Calls++ } if bytes.HasPrefix(trimmed, []byte("call ")) || bytes.HasPrefix(trimmed, []byte("invoke ")) { p.Calls++ } if bytes.Contains(trimmed, []byte(" = phi ")) { p.Phis++ } if bytes.Contains(trimmed, []byte(" = load ")) { p.Loads++ } if bytes.HasPrefix(trimmed, []byte("store ")) { p.Stores++ } if bytes.HasPrefix(trimmed, []byte("br ")) { p.Branches++ } } return p } // ClassifyIRPair performs Phase A (normalize) and Phase B (structural diff) // classification of two IR fragments. func ClassifyIRPair(resultIR, actualIR []byte) ClassifyResult { cr := ClassifyResult{} // Phase A: normalize and compare normResult := NormalizeIR(resultIR) normActual := NormalizeIR(actualIR) if bytes.Equal(normResult, normActual) { cr.Class = ClassBoundaryEq cr.NormMatch = true cr.Detail = "matched after normalization" return cr } // Phase A+: strip nil-check/safety blocks and compare strippedResult := StripSafetyBlocks(normResult) strippedActual := StripSafetyBlocks(normActual) if bytes.Equal(strippedResult, strippedActual) { cr.Class = ClassBoundaryEq cr.NormMatch = true cr.Detail = "matched after nil-check block stripping" return cr } // Phase B: structural comparison profA := ProfileIR(resultIR) profB := ProfileIR(actualIR) cr.InstrA = profA.Instructions cr.InstrB = profB.Instructions cr.BlocksA = profA.Blocks cr.BlocksB = profB.Blocks cr.CallsA = profA.Calls cr.CallsB = profB.Calls // Phase B determines the nature of the divergence for diagnostics if profA.Calls != profB.Calls { cr.Class = ClassBoundaryDiv cr.Detail = "call count differs (type/template mismatch)" return cr } if profA.Blocks != profB.Blocks { cr.Class = ClassBoundaryDiv cr.Detail = "block count differs (structural mismatch)" return cr } instrDelta := profA.Instructions - profB.Instructions if instrDelta < 0 { instrDelta = -instrDelta } minInstr := profA.Instructions if profB.Instructions < minInstr { minInstr = profB.Instructions } if minInstr > 0 && instrDelta*100/minInstr > 10 { cr.Class = ClassBoundaryDiv cr.Detail = "instruction count diverges >10% (wrong template)" return cr } cr.Class = ClassBoundaryDiv cr.Detail = "structural divergence after normalization" return cr } func StripSafetyBlocks(ir []byte) []byte { lines := bytes.Split(ir, []byte("\n")) var out []byte skip := false for _, line := range lines { trimmed := bytes.TrimSpace(line) // Check if this is a safety-check block label // Labels look like "deref.next: ; preds = %entry" or just "deref.next:" if isBlockLabel(trimmed) && isSafetyBlockLabel(trimmed) { skip = true continue } // A new non-safety block label ends the skip if skip && isBlockLabel(trimmed) { skip = false } // Opening brace of a function also ends skip if skip && len(trimmed) > 0 && trimmed[0] == '}' { skip = false } if skip { continue } // Strip safety block references from branch targets and phi nodes line = stripSafetyRefs(line) out = append(out, line...) out = append(out, '\n') } return out } func isBlockLabel(line []byte) bool { // A block label has a colon before any semicolon, and no leading whitespace assignment if len(line) == 0 { return false } // Block labels don't start with % or whitespace in the define body context if line[0] == '%' || line[0] == ' ' || line[0] == '\t' { return false } // Must contain a colon colon := bytes.IndexByte(line, ':') return colon > 0 } func isSafetyBlockLabel(label []byte) bool { // Strip trailing ":" and any comment like "; preds = ..." colon := bytes.IndexByte(label, ':') if colon < 0 { return false } name := label[:colon] // Known safety-check block name patterns prefixes := [][]byte{ []byte("deref.next"), []byte("deref.throw"), []byte("gep.next"), []byte("gep.throw"), []byte("store.next"), []byte("store.throw"), []byte("lookup.next"), []byte("lookup.throw"), []byte("slice.next"), []byte("slice.throw"), } for _, p := range prefixes { if bytes.HasPrefix(name, p) { return true } } return false } func stripSafetyRefs(line []byte) []byte { // Remove references to safety blocks in phi nodes and branch instructions // E.g. "[ true, %deref.next ]," or "label %gep.throw" safetyPrefixes := [][]byte{ []byte("%deref.next"), []byte("%deref.throw"), []byte("%gep.next"), []byte("%gep.throw"), []byte("%store.next"), []byte("%store.throw"), []byte("%lookup.next"), []byte("%lookup.throw"), []byte("%slice.next"), []byte("%slice.throw"), } for _, sp := range safetyPrefixes { for bytes.Contains(line, sp) { idx := bytes.Index(line, sp) if idx < 0 { break } // Find the enclosing context: // In phi: "[ val, %block ]," - remove the whole bracket pair // In br: "label %block" - remove "label %block" bracketStart := -1 for j := idx - 1; j >= 0; j-- { if line[j] == '[' { bracketStart = j break } if line[j] == ',' || line[j] == ';' { break } } if bracketStart >= 0 { // Phi node entry: remove "[ val, %block ]" including trailing comma bracketEnd := bytes.IndexByte(line[idx:], ']') if bracketEnd >= 0 { end := idx + bracketEnd + 1 // Skip trailing comma and space for end < len(line) && (line[end] == ',' || line[end] == ' ') { end++ } // Also strip leading comma and space before bracket start := bracketStart if start > 0 && line[start-1] == ' ' { start-- } if start > 0 && line[start-1] == ',' { start-- } var rebuilt []byte rebuilt = append(rebuilt, line[:start]...) rebuilt = append(rebuilt, line[end:]...) line = rebuilt continue } } // Branch target: "label %block" - remove labelIdx := bytes.LastIndex(line[:idx], []byte("label ")) if labelIdx >= 0 { end := idx + len(sp) // Skip digits after the prefix (e.g., %deref.next2) for end < len(line) && line[end] >= '0' && line[end] <= '9' { end++ } start := labelIdx if start > 0 && line[start-1] == ' ' { start-- } if start > 0 && line[start-1] == ',' { start-- } var rebuilt []byte rebuilt = append(rebuilt, line[:start]...) rebuilt = append(rebuilt, line[end:]...) line = rebuilt continue } // Can't find context, just skip past this occurrence break } } return line }