package iskra

import "bytes"

// IRClass identifies the category assigned to a compared pair of IR fragments.
type IRClass uint8

// Classification values, numbered 0 through 4 in declaration order.
const (
	// ClassUnknown is the zero value; ClassName reports it as "unknown".
	ClassUnknown IRClass = iota
	// ClassByteEqual is reported by ClassName as "byte-equal".
	// NOTE(review): nothing in this part of the file assigns it; presumably
	// set by a caller that compares the raw bytes first — confirm.
	ClassByteEqual
	// ClassBoundaryEq is assigned by ClassifyIRPair when the two fragments
	// match after normalization, or after additionally stripping safety blocks.
	ClassBoundaryEq
	// ClassBoundaryDiv is assigned by ClassifyIRPair when the fragments still
	// differ after normalization and safety-block stripping.
	ClassBoundaryDiv
	// ClassPerfDiv is reported by ClassName as "perf-div".
	// NOTE(review): nothing in this part of the file assigns it — confirm usage.
	ClassPerfDiv
)

// ClassifyResult is the outcome of comparing two IR fragments, as produced by
// ClassifyIRPair. The "A" fields describe the first fragment passed to
// ClassifyIRPair (resultIR) and the "B" fields the second (actualIR).
type ClassifyResult struct {
	// Class is the category assigned to the pair.
	Class     IRClass
	// NormMatch is true when the fragments compared equal after normalization
	// (with or without safety-block stripping).
	NormMatch bool
	// InstrA and InstrB are the ProfileIR instruction counts. ClassifyIRPair
	// fills the count fields only when the fragments did not match; otherwise
	// they stay zero.
	InstrA    int32
	InstrB    int32
	// BlocksA and BlocksB are the ProfileIR basic-block counts.
	BlocksA   int32
	BlocksB   int32
	// CallsA and CallsB are the ProfileIR call counts.
	CallsA    int32
	CallsB    int32
	// Detail is a short human-readable explanation of how Class was chosen.
	Detail    string
}

// ClassName returns the short textual name of c.Class: "byte-equal",
// "boundary-eq", "boundary-div" or "perf-div". Any other value, including
// ClassUnknown, yields "unknown".
func (c ClassifyResult) ClassName() string {
	// Keyed array literal: slots without an entry (ClassUnknown) stay "".
	names := [...]string{
		ClassByteEqual:   "byte-equal",
		ClassBoundaryEq:  "boundary-eq",
		ClassBoundaryDiv: "boundary-div",
		ClassPerfDiv:     "perf-div",
	}
	if int(c.Class) < len(names) && names[c.Class] != "" {
		return names[c.Class]
	}
	return "unknown"
}

// NormalizeIR rewrites IR line by line into a canonical form so that two
// fragments differing only in incidental detail compare equal.
//
// The input is split on '\n'. Lines for which isDebugLine reports true (after
// trimming surrounding whitespace) are dropped. Every other line is passed, in
// this order, through:
//
//  1. stripDebugRefs;
//  2. stripMetaRef for each metadata kind in metaKinds;
//  3. stripFlag for each optimization hint in hintFlags;
//  4. renumberSSA, which renames numbered registers in first-seen order using
//     one mapping shared by the whole input;
//  5. stripAlignAnnotation.
//
// Trailing spaces and tabs are then removed and the line is emitted followed
// by '\n'. Because every retained piece gets its own newline, input that ends
// in '\n' produces one extra empty line at the end of the output.
func NormalizeIR(ir []byte) []byte {
	// Metadata attachments of the form "!kind !N" that are removed.
	metaKinds := [][]byte{
		[]byte("!tbaa !"),
		[]byte("!range !"),
		[]byte("!noalias !"),
		[]byte("!alias.scope !"),
		[]byte("!nonnull !"),
		[]byte("!dereferenceable !"),
	}
	// Optimization hint flags that are removed; each carries its leading space.
	hintFlags := [][]byte{
		[]byte(" nsw"),
		[]byte(" nuw"),
		[]byte(" exact"),
		[]byte(" nnan"),
		[]byte(" ninf"),
		[]byte(" nsz"),
		[]byte(" arcp"),
		[]byte(" contract"),
		[]byte(" reassoc"),
		[]byte(" afn"),
	}

	registers := make(map[string]string)
	nextRegister := 0
	var normalized []byte

	for _, raw := range bytes.Split(ir, []byte("\n")) {
		if isDebugLine(bytes.TrimSpace(raw)) {
			continue
		}

		cur := stripDebugRefs(raw)
		for _, kind := range metaKinds {
			cur = stripMetaRef(cur, kind)
		}
		for _, flag := range hintFlags {
			cur = stripFlag(cur, flag)
		}
		cur = renumberSSA(cur, registers, &nextRegister)
		cur = stripAlignAnnotation(cur)

		normalized = append(normalized, bytes.TrimRight(cur, " \t")...)
		normalized = append(normalized, '\n')
	}
	return normalized
}

// stripMetaRef removes every occurrence of prefix together with the run of
// decimal digits that directly follows it (for example "!tbaa !12"). One space
// and then one comma immediately before the occurrence are removed as well, so
// ", !tbaa !12" disappears as a whole. The input slice is not modified; when
// prefix does not occur, line is returned as is.
func stripMetaRef(line []byte, prefix []byte) []byte {
	for pos := bytes.Index(line, prefix); pos >= 0; pos = bytes.Index(line, prefix) {
		// tail ends up just past the numeric metadata id.
		tail := pos + len(prefix)
		for tail < len(line) && '0' <= line[tail] && line[tail] <= '9' {
			tail++
		}

		// head backs up over the " " and "," that introduced the attachment.
		head := pos
		if head > 0 && line[head-1] == ' ' {
			head--
		}
		if head > 0 && line[head-1] == ',' {
			head--
		}

		var kept []byte
		kept = append(kept, line[:head]...)
		kept = append(kept, line[tail:]...)
		line = kept
	}
	return line
}

// stripFlag removes every stand-alone occurrence of flag from line and returns
// the result. The input slice is not modified; when nothing is removed, line
// itself is returned.
//
// An occurrence is left in place when it is glued to a neighbouring word, that
// is, when the flag's edge byte and the adjacent byte in line are both word
// bytes according to isWordByte. The flags passed by NormalizeIR start with a
// space, so only the right-hand edge can be glued: " exact" is removed from
// "udiv exact i32" but kept in "exactly".
// NOTE(review): assumes isWordByte is true for identifier characters and false
// for a space — confirm against its definition.
//
// Earlier revisions restarted the search at the same position after skipping a
// glued occurrence, which never terminated, and applied the left-hand check to
// the byte before the flag's own leading space, so a flag that followed an
// opcode was never removed.
func stripFlag(line []byte, flag []byte) []byte {
	if len(flag) == 0 {
		return line
	}
	flagStartsWord := isWordByte(flag[0])
	flagEndsWord := isWordByte(flag[len(flag)-1])

	from := 0 // search resumes here, so skipped occurrences are not revisited
	for {
		rel := bytes.Index(line[from:], flag)
		if rel < 0 {
			return line
		}
		idx := from + rel
		after := idx + len(flag)

		gluedLeft := flagStartsWord && idx > 0 && isWordByte(line[idx-1])
		gluedRight := flagEndsWord && after < len(line) && isWordByte(line[after])
		if gluedLeft || gluedRight {
			from = idx + 1
			continue
		}

		rebuilt := make([]byte, 0, len(line)-len(flag))
		rebuilt = append(rebuilt, line[:idx]...)
		rebuilt = append(rebuilt, line[after:]...)
		line = rebuilt

		// Back up far enough to catch an occurrence formed across the splice.
		from = idx - len(flag) + 1
		if from < 0 {
			from = 0
		}
	}
}

// stripAlignAnnotation removes alignment annotations from line and returns the
// result. It repeatedly looks for ", align " and, when that is absent, for
// " align ", and deletes the match together with any spaces and decimal digits
// that follow the keyword. The comma form is tried first so that the separator
// goes away with the annotation. The text is removed even when no digits
// follow the keyword. The input slice is not modified.
func stripAlignAnnotation(line []byte) []byte {
	commaForm := []byte(", align ")
	bareForm := []byte(" align ")

	for {
		cut, pat := bytes.Index(line, commaForm), commaForm
		if cut < 0 {
			cut, pat = bytes.Index(line, bareForm), bareForm
		}
		if cut < 0 {
			return line
		}

		// resume starts on the space after "align", then skips the spaces
		// and the numeric alignment value.
		resume := cut + len(pat) - 1
		for resume < len(line) && line[resume] == ' ' {
			resume++
		}
		for resume < len(line) && '0' <= line[resume] && line[resume] <= '9' {
			resume++
		}

		var kept []byte
		kept = append(kept, line[:cut]...)
		kept = append(kept, line[resume:]...)
		line = kept
	}
}

// renumberSSA rewrites every numbered register in line (a '%' directly
// followed by one or more digits) to a canonical name and returns the new
// line; all other bytes are copied unchanged.
//
// ssaMap maps an original token such as "%17" to its canonical replacement.
// A token seen for the first time is assigned "%" followed by the current
// value of *counter, recorded in ssaMap, and *counter is incremented. Passing
// the same map and counter for successive lines therefore numbers registers
// in order of first appearance. ssaMap must be non-nil.
func renumberSSA(line []byte, ssaMap map[string]string, counter *int) []byte {
	out := make([]byte, 0, len(line))
	i := 0
	for i < len(line) {
		if line[i] != '%' || i+1 >= len(line) || !isDigitByte(line[i+1]) {
			out = append(out, line[i])
			i++
			continue
		}

		// Consume "%" plus the full run of digits.
		start := i
		i++
		for i < len(line) && isDigitByte(line[i]) {
			i++
		}

		orig := string(line[start:i])
		mapped, ok := ssaMap[orig]
		if !ok {
			// String concatenation; the previous "|" operator is not defined
			// for strings and did not compile.
			mapped = "%" + intToStr(*counter)
			ssaMap[orig] = mapped
			*counter++
		}
		out = append(out, mapped...)
	}
	return out
}

// intToStr returns the decimal representation of n.
//
// The scratch buffer holds 20 bytes: up to 19 digits of a 64-bit int plus a
// sign. The previous 10-byte buffer panicked for values of eleven or more
// digits, and negative values produced an empty string.
func intToStr(n int) string {
	if n == 0 {
		return "0"
	}
	var buf [20]byte
	i := len(buf)
	negative := n < 0
	for n != 0 {
		// Take the digit's magnitude instead of negating n, so the most
		// negative int is handled without overflow.
		digit := n % 10
		if digit < 0 {
			digit = -digit
		}
		i--
		buf[i] = byte('0' + digit)
		n /= 10
	}
	if negative {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}

// IRProfile holds structural feature counts gathered from LLVM IR by
// ProfileIR, for cost comparison. Every count is taken over lines that lie
// inside function bodies.
type IRProfile struct {
	// Instructions is the number of lines counted as instructions.
	Instructions int32
	// Blocks is the number of basic-block label lines.
	Blocks       int32
	// Calls is the number of call/invoke occurrences.
	Calls        int32
	// Phis is the number of lines containing " = phi ".
	Phis         int32
	// Loads is the number of lines containing " = load ".
	Loads        int32
	// Stores is the number of lines starting with "store ".
	Stores       int32
	// Branches is the number of lines starting with "br ".
	Branches     int32
}

// ProfileIR counts structural features of the function bodies found in ir.
//
// A body starts after a line beginning with "define " and ends at a line that
// consists of a single '}'; everything outside a body is ignored. Inside a
// body, each non-blank line is classified after cutting off a trailing
// ';' comment:
//
//   - a line that is only a comment is ignored;
//   - a line ending in ':' is a basic-block label and increments Blocks, so
//     both "next:" and "next:    ; preds = %entry" count as blocks;
//   - a line for which isDebugLine reports true is ignored;
//   - any other line increments Instructions and is matched against the
//     call/invoke, phi, load, store and br patterns. A line contributes at
//     most one to Calls.
//
// Opcode patterns are matched against the comment-free text, so words inside a
// trailing comment are not counted. The cut is made at the first ';' without
// regard to quoting, so a ';' inside a string literal also ends the matched
// text.
func ProfileIR(ir []byte) IRProfile {
	var p IRProfile
	inFunc := false
	for _, line := range bytes.Split(ir, []byte("\n")) {
		trimmed := bytes.TrimSpace(line)
		if len(trimmed) == 0 {
			continue
		}
		if bytes.HasPrefix(trimmed, []byte("define ")) {
			inFunc = true
			continue
		}
		if len(trimmed) == 1 && trimmed[0] == '}' {
			inFunc = false
			continue
		}
		if !inFunc {
			continue
		}

		// code is the line without its trailing comment.
		code := trimmed
		if semi := bytes.IndexByte(code, ';'); semi >= 0 {
			code = bytes.TrimSpace(code[:semi])
		}
		if len(code) == 0 {
			continue // comment-only line
		}
		if code[len(code)-1] == ':' {
			p.Blocks++
			continue
		}
		if isDebugLine(trimmed) {
			continue
		}

		p.Instructions++
		if bytes.HasPrefix(code, []byte("call ")) || bytes.HasPrefix(code, []byte("invoke ")) ||
			bytes.Contains(code, []byte(" call ")) || bytes.Contains(code, []byte(" invoke ")) {
			p.Calls++
		}
		if bytes.Contains(code, []byte(" = phi ")) {
			p.Phis++
		}
		if bytes.Contains(code, []byte(" = load ")) {
			p.Loads++
		}
		if bytes.HasPrefix(code, []byte("store ")) {
			p.Stores++
		}
		if bytes.HasPrefix(code, []byte("br ")) {
			p.Branches++
		}
	}
	return p
}

// ClassifyIRPair classifies two IR fragments in stages.
//
// Phase A normalizes both fragments with NormalizeIR; if the results are
// byte-equal the pair is ClassBoundaryEq. Phase A+ additionally runs
// StripSafetyBlocks on both normalized fragments and, on equality, also
// reports ClassBoundaryEq. In both cases NormMatch is true and the count
// fields stay zero.
//
// Phase B runs ProfileIR on the original (not normalized) fragments, stores
// the instruction, block and call counts in the result, and reports
// ClassBoundaryDiv. Only Detail varies with the first difference found: call
// count, then block count, then an instruction-count gap of more than 10% of
// the smaller count, otherwise a generic message.
func ClassifyIRPair(resultIR, actualIR []byte) ClassifyResult {
	normA := NormalizeIR(resultIR)
	normB := NormalizeIR(actualIR)
	if bytes.Equal(normA, normB) {
		return ClassifyResult{
			Class:     ClassBoundaryEq,
			NormMatch: true,
			Detail:    "matched after normalization",
		}
	}

	bareA := StripSafetyBlocks(normA)
	bareB := StripSafetyBlocks(normB)
	if bytes.Equal(bareA, bareB) {
		return ClassifyResult{
			Class:     ClassBoundaryEq,
			NormMatch: true,
			Detail:    "matched after nil-check block stripping",
		}
	}

	left := ProfileIR(resultIR)
	right := ProfileIR(actualIR)
	verdict := ClassifyResult{
		Class:   ClassBoundaryDiv,
		InstrA:  left.Instructions,
		InstrB:  right.Instructions,
		BlocksA: left.Blocks,
		BlocksB: right.Blocks,
		CallsA:  left.Calls,
		CallsB:  right.Calls,
	}

	// Absolute instruction gap and the smaller of the two counts.
	gap := left.Instructions - right.Instructions
	if gap < 0 {
		gap = -gap
	}
	smaller := left.Instructions
	if right.Instructions < smaller {
		smaller = right.Instructions
	}

	switch {
	case left.Calls != right.Calls:
		verdict.Detail = "call count differs (type/template mismatch)"
	case left.Blocks != right.Blocks:
		verdict.Detail = "block count differs (structural mismatch)"
	case smaller > 0 && gap*100/smaller > 10:
		verdict.Detail = "instruction count diverges >10% (wrong template)"
	default:
		verdict.Detail = "structural divergence after normalization"
	}
	return verdict
}

// StripSafetyBlocks removes safety-check basic blocks from ir and returns the
// remaining lines, each followed by '\n'.
//
// A block starts at a label line accepted by both isBlockLabel and
// isSafetyBlockLabel (for example "deref.next:" or
// "deref.next:     ; preds = %entry"). That label and all following lines are
// dropped until the next label that is not a safety label, or until a line
// starting with '}' (the closing brace of the function); that terminating line
// is kept. Every kept line is passed through stripSafetyRefs to remove
// references to safety blocks.
func StripSafetyBlocks(ir []byte) []byte {
	var kept []byte
	dropping := false
	for _, raw := range bytes.Split(ir, []byte("\n")) {
		text := bytes.TrimSpace(raw)
		label := isBlockLabel(text)

		if label && isSafetyBlockLabel(text) {
			dropping = true
			continue
		}
		if dropping {
			closesFunc := len(text) > 0 && text[0] == '}'
			if !label && !closesFunc {
				continue
			}
			// An ordinary label or the function's closing brace ends the block.
			dropping = false
		}

		kept = append(kept, stripSafetyRefs(raw)...)
		kept = append(kept, '\n')
	}
	return kept
}

// isBlockLabel reports whether line, with surrounding whitespace already
// trimmed, looks like a basic-block label such as "entry:" or
// "deref.next:     ; preds = %entry".
//
// A label is non-empty, does not start with '%', a space or a tab, and
// contains a ':' that is neither its first byte nor located after a ';'. The
// last condition keeps a colon inside a trailing comment ("ret void ; note: x")
// from turning an instruction into a label; earlier revisions accepted any
// colon.
func isBlockLabel(line []byte) bool {
	if len(line) == 0 {
		return false
	}
	switch line[0] {
	case '%', ' ', '\t':
		// Register definitions and untrimmed lines are never labels.
		return false
	}
	colon := bytes.IndexByte(line, ':')
	if colon <= 0 {
		return false
	}
	if semi := bytes.IndexByte(line, ';'); semi >= 0 && semi < colon {
		return false
	}
	return true
}

// isSafetyBlockLabel reports whether label names a safety-check block. Only
// the text before the first ':' is examined, so a trailing "; preds = ..."
// comment is ignored; input without ':' is never a safety label. The name must
// begin with one of the kinds deref, gep, store, lookup or slice, directly
// followed by ".next" or ".throw". Anything after that (for example a numeric
// suffix) is accepted.
func isSafetyBlockLabel(label []byte) bool {
	end := bytes.IndexByte(label, ':')
	if end < 0 {
		return false
	}
	name := label[:end]

	kinds := []string{"deref", "gep", "store", "lookup", "slice"}
	suffixes := []string{".next", ".throw"}
	for _, kind := range kinds {
		if !bytes.HasPrefix(name, []byte(kind)) {
			continue
		}
		rest := name[len(kind):]
		for _, suffix := range suffixes {
			if bytes.HasPrefix(rest, []byte(suffix)) {
				return true
			}
		}
	}
	return false
}

// stripSafetyRefs removes references to safety-check blocks (%deref.next,
// %gep.throw, ...) from line and returns the result. The input slice is not
// modified.
//
// Two contexts are recognized for each reference:
//
//   - Phi entry "[ val, %block ]": the whole bracket pair is removed together
//     with exactly one separator — the comma after it when another entry
//     follows, otherwise the comma before it — so the remaining entries stay
//     comma-separated.
//   - Branch target "label %block": the text from the nearest preceding
//     "label " through the block name and any digits directly after it is
//     removed, along with one space and one comma in front of it.
//
// A reference in any other context is left untouched, and processing of that
// block name stops for the line.
//
// Earlier revisions searched backwards for '[' but gave up at the first comma,
// which is always the one between the phi value and the block, so phi entries
// were never removed.
func stripSafetyRefs(line []byte) []byte {
	safetyPrefixes := [][]byte{
		[]byte("%deref.next"),
		[]byte("%deref.throw"),
		[]byte("%gep.next"),
		[]byte("%gep.throw"),
		[]byte("%store.next"),
		[]byte("%store.throw"),
		[]byte("%lookup.next"),
		[]byte("%lookup.throw"),
		[]byte("%slice.next"),
		[]byte("%slice.throw"),
	}
	labelKeyword := []byte("label ")

	// cut returns a fresh slice holding b without b[from:to].
	cut := func(b []byte, from, to int) []byte {
		var rebuilt []byte
		rebuilt = append(rebuilt, b[:from]...)
		rebuilt = append(rebuilt, b[to:]...)
		return rebuilt
	}

	for _, sp := range safetyPrefixes {
		for {
			idx := bytes.Index(line, sp)
			if idx < 0 {
				break
			}

			// Phi entry: find the '[' that opens the pair holding this
			// reference. A reference directly preceded by "label " is a branch
			// or switch target, even if a '[' occurs earlier on the line.
			open := -1
			if !bytes.HasSuffix(line[:idx], labelKeyword) {
				for j := idx - 1; j >= 0; j-- {
					c := line[j]
					if c == '[' {
						open = j
						break
					}
					if c == ']' || c == ';' {
						// A closed earlier pair or a comment: not inside a pair.
						break
					}
				}
			}
			if open >= 0 {
				if rel := bytes.IndexByte(line[idx:], ']'); rel >= 0 {
					start := open
					end := idx + rel + 1 // just past ']'

					next := end
					for next < len(line) && line[next] == ' ' {
						next++
					}
					if next < len(line) && line[next] == ',' {
						// Another entry follows: drop the comma after this one.
						end = next + 1
						for end < len(line) && line[end] == ' ' {
							end++
						}
					} else {
						// Last entry: drop the separator in front of it.
						if start > 0 && line[start-1] == ' ' {
							start--
						}
						if start > 0 && line[start-1] == ',' {
							start--
						}
					}
					line = cut(line, start, end)
					continue
				}
			}

			// Branch target: "label %block".
			labelIdx := bytes.LastIndex(line[:idx], labelKeyword)
			if labelIdx < 0 {
				// Unknown context; leave this reference alone.
				break
			}
			end := idx + len(sp)
			// Skip digits after the prefix (e.g. %deref.next2).
			for end < len(line) && line[end] >= '0' && line[end] <= '9' {
				end++
			}
			start := labelIdx
			if start > 0 && line[start-1] == ' ' {
				start--
			}
			if start > 0 && line[start-1] == ',' {
				start--
			}
			line = cut(line, start, end)
		}
	}
	return line
}