package iskra

import (
	"math"

	"git.smesh.lol/iskradb/lattice"
)

// Default Gaussian σ for register-coordinate filtering in LookupAtomLink.
// σ_archaic is tighter than σ_discourse: archaism is a stronger semantic
// register mismatch than mere sentence-length difference. Tunable per call.
// topAtomVia always uses these defaults.
const (
	DefaultSigmaArchaic   = 64.0
	DefaultSigmaDiscourse = 128.0
	// diversityNearThreshold: the Gaussian factor below which a corpus
	// coord doesn't count toward the per-DstAtom diversity bonus.
	// Coords with Gaussian < 0.05 (i.e. ~20× muted) are too far to be
	// considered "supporting evidence" for a translation. With the default
	// σ values above and exp(-(Δa²/σa² + Δd²/σd²)): bible (255,199) from
	// query (0,0) gives ~1.1e-8; KFTT (5,171) from (0,0) gives ~0.17 -
	// KFTT counts as supporting evidence, bible does not.
	diversityNearThreshold = 0.05
)

// IngestPattern records one extraction result in the tree: the surface
// pattern goes into the Bgrammatical branch and every non-empty content
// slot goes into the Bsemantic branch as an atom. Existing records have
// their count bumped; new ones are inserted with Count 1 and StageTag set
// to domain. When a role is available for a slot it is accumulated into
// the RoleHist kept in the atom's MetaEntry.Extra.
//
// Returns the pattern's record index (for cross-domain linking), or
// lattice.NullRec when ext.Pattern is empty.
func IngestPattern(t *Tree, domain uint8, ext ExtractResult) uint32 {
	if len(ext.Pattern) == 0 {
		return lattice.NullRec
	}

	// Pattern record: insert on first sight, otherwise count the repeat.
	patKey := PatternKey(domain, ext.Pattern)
	patRI := t.LookupRecIdx(lattice.Bgrammatical, patKey)
	if patRI == lattice.NullRec {
		var patRec lattice.Record
		t.setFormOnRec(&patRec, string(ext.Pattern))
		patRec.Branch = uint8(lattice.Bgrammatical)
		patRI = t.db.InsertRec(lattice.Bgrammatical, patKey, patRec)
		t.metaSet(patRI, MetaEntry{Count: 1, StageTag: domain})
	} else {
		t.metaInc(patRI)
	}

	// Atom records, one per filled slot.
	for slot, surface := range ext.Slots {
		if surface == "" {
			continue
		}
		// Prefer the lemma carried in ext.Set; the surface form is the fallback.
		form := surface
		if slot < len(ext.Set) && ext.Set[slot].Atom != "" {
			form = ext.Set[slot].Atom
		}
		hasRole := slot < len(ext.Roles)

		key := AtomKey(domain, form)
		ri := t.LookupRecIdx(lattice.Bsemantic, key)
		if ri == lattice.NullRec {
			var atomRec lattice.Record
			t.setFormOnRec(&atomRec, form)
			atomRec.Branch = uint8(lattice.Bsemantic)
			ri = t.db.InsertRec(lattice.Bsemantic, key, atomRec)
			entry := MetaEntry{Count: 1, StageTag: domain}
			if hasRole {
				var hist RoleHist
				hist[ext.Roles[slot]] = 1
				hist.Encode(&entry.Extra)
			}
			t.metaSet(ri, entry)
			continue
		}

		// Known atom: bump the count and fold the role into its histogram.
		t.metaInc(ri)
		if !hasRole {
			continue
		}
		meta := t.metaGet(ri)
		if meta == nil {
			continue
		}
		var hist RoleHist
		hist.Decode(meta.Extra)
		hist[ext.Roles[slot]]++
		hist.Encode(&meta.Extra)
		// Extra was edited in place, so flag the record for the bulk store.
		if t.BulkMetaStore != nil {
			t.BulkMetaStore.dirty[ri] = true
		}
	}

	return patRI
}

// IngestCrossDomain counts one observation of srcPat (in srcDomain)
// aligning with dstPat (in dstDomain), e.g. a JA sentence pattern that
// corresponds to an EN sentence pattern. The alignment lives in the
// Bcooccur branch under CrossPatternKey; its form is "src=dst". Nothing is
// recorded when either pattern is empty.
func IngestCrossDomain(t *Tree, srcDomain, dstDomain uint8, srcPat, dstPat []byte) {
	if len(srcPat) == 0 || len(dstPat) == 0 {
		return
	}
	key := CrossPatternKey(srcDomain, dstDomain, srcPat, dstPat)
	ri := t.LookupRecIdx(lattice.Bcooccur, key)
	if ri == lattice.NullRec {
		// First observation: create the record with Count 1.
		var rec lattice.Record
		t.setFormOnRec(&rec, string(srcPat) | "=" | string(dstPat))
		rec.Branch = uint8(lattice.Bcooccur)
		ri = t.db.InsertRec(lattice.Bcooccur, key, rec)
		t.metaSet(ri, MetaEntry{Count: 1, StageTag: srcDomain})
		return
	}
	t.metaInc(ri)
}

// IngestDeepPattern counts one observation of a canonical deep pattern
// (a language-independent role sequence shared across domains). The
// record lives in the Bgrammatical branch under DeepPatternKey and is
// tagged with StageTag 0, the cross-domain marker. An empty pattern is
// ignored.
func IngestDeepPattern(t *Tree, deepPat []byte) {
	if len(deepPat) == 0 {
		return
	}
	key := DeepPatternKey(deepPat)
	ri := t.LookupRecIdx(lattice.Bgrammatical, key)
	if ri == lattice.NullRec {
		// First observation: create the record with Count 1.
		var rec lattice.Record
		t.setFormOnRec(&rec, string(deepPat))
		rec.Branch = uint8(lattice.Bgrammatical)
		ri = t.db.InsertRec(lattice.Bgrammatical, key, rec)
		t.metaSet(ri, MetaEntry{Count: 1, StageTag: 0}) // domain 0 = cross-domain
		return
	}
	t.metaInc(ri)
}

// Atom-link generation marker. The ingest functions in this file store it
// in the high nibble of MetaEntry.StageTag (gen << 4) with the source
// language/domain in the low bits; GenLegacy records store the bare
// source domain, which leaves the marker at 0. The marker distinguishes:
//   GenLegacy (0)  - records written by the bilateral IngestAtomLink before
//                    the context-aware schema landed; role/context fields
//                    are empty/unknown. Used as a translation fallback.
//   GenContexted (1) - records written by IngestContextedAtomLink with
//                      role and governing-context populated. The preferred
//                      lookup path.
//   GenDictionary (2) - records written by IngestDictAtomLink /
//                       IngestDictAtomLinkRaw. LookupAtomLink uses them to
//                       confirm corpus candidates and, only when no corpus
//                       tier produced a candidate, as a last-resort tier.
const (
	GenLegacy    uint8 = 0
	GenContexted uint8 = 1
	GenDictionary uint8 = 2
)

// pronounPerson classifies atom as a pronoun of language lang (1 = EN,
// 2 = JA) and returns its person class:
//
//	1, 2, 3 - first, second, third grammatical person
//	4       - inanimate ("it" / JA demonstratives), kept separate so it
//	          never links to human pronouns
//	0       - not a known pronoun, or a language without a table
//
// Matching is exact (case-sensitive, no normalisation). Used to prevent
// cross-person pronoun links during ingestion.
func pronounPerson(lang uint8, atom string) int32 {
	switch lang {
	case 1: // EN
		switch atom {
		case "i", "me", "my", "myself", "we", "us", "our", "ourselves":
			return 1
		case "you", "your", "yourself", "yourselves":
			return 2
		case "he", "him", "his", "himself",
			"she", "her", "herself",
			"they", "them", "their", "themselves":
			return 3
		case "it", "itself":
			// inanimate - only links to JA demonstratives, not human pronouns
			return 4
		}
	case 2: // JA
		switch atom {
		case "\xe7\xa7\x81", // 私
			"\xe5\x83\x95",                         // 僕
			"\xe4\xbf\xba",                         // 俺
			"\xe3\x82\x8f\xe3\x81\x97",             // わし
			"\xe8\x87\xaa\xe5\x88\x86",             // 自分
			"\xe7\xa7\x81\xe3\x81\x9f\xe3\x81\xa1", // 私たち
			"\xe6\x88\x91\xe3\x80\x85":             // 我々
			return 1
		case "\xe3\x81\x82\xe3\x81\xaa\xe3\x81\x9f", // あなた
			"\xe5\x90\x9b",                         // 君
			"\xe3\x81\x8a\xe5\x89\x8d",             // お前
			"\xe3\x81\x82\xe3\x82\x93\xe3\x81\x9f": // あんた
			return 2
		case "\xe5\xbd\xbc", // 彼
			"\xe5\xbd\xbc\xe5\xa5\xb3", // 彼女
			"\xe5\xbd\xbc\xe3\x82\x89": // 彼ら
			return 3
		case "\xe3\x81\x9d\xe3\x82\x8c", // それ
			"\xe3\x81\x93\xe3\x82\x8c", // これ
			"\xe3\x81\x82\xe3\x82\x8c": // あれ
			// inanimate demonstratives
			return 4
		}
	}
	return 0
}

// isSingleKana reports whether s is exactly one 3-byte UTF-8 sequence
// whose lead byte is 0xE3 and whose second byte is 0x81, 0x82 or 0x83,
// i.e. a single code point in U+3040-U+30FF (the hiragana and katakana
// blocks). The third byte is not inspected.
func isSingleKana(s string) bool {
	if len(s) != 3 || s[0] != 0xe3 {
		return false
	}
	return s[1] >= 0x81 && s[1] <= 0x83
}

// isJunkJAAtom filters JA atoms that are lemmatizer artifacts.
// っ+single-hiragana (e.g. っう, っく, っる) are malformed godan stems
// produced when the lemmatizer over-strips a verb.
//
// It reports true when s is exactly two 3-byte code points: っ
// (E3 81 A3) followed by a code point of the hiragana block. Hiragana
// spans two UTF-8 second-byte ranges: E3 81 xx (U+3040-U+307F) and
// E3 82 80..9F (U+3080-U+309F, which holds む, る, れ, を, ん ...). Both
// halves are accepted so that stems such as っむ and っる are caught as
// well; E3 82 A0 and above is katakana and is not treated as junk.
func isJunkJAAtom(s string) bool {
	if len(s) != 6 {
		return false
	}
	// First code point must be っ.
	if s[0] != 0xe3 || s[1] != 0x81 || s[2] != 0xa3 {
		return false
	}
	// Second code point must be in the hiragana block.
	if s[3] != 0xe3 {
		return false
	}
	switch s[4] {
	case 0x81:
		return true
	case 0x82:
		return s[5] >= 0x80 && s[5] <= 0x9f
	}
	return false
}

// AtomLinkKey builds the lattice key of a context-aware atom-link record.
// The hashed byte string is
//
//	langA langB 'X' roleA gen rArch rDisc atomA 0x00 contextA 0x00 atomB
//
// and is passed to lattice.HashKey.
//
// The register coordinate (rArch, rDisc) is part of the key so the same
// atom pair seen in corpora of different register yields distinct
// records. This keeps scripture-derived associations from polluting
// modern-conversational lookups even when the atoms collide.
//
// Note: this is a point-lookup key. Prefix-scan queries are served by a
// sidecar index, not by key structure.
func AtomLinkKey(langA, langB, roleA, gen, rArch, rDisc uint8, atomA, contextA, atomB string) lattice.Key {
	const headerLen = 7
	// header + atomA + NUL + contextA + NUL + atomB
	n := headerLen + len(atomA) + 1 + len(contextA) + 1 + len(atomB)
	buf := []byte{:n:n}

	copy(buf, []byte{langA, langB, 'X', roleA, gen, rArch, rDisc})

	off := headerLen
	off += copy(buf[off:], []byte(atomA))
	buf[off] = 0x00
	off++
	off += copy(buf[off:], []byte(contextA))
	buf[off] = 0x00
	off++
	copy(buf[off:], []byte(atomB))

	return lattice.HashKey(buf)
}

// IngestContextedAtomLink counts one observation of a word-level
// cross-language link tagged with role and governing context. contextA is
// an atom from the same language as atomA (the immediate head when
// Head>=0, the clause's predicate atom when Head==-1, or the empty string
// for the predicate itself); contextB is the same for atomB.
//
// Both atoms are lemmatized first (language 1 via LemmatizeEN, language 2
// via LemmatizeJA), then the link is dropped when either atom is empty, a
// single kana, a junk JA stem, or when the pronoun filter rejects it.
//
// Records are written with the GenContexted generation marker; the
// lookup function prefers them over GenLegacy records. The key covers
// (langA, langB, roleA, gen, rArch, rDisc, atomA, contextA, atomB);
// contextB and roleB are stored with the record but are not part of the
// key, so they reflect the first observation only.
func IngestContextedAtomLink(t *Tree,
	langA, langB uint8,
	atomA, contextA string, roleA int32,
	atomB, contextB string, roleB int32,
	rArch, rDisc uint8,
) {
	// Lemmatize per language at ingest time so inflected forms collapse.
	switch langA {
	case 1:
		atomA = LemmatizeEN(atomA).Lemma
	case 2:
		atomA = LemmatizeJA(atomA, roleA == HistVerb).Lemma
	}
	switch langB {
	case 1:
		atomB = LemmatizeEN(atomB).Lemma
	case 2:
		atomB = LemmatizeJA(atomB, roleB == HistVerb).Lemma
	}

	if atomA == "" || atomB == "" ||
		isSingleKana(atomA) || isSingleKana(atomB) ||
		isJunkJAAtom(atomA) || isJunkJAAtom(atomB) {
		return
	}

	// Pronoun filter. A pronoun may only link to a pronoun of the same
	// person class: JA restructures predication (EN "I love you" ->
	// JA "君が好きだ"), so role-based alignment would otherwise pair
	// pronouns across persons, or pair EN "you" (SUBJECT) with a JA
	// common noun such as 物 (SUBJECT). With 0 meaning "not a pronoun",
	// both rules reduce to: if either side is a pronoun, the classes
	// must be equal.
	personA := pronounPerson(langA, atomA)
	personB := pronounPerson(langB, atomB)
	if (personA > 0 || personB > 0) && personA != personB {
		return
	}

	key := AtomLinkKey(langA, langB, uint8(roleA), GenContexted, rArch, rDisc,
		atomA, contextA, atomB)
	ri := t.LookupRecIdx(lattice.Bpragmatic, key)
	if ri != lattice.NullRec {
		t.metaInc(ri)
		return
	}

	// First observation: create the record.
	var rec lattice.Record
	t.setFormOnRec(&rec, atomA | "|" | contextA | "|" | atomB | "|" | contextB)
	rec.Branch = uint8(lattice.Bpragmatic)
	ri = t.db.InsertRec(lattice.Bpragmatic, key, rec)
	t.metaSet(ri, MetaEntry{Count: 1, StageTag: langA | (GenContexted << 4)})

	// Extra layout for GenContexted records:
	//   Extra[0]: roleB
	//   Extra[1]: langB
	//   Extra[2]: R_archaic (corpus register coordinate)
	//   Extra[3]: R_discourse
	//   Extra[4]: roleA
	meta := t.metaGet(ri)
	if meta == nil {
		return
	}
	meta.Extra[0] = uint8(roleB)
	meta.Extra[1] = langB
	meta.Extra[2] = rArch
	meta.Extra[3] = rDisc
	meta.Extra[4] = uint8(roleA)
	if t.BulkMetaStore != nil {
		t.BulkMetaStore.dirty[ri] = true
	}
}

// AtomLinkResult is the return type of LookupAtomLink. Carries the
// destination atom and provenance information for diagnostic visibility.
// The zero value means "no match".
type AtomLinkResult struct {
	DstAtom    string
	DstRole    int32
	DstContext string
	Weight     uint32
	Generation uint8 // Gen of the picked entry: 0 = GenLegacy fallback, 1 = GenContexted (preferred), 2 = GenDictionary
	Tier       uint8 // 1-5: relaxation tier that produced the pick (5 = dictionary last resort); 0 = no match
}

// LookupAtomLink (defined further below) finds the best destination atom
// for (srcLang, srcAtom) in dstLang via the sidecar index. Each candidate
// record receives a base score:
//
//   score = log(1 + weight) × diversity_near × exp(-(Δarch²/σ_arch² + Δdisc²/σ_disc²))
//
// Three components:
//
//  1. log(1 + weight) - logarithmic in observation count. Compresses the
//     differentiation between high-frequency records so a Tatoeba-
//     memorized wrong mapping with weight=50 (score ~3.93) doesn't
//     outvote a less-frequent correct one with weight=5 (score ~1.79)
//     by orders of magnitude. Bayesian intuition: the 50 observations
//     from one corpus are correlated, not independent; their information
//     content scales sub-linearly.
//
//  2. diversity_near - count of distinct corpus register-coordinates
//     among candidates with the same DstAtom, FILTERED to coords whose
//     Gaussian factor is at or above the diversityNearThreshold. A DstAtom
//     backed by 3 corpora near the query has diversity_near=3; one
//     backed by Bible-only (far from a modern query) has near=0,
//     defaulting to 1. This prevents far-register records from padding
//     the diversity of an irrelevant DstAtom.
//
//  3. Gaussian distance - per-record register-axis filter. Records far
//     from the query coord get muted. Documented in the register coord
//     design.
//
// Net effect: corpus diversity outweighs raw count when the diversity
// is in-register. A Tatoeba-only correct mapping at weight=20 scores
// log(21)*1 = 3.04. A multi-corpus correct mapping at weight=5 each
// across 3 near corpora scores log(6)*3 = 5.38. The diverse one wins.
// A Tatoeba+Bible "diverse" but in-register-singular wrong mapping
// scores log(1 + weight)*1 because Bible's coord is filtered out.
//
// Base scores are summed per DstAtom within a tier. The aggregate is then
// multiplied by a bilateral (back-translation) ratio and by bonuses for a
// matching translated context, dictionary confirmation and triangulation
// through an intermediate language.
//
// Tier order (tiers are tried in order and the first tier that yields a
// candidate supplies the result; within it the highest score wins):
//   Tier 1: GenContexted match with exact (srcContext, srcRole)
//   Tier 2: GenContexted match with same srcRole, any context
//   Tier 3: GenContexted match with any role, any context
//   Tier 4: GenLegacy bilateral fallback
//   Tier 5: GenDictionary entries, used only when tiers 1-4 are all empty
//
// If sigmaArch or sigmaDisc is <= 0 the Gaussian factor is omitted
// (treated as 1).

// IngestStats tracks diagnostic counters for the trilateral scoring pipeline.
type IngestStats struct {
	// TriFired counts lookups that reached the triangulation step with at
	// least one viable candidate.
	TriFired          int32
	// TriConfirmed counts lookups where triangulation confirmed at least
	// one of the inspected candidates.
	TriConfirmed      int32
	// TriSwapped counts lookups where only the second candidate was
	// confirmed and therefore received the triangulation boost.
	TriSwapped        int32
	// TriRescued counts lookups whose single viable candidate was
	// confirmed by triangulation.
	TriRescued        int32
	// CtxSimFired counts candidates boosted because their ContextB equals
	// the translated source context.
	CtxSimFired       int32
	// CtxSimBoosted is not updated by the lookup code in this file.
	CtxSimBoosted     int32
	// DictConfirmFired counts candidates boosted by dictionary confirmation.
	DictConfirmFired  int32
	// DictAuthorityFired counts lookups whose viable set (taken from the
	// corpus tiers) contained at least one dictionary-confirmed candidate.
	DictAuthorityFired int32
}

// dictPOSMatch reports whether a dictionary entry's POS-derived role
// (dictRole) is compatible with the query atom's contextual role
// (queryRole). POS-role mapping from dict-ingest: verb->3, adj/adv->4,
// noun/name->1, else->7.
//
//   - verb entries match only verb queries;
//   - modifier entries match modifier or scope queries;
//   - subject (noun/name) entries match subject, object, topic,
//     complement or scope queries;
//   - every other dictionary role, HistComplement included, matches any
//     query.
func dictPOSMatch(dictRole, queryRole int32) bool {
	if dictRole == HistVerb {
		return queryRole == HistVerb
	}
	if dictRole == HistModifier {
		return queryRole == HistModifier || queryRole == HistScope
	}
	if dictRole == HistSubject {
		nominal := queryRole == HistSubject || queryRole == HistObject ||
			queryRole == HistTopic || queryRole == HistComplement
		return nominal || queryRole == HistScope
	}
	return true
}

// LookupAtomLink picks the best dstLang translation of (srcLang, srcAtom)
// from the sidecar index idx.
//
// Each index entry gets a base score
// log(1+Weight) × diversity_near × Gaussian(register distance to
// (qArch, qDisc)); base scores are summed per destination atom within a
// tier. The top aggregates of the first non-empty tier are then
// multiplied by a bilateral back-translation ratio and by bonuses for a
// matching translated context (×1.5), dictionary confirmation (×1.6) and
// triangulation through an intermediate language (×1.3).
//
// Tiers, in priority order (AtomLinkResult.Tier is the 1-based number):
//
//	1: GenContexted, same srcContext and srcRole
//	2: GenContexted, same srcRole
//	3: GenContexted, anything else
//	4: GenLegacy
//	5: GenDictionary, only when tiers 1-4 produced no candidate
//
// If sigmaArch or sigmaDisc is <= 0 the Gaussian factor is treated as 1.
// stats receives diagnostic counters and may be nil, in which case the
// counters are discarded. A nil idx or an atom with no usable candidate
// yields the zero AtomLinkResult.
func LookupAtomLink(idx *AtomIdx, srcLang, dstLang uint8,
	srcAtom, srcContext string, srcRole int32,
	qArch, qDisc uint8, sigmaArch, sigmaDisc float64,
	stats *IngestStats,
) AtomLinkResult {
	if idx == nil {
		return AtomLinkResult{}
	}
	// Diagnostics are optional: count into a throwaway struct so the
	// increments below never dereference a nil pointer.
	if stats == nil {
		stats = &IngestStats{}
	}
	candidates := idx.FindBySrc(srcLang, srcAtom)
	if len(candidates) == 0 {
		return AtomLinkResult{}
	}

	// gauss is the register-distance factor of a record coordinate
	// relative to the query coordinate; 1.0 when filtering is disabled.
	gauss := func(rArch, rDisc uint8) float64 {
		if sigmaArch <= 0 || sigmaDisc <= 0 {
			return 1.0
		}
		da := float64(int32(rArch) - int32(qArch))
		dd := float64(int32(rDisc) - int32(qDisc))
		exponent := (da*da)/(sigmaArch*sigmaArch) + (dd*dd)/(sigmaDisc*sigmaDisc)
		return math.Exp(-exponent)
	}

	// First pass: per-DstAtom diversity_near. Count distinct corpus
	// coords whose Gaussian factor is above diversityNearThreshold.
	// Coords beyond the threshold are too register-distant to count as
	// supporting evidence for a translation.
	type coordSet map[uint16]bool
	diversity := map[string]coordSet{}
	for i := range candidates {
		e := &candidates[i]
		if e.DstLang != dstLang {
			continue
		}
		if gauss(e.RArchaic, e.RDiscourse) < diversityNearThreshold {
			continue
		}
		// Pack the two 8-bit coordinates into one set key.
		coord := uint16(e.RArchaic)<<8 | uint16(e.RDiscourse)
		s := diversity[e.DstAtom]
		if s == nil {
			s = coordSet{}
			diversity[e.DstAtom] = s
		}
		s[coord] = true
	}

	// baseScore is the per-record score before aggregation.
	baseScore := func(e *AtomIdxEntry) float64 {
		w := math.Log1p(float64(e.Weight))
		div := float64(len(diversity[e.DstAtom]))
		if div < 1 {
			div = 1
		}
		return w * div * gauss(e.RArchaic, e.RDiscourse)
	}

	// Context-similarity: translate srcContext to dstLang once.
	// When a candidate's ContextB matches, the candidate was observed
	// with the same governing head (translated) as the query - strong
	// polysemy disambiguation signal.
	ctxTranslation := ""
	if srcContext != "" {
		ctxCands := idx.FindBySrc(srcLang, srcContext)
		var bestCtxW float64
		for j := range ctxCands {
			cc := &ctxCands[j]
			if cc.DstLang != dstLang {
				continue
			}
			w := float64(cc.Weight) * gauss(cc.RArchaic, cc.RDiscourse)
			if w > bestCtxW {
				bestCtxW = w
				ctxTranslation = cc.DstAtom
			}
		}
	}

	// Bilateral consistency: only the top candidates per tier are checked
	// to avoid O(K*M) backward lookups on polysemous atoms. The ratio is
	// (weight of dstAtom->srcAtom + 1) / (heaviest dstAtom->* + 1), or 1.0
	// when dstAtom has no back-links into srcLang at all.
	biCheck := func(dstAtom string) float64 {
		backCands := idx.FindBySrc(dstLang, dstAtom)
		var best, src float64
		for j := range backCands {
			bc := &backCands[j]
			if bc.DstLang != srcLang {
				continue
			}
			w := float64(bc.Weight)
			if w > best {
				best = w
			}
			if bc.DstAtom == srcAtom && w > src {
				src = w
			}
		}
		if best <= 0 {
			return 1.0
		}
		return (src + 1) / (best + 1)
	}

	// Per-DstAtom aggregation: sum baseScore across all entries for the
	// same destination atom within each tier. Polysemous atoms observed
	// in many contexts accumulate evidence.
	type atomAgg struct {
		bestEntry *AtomIdxEntry
		aggScore  float64
		tier      int32
	}
	tierAtoms := map[string]*atomAgg{}

	for i := range candidates {
		e := &candidates[i]
		if e.DstLang != dstLang {
			continue
		}
		s := baseScore(e)
		// ti is the 0-based tier index; -1 means an unknown generation.
		ti := -1
		if e.Gen == GenContexted {
			if e.ContextA == srcContext && int32(e.RoleA) == srcRole {
				ti = 0
			} else if int32(e.RoleA) == srcRole {
				ti = 1
			} else {
				ti = 2
			}
		} else if e.Gen == GenLegacy {
			ti = 3
		} else if e.Gen == GenDictionary {
			ti = 4
		}
		if ti < 0 {
			continue
		}
		// One aggregate per (tier, DstAtom) pair.
		key := string([]byte{byte(ti), ':'}) | e.DstAtom
		a := tierAtoms[key]
		if a == nil {
			a = &atomAgg{tier: ti}
			tierAtoms[key] = a
		}
		a.aggScore += s
		if a.bestEntry == nil || s > baseScore(a.bestEntry) {
			a.bestEntry = e
		}
	}

	// Collect top-N per tier for bilateral scoring. Each tier row is kept
	// sorted by descending aggregate via insertion.
	const topN = 8
	type ranked struct {
		entry    *AtomIdxEntry
		aggScore float64
		dstAtom  string
	}
	var top [5][topN]ranked
	for _, a := range tierAtoms {
		ti := a.tier
		s := a.aggScore
		slot := -1
		for k := 0; k < topN; k++ {
			if top[ti][k].entry == nil {
				slot = k
				break
			}
			if s > top[ti][k].aggScore {
				slot = k
				break
			}
		}
		if slot < 0 {
			continue
		}
		// Shift the tail down by one; the last element falls off.
		for k := topN - 1; k > slot; k-- {
			top[ti][k] = top[ti][k-1]
		}
		top[ti][slot] = ranked{entry: a.bestEntry, aggScore: s, dstAtom: a.bestEntry.DstAtom}
	}

	// triConfirm checks whether srcAtom->dstAtom is confirmed by a
	// 2-hop path through an intermediate language.
	intermediateLangs := [2]uint8{0x03, 0x04} // KO, ZH
	triConfirm := func(dstAtom string) int32 {
		confirms := 0
		for _, mid := range intermediateLangs {
			if mid == srcLang || mid == dstLang {
				continue
			}
			srcMid := topAtomVia(idx, srcLang, mid, srcAtom)
			if srcMid == "" {
				continue
			}
			dstMid := topAtomVia(idx, dstLang, mid, dstAtom)
			if dstMid == "" {
				continue
			}
			if srcMid == dstMid {
				confirms++
			}
		}
		return confirms
	}

	ctxMatch := func(e *AtomIdxEntry) bool {
		return ctxTranslation != "" && e.ContextB == ctxTranslation
	}

	// dictConfirm reports whether a POS-compatible dictionary entry for
	// srcAtom lists dstAtom (exactly, or for EN targets up to a trailing
	// or missing extra word).
	dictConfirm := func(dstAtom string) bool {
		for i := range candidates {
			e := &candidates[i]
			if e.Gen != GenDictionary || e.DstLang != dstLang {
				continue
			}
			if !dictPOSMatch(int32(e.RoleA), srcRole) {
				continue
			}
			if e.DstAtom == dstAtom {
				return true
			}
			// Fuzzy: corpus "search" matches dict "search for",
			// or corpus "carry out" matches dict "carry".
			if dstLang == 1 {
				da := e.DstAtom
				if len(dstAtom) < len(da) && da[:len(dstAtom)] == dstAtom && da[len(dstAtom)] == ' ' {
					return true
				}
				if len(da) < len(dstAtom) && dstAtom[:len(da)] == da && dstAtom[len(da)] == ' ' {
					return true
				}
			}
		}
		return false
	}

	// Combined scoring: bilateral ratio modulates aggregate score.
	// ctx-sim, triangulation, and dictionary confirmation are bonuses.
	type scored struct {
		entry    *AtomIdxEntry
		combined float64
		tier     int32
		dictOK   bool
	}
	var viable []scored

	// Corpus tiers 0-3: stop at the first tier that yields candidates.
	for ti := 0; ti < 4; ti++ {
		topAgg := 0.0
		if top[ti][0].entry != nil {
			topAgg = top[ti][0].aggScore
		}
		for k := 0; k < topN; k++ {
			r := &top[ti][k]
			if r.entry == nil {
				continue
			}
			// Drop candidates below 10% of the tier leader's aggregate.
			if topAgg > 0 && r.aggScore < topAgg*0.1 {
				continue
			}
			bi := biCheck(r.dstAtom)
			// The 0.05 offset keeps a zero-ratio candidate from vanishing.
			combined := r.aggScore * (bi + 0.05)
			if ctxMatch(r.entry) {
				combined *= 1.5
				stats.CtxSimFired++
			}
			dc := dictConfirm(r.dstAtom)
			if dc {
				combined *= 1.6
				stats.DictConfirmFired++
			}
			viable = append(viable, scored{entry: r.entry, combined: combined, tier: ti, dictOK: dc})
		}
		if len(viable) > 0 {
			break
		}
	}
	// Dictionary authority counter: track when dict-confirmed candidates
	// exist in the viable set (for diagnostics).
	for _, v := range viable {
		if v.dictOK {
			stats.DictAuthorityFired++
			break
		}
	}
	// Tier-4 (dictionary) only as last resort when corpus tiers empty.
	// Dict entries are pre-validated translations, so use a biRatio floor
	// to prevent polysemous back-indexes from over-penalizing common words.
	if len(viable) == 0 {
		for k := 0; k < topN; k++ {
			r := &top[4][k]
			if r.entry == nil {
				continue
			}
			bi := biCheck(r.dstAtom)
			if bi < 0.25 {
				bi = 0.25
			}
			combined := r.aggScore * (bi + 0.05)
			dc := dictConfirm(r.dstAtom)
			if dc {
				combined *= 1.6
				stats.DictConfirmFired++
			}
			viable = append(viable, scored{entry: r.entry, combined: combined, tier: 4, dictOK: dc})
		}
	}

	// Triangulation bonus on the first two viable candidates (viable is
	// ordered by aggregate score, not by combined score).
	if len(viable) >= 2 {
		stats.TriFired++
		tc0 := triConfirm(viable[0].entry.DstAtom)
		tc1 := triConfirm(viable[1].entry.DstAtom)
		if tc0 > 0 || tc1 > 0 {
			stats.TriConfirmed++
		}
		// Boost only when exactly one of the two is confirmed.
		if tc1 > 0 && tc0 == 0 {
			viable[1].combined *= 1.3
			stats.TriSwapped++
		} else if tc0 > 0 && tc1 == 0 {
			viable[0].combined *= 1.3
		}
	} else if len(viable) == 1 {
		// A single candidate cannot be re-ranked; only count the outcome.
		stats.TriFired++
		tc := triConfirm(viable[0].entry.DstAtom)
		if tc > 0 {
			stats.TriConfirmed++
			stats.TriRescued++
		}
	}

	// Pick highest combined score. Candidates whose combined score is not
	// strictly positive are never picked.
	var pick *AtomIdxEntry
	tier := uint8(0)
	bestCombined := 0.0
	for _, v := range viable {
		if v.combined > bestCombined {
			bestCombined = v.combined
			pick = v.entry
			tier = uint8(v.tier + 1)
		}
	}
	if pick == nil {
		return AtomLinkResult{}
	}
	return AtomLinkResult{
		DstAtom:    pick.DstAtom,
		DstRole:    int32(pick.RoleB),
		DstContext: pick.ContextB,
		Weight:     pick.Weight,
		Generation: pick.Gen,
		Tier:       tier,
	}
}

// DiagCandidate holds scoring details for one candidate in the ranked list
// returned by LookupAtomLinkDiag.
type DiagCandidate struct {
	DstAtom  string  // destination atom of the candidate
	AggScore float64 // base scores summed over the atom's entries within its tier
	BiRatio  float64 // bilateral (back-translation) ratio used for Combined
	Combined float64 // AggScore × (BiRatio + 0.05) × bonuses, recorded before the triangulation boost
	CtxSim   bool    // true when the context-similarity bonus was applied
	Tier     int32   // 0-based tier index (AtomLinkResult.Tier is this value + 1)
}

// LookupAtomLinkDiag is LookupAtomLink with full candidate diagnostics.
//
// It runs the same selection as LookupAtomLink - top-8 aggregates per
// tier, the 10% cutoff against the tier leader, the 0.25 bilateral floor
// and dictionary bonus on the dictionary tier - so that the returned
// AtomLinkResult is the pick production would make. It keeps no
// IngestStats counters. The second return value lists every viable
// candidate of the tier that was used, in descending aggregate order;
// DiagCandidate.Combined is recorded before the triangulation boost.
// The slice is nil when idx is nil or srcAtom has no index entries.
func LookupAtomLinkDiag(idx *AtomIdx, srcLang, dstLang uint8,
	srcAtom, srcContext string, srcRole int32,
	qArch, qDisc uint8, sigmaArch, sigmaDisc float64,
) (AtomLinkResult, []DiagCandidate) {
	if idx == nil {
		return AtomLinkResult{}, nil
	}
	candidates := idx.FindBySrc(srcLang, srcAtom)
	if len(candidates) == 0 {
		return AtomLinkResult{}, nil
	}

	// gauss is the register-distance factor of a record coordinate
	// relative to the query coordinate; 1.0 when filtering is disabled.
	gauss := func(rArch, rDisc uint8) float64 {
		if sigmaArch <= 0 || sigmaDisc <= 0 {
			return 1.0
		}
		da := float64(int32(rArch) - int32(qArch))
		dd := float64(int32(rDisc) - int32(qDisc))
		exponent := (da*da)/(sigmaArch*sigmaArch) + (dd*dd)/(sigmaDisc*sigmaDisc)
		return math.Exp(-exponent)
	}

	// Per-DstAtom set of distinct near-register corpus coordinates.
	type coordSet map[uint16]bool
	diversity := map[string]coordSet{}
	for i := range candidates {
		e := &candidates[i]
		if e.DstLang != dstLang {
			continue
		}
		if gauss(e.RArchaic, e.RDiscourse) < diversityNearThreshold {
			continue
		}
		coord := uint16(e.RArchaic)<<8 | uint16(e.RDiscourse)
		s := diversity[e.DstAtom]
		if s == nil {
			s = coordSet{}
			diversity[e.DstAtom] = s
		}
		s[coord] = true
	}

	// baseScore is the per-record score before aggregation.
	baseScore := func(e *AtomIdxEntry) float64 {
		w := math.Log1p(float64(e.Weight))
		div := float64(len(diversity[e.DstAtom]))
		if div < 1 {
			div = 1
		}
		return w * div * gauss(e.RArchaic, e.RDiscourse)
	}

	// Translate the governing context once for the ctx-sim bonus.
	ctxTranslation := ""
	if srcContext != "" {
		ctxCands := idx.FindBySrc(srcLang, srcContext)
		var bestCtxW float64
		for j := range ctxCands {
			cc := &ctxCands[j]
			if cc.DstLang != dstLang {
				continue
			}
			w := float64(cc.Weight) * gauss(cc.RArchaic, cc.RDiscourse)
			if w > bestCtxW {
				bestCtxW = w
				ctxTranslation = cc.DstAtom
			}
		}
	}

	// biCheck is the bilateral back-translation ratio; 1.0 when dstAtom
	// has no back-links into srcLang.
	biCheck := func(dstAtom string) float64 {
		backCands := idx.FindBySrc(dstLang, dstAtom)
		var best, src float64
		for j := range backCands {
			bc := &backCands[j]
			if bc.DstLang != srcLang {
				continue
			}
			w := float64(bc.Weight)
			if w > best {
				best = w
			}
			if bc.DstAtom == srcAtom && w > src {
				src = w
			}
		}
		if best <= 0 {
			return 1.0
		}
		return (src + 1) / (best + 1)
	}

	// Sum base scores per (tier, DstAtom).
	type atomAgg struct {
		bestEntry *AtomIdxEntry
		aggScore  float64
		tier      int32
	}
	tierAtoms := map[string]*atomAgg{}
	for i := range candidates {
		e := &candidates[i]
		if e.DstLang != dstLang {
			continue
		}
		s := baseScore(e)
		// ti is the 0-based tier index; -1 means an unknown generation.
		ti := -1
		if e.Gen == GenContexted {
			if e.ContextA == srcContext && int32(e.RoleA) == srcRole {
				ti = 0
			} else if int32(e.RoleA) == srcRole {
				ti = 1
			} else {
				ti = 2
			}
		} else if e.Gen == GenLegacy {
			ti = 3
		} else if e.Gen == GenDictionary {
			ti = 4
		}
		if ti < 0 {
			continue
		}
		key := string([]byte{byte(ti), ':'}) | e.DstAtom
		a := tierAtoms[key]
		if a == nil {
			a = &atomAgg{tier: ti}
			tierAtoms[key] = a
		}
		a.aggScore += s
		if a.bestEntry == nil || s > baseScore(a.bestEntry) {
			a.bestEntry = e
		}
	}

	// Top-N per tier, same N as LookupAtomLink so both see the same
	// candidate pool. Rows are kept sorted by descending aggregate.
	const topN = 8
	type ranked struct {
		entry    *AtomIdxEntry
		aggScore float64
		dstAtom  string
	}
	var top [5][topN]ranked
	for _, a := range tierAtoms {
		ti := a.tier
		s := a.aggScore
		slot := -1
		for k := 0; k < topN; k++ {
			if top[ti][k].entry == nil {
				slot = k
				break
			}
			if s > top[ti][k].aggScore {
				slot = k
				break
			}
		}
		if slot < 0 {
			continue
		}
		for k := topN - 1; k > slot; k-- {
			top[ti][k] = top[ti][k-1]
		}
		top[ti][slot] = ranked{entry: a.bestEntry, aggScore: s, dstAtom: a.bestEntry.DstAtom}
	}

	ctxMatch := func(e *AtomIdxEntry) bool {
		return ctxTranslation != "" && e.ContextB == ctxTranslation
	}

	// diagDictConfirm reports whether a POS-compatible dictionary entry
	// for srcAtom lists dstAtom (exactly, or for EN targets up to a
	// trailing or missing extra word).
	diagDictConfirm := func(dstAtom string) bool {
		for i := range candidates {
			e := &candidates[i]
			if e.Gen != GenDictionary || e.DstLang != dstLang {
				continue
			}
			if !dictPOSMatch(int32(e.RoleA), srcRole) {
				continue
			}
			if e.DstAtom == dstAtom {
				return true
			}
			if dstLang == 1 {
				da := e.DstAtom
				if len(dstAtom) < len(da) && da[:len(dstAtom)] == dstAtom && da[len(dstAtom)] == ' ' {
					return true
				}
				if len(da) < len(dstAtom) && dstAtom[:len(da)] == da && dstAtom[len(da)] == ' ' {
					return true
				}
			}
		}
		return false
	}

	type scoredD struct {
		entry    *AtomIdxEntry
		combined float64
		aggScore float64
		biRatio  float64
		ctxSim   bool
		tier     int32
	}
	var viable []scoredD

	// Corpus tiers 0-3: stop at the first tier that yields candidates.
	for ti := 0; ti < 4; ti++ {
		topAgg := 0.0
		if top[ti][0].entry != nil {
			topAgg = top[ti][0].aggScore
		}
		for k := 0; k < topN; k++ {
			r := &top[ti][k]
			if r.entry == nil {
				continue
			}
			// Same 10% cutoff against the tier leader as LookupAtomLink.
			if topAgg > 0 && r.aggScore < topAgg*0.1 {
				continue
			}
			bi := biCheck(r.dstAtom)
			combined := r.aggScore * (bi + 0.05)
			cm := ctxMatch(r.entry)
			if cm {
				combined *= 1.5
			}
			if diagDictConfirm(r.dstAtom) {
				combined *= 1.6
			}
			viable = append(viable, scoredD{
				entry: r.entry, combined: combined,
				aggScore: r.aggScore, biRatio: bi, ctxSim: cm, tier: ti,
			})
		}
		if len(viable) > 0 {
			break
		}
	}
	// Dictionary tier as last resort, with the same bilateral floor and
	// dictionary bonus LookupAtomLink applies there.
	if len(viable) == 0 {
		for k := 0; k < topN; k++ {
			r := &top[4][k]
			if r.entry == nil {
				continue
			}
			bi := biCheck(r.dstAtom)
			if bi < 0.25 {
				bi = 0.25
			}
			combined := r.aggScore * (bi + 0.05)
			if diagDictConfirm(r.dstAtom) {
				combined *= 1.6
			}
			viable = append(viable, scoredD{
				entry: r.entry, combined: combined,
				aggScore: r.aggScore, biRatio: bi, ctxSim: false, tier: 4,
			})
		}
	}

	// Snapshot the candidates before the triangulation boost is applied.
	var diag []DiagCandidate
	for _, v := range viable {
		diag = append(diag, DiagCandidate{
			DstAtom:  v.entry.DstAtom,
			AggScore: v.aggScore,
			BiRatio:  v.biRatio,
			Combined: v.combined,
			CtxSim:   v.ctxSim,
			Tier:     v.tier,
		})
	}

	// Triangulation bonus: boost whichever of the first two candidates is
	// the only one confirmed through an intermediate language.
	if len(viable) >= 2 {
		tc0 := triConfirmStatic(idx, srcLang, dstLang, srcAtom, viable[0].entry.DstAtom)
		tc1 := triConfirmStatic(idx, srcLang, dstLang, srcAtom, viable[1].entry.DstAtom)
		if tc1 > 0 && tc0 == 0 {
			viable[1].combined *= 1.3
		} else if tc0 > 0 && tc1 == 0 {
			viable[0].combined *= 1.3
		}
	}

	// Pick highest combined score; non-positive scores are never picked.
	var pick *AtomIdxEntry
	tier := uint8(0)
	bestCombined := 0.0
	for _, v := range viable {
		if v.combined > bestCombined {
			bestCombined = v.combined
			pick = v.entry
			tier = uint8(v.tier + 1)
		}
	}

	if pick == nil {
		return AtomLinkResult{}, diag
	}
	return AtomLinkResult{
		DstAtom:    pick.DstAtom,
		DstRole:    int32(pick.RoleB),
		DstContext: pick.ContextB,
		Weight:     pick.Weight,
		Generation: pick.Gen,
		Tier:       tier,
	}, diag
}

// triConfirmStatic counts how many intermediate languages (0x03 and 0x04)
// confirm the pair srcAtom->dstAtom: an intermediate language confirms
// when topAtomVia gives both atoms the same non-empty translation into
// it. Intermediates equal to srcLang or dstLang are skipped. It touches
// no counters, which makes it usable from the diagnostic lookup.
func triConfirmStatic(idx *AtomIdx, srcLang, dstLang uint8, srcAtom, dstAtom string) int32 {
	var confirms int32
	for _, mid := range [2]uint8{0x03, 0x04} {
		if mid == srcLang || mid == dstLang {
			continue
		}
		viaSrc := topAtomVia(idx, srcLang, mid, srcAtom)
		if viaSrc == "" {
			continue
		}
		viaDst := topAtomVia(idx, dstLang, mid, dstAtom)
		if viaDst != "" && viaDst == viaSrc {
			confirms++
		}
	}
	return confirms
}

// topAtomVia answers "what does srcAtom translate to in dstLang?" without
// the full scoring pipeline; triangulation uses it for its 2-hop checks.
// Each srcLang->dstLang entry is scored as Weight × Gaussian, where the
// Gaussian uses the default σ values and measures the entry's register
// coordinate against the origin (0,0) rather than a caller-supplied
// query coordinate. The DstAtom of the highest strictly positive score is
// returned, or "" when there is none.
func topAtomVia(idx *AtomIdx, srcLang, dstLang uint8, srcAtom string) string {
	const (
		sigmaArchSq = DefaultSigmaArchaic * DefaultSigmaArchaic
		sigmaDiscSq = DefaultSigmaDiscourse * DefaultSigmaDiscourse
	)
	winner := ""
	top := 0.0
	entries := idx.FindBySrc(srcLang, srcAtom)
	for i := range entries {
		if entries[i].DstLang != dstLang {
			continue
		}
		ra := float64(entries[i].RArchaic)
		rd := float64(entries[i].RDiscourse)
		factor := math.Exp(-(ra*ra)/sigmaArchSq - (rd*rd)/sigmaDiscSq)
		score := float64(entries[i].Weight) * factor
		if score > top {
			top = score
			winner = entries[i].DstAtom
		}
	}
	return winner
}

// IngestAtomLink records a word-level cross-domain correspondence.
// Words are lemmatized before storage so inflected forms collapse to region centers.
// srcRole/dstRole hint whether the word is a verb (needed for JA lemmatization).
//
// This is the legacy bilateral function (GenLegacy generation). It remains
// in place for backward compatibility and as the lookup fallback for atoms
// that have no GenContexted records yet. New ingest paths should call
// IngestContextedAtomLink instead.
//
// The link is dropped when either lemma is empty, a single kana or a junk
// JA stem - the same filters the contexted and dictionary ingest paths
// apply.
//
// NOTE(review): the key concatenates srcAtom and dstAtom without a
// separator, so ("ab","c") and ("a","bc") hash the same bytes. Left
// unchanged because altering the layout would orphan stored records.
func IngestAtomLink(t *Tree, srcDomain, dstDomain uint8, srcWord, dstWord string, srcRole, dstRole int32, rArch, rDisc uint8) {
	srcAtom := srcWord
	dstAtom := dstWord
	if srcDomain == 1 {
		srcAtom = LemmatizeEN(srcWord).Lemma
	} else if srcDomain == 2 {
		srcAtom = LemmatizeJA(srcWord, srcRole == HistVerb).Lemma
	}
	if dstDomain == 1 {
		dstAtom = LemmatizeEN(dstWord).Lemma
	} else if dstDomain == 2 {
		dstAtom = LemmatizeJA(dstWord, dstRole == HistVerb).Lemma
	}
	// An empty side carries no translation information and would be
	// stored as a record of the form "=x" or "x=".
	if srcAtom == "" || dstAtom == "" {
		return
	}
	if isSingleKana(srcAtom) || isSingleKana(dstAtom) {
		return
	}
	if isJunkJAAtom(srcAtom) || isJunkJAAtom(dstAtom) {
		return
	}

	// Key bytes: srcDomain dstDomain 'L' rArch rDisc srcAtom dstAtom.
	buf := []byte{:5 + len(srcAtom) + len(dstAtom):5 + len(srcAtom) + len(dstAtom)}
	buf[0] = srcDomain
	buf[1] = dstDomain
	buf[2] = 'L'
	buf[3] = rArch
	buf[4] = rDisc
	copy(buf[5:], []byte(srcAtom))
	copy(buf[5+len(srcAtom):], []byte(dstAtom))
	key := lattice.HashKey(buf)

	ri := t.LookupRecIdx(lattice.Bpragmatic, key)
	if ri != lattice.NullRec {
		t.metaInc(ri)
		return
	}
	form := srcAtom | "=" | dstAtom
	var rec lattice.Record
	t.setFormOnRec(&rec, form)
	rec.Branch = uint8(lattice.Bpragmatic)
	ri = t.db.InsertRec(lattice.Bpragmatic, key, rec)
	// StageTag holds the bare source domain: generation nibble 0 = GenLegacy.
	t.metaSet(ri, MetaEntry{Count: 1, StageTag: srcDomain})
	// GenLegacy records also carry the corpus register coord so the sidecar
	// reader can apply distance weighting to legacy candidates too.
	m := t.metaGet(ri)
	if m != nil {
		m.Extra[2] = rArch
		m.Extra[3] = rDisc
		if t.BulkMetaStore != nil {
			t.BulkMetaStore.dirty[ri] = true
		}
	}
}

// IngestDictAtomLink records a dictionary-derived link between srcWord
// and dstWord. Both words are lemmatized first (domain 1 via LemmatizeEN,
// domain 2 via LemmatizeJA with the verb hint taken from the role; other
// domains are passed through unchanged) and then handed to
// ingestDictAtomLinkInner.
func IngestDictAtomLink(t *Tree, srcDomain, dstDomain uint8, srcWord, dstWord string, srcRole, dstRole int32) {
	srcAtom, dstAtom := srcWord, dstWord

	switch srcDomain {
	case 1:
		srcAtom = LemmatizeEN(srcWord).Lemma
	case 2:
		srcAtom = LemmatizeJA(srcWord, srcRole == HistVerb).Lemma
	}
	switch dstDomain {
	case 1:
		dstAtom = LemmatizeEN(dstWord).Lemma
	case 2:
		dstAtom = LemmatizeJA(dstWord, dstRole == HistVerb).Lemma
	}

	ingestDictAtomLinkInner(t, srcDomain, dstDomain, srcAtom, dstAtom, srcRole, dstRole)
}

// IngestDictAtomLinkRaw records a dictionary-derived link like
// IngestDictAtomLink but skips lemmatization: srcWord and dstWord are
// passed to ingestDictAtomLinkInner exactly as given.
func IngestDictAtomLinkRaw(t *Tree, srcDomain, dstDomain uint8, srcWord, dstWord string, srcRole, dstRole int32) {
	ingestDictAtomLinkInner(t, srcDomain, dstDomain, srcWord, dstWord, srcRole, dstRole)
}

// ingestDictAtomLinkInner stores or counts a GenDictionary link between
// two already-normalised atoms. Links with an empty atom, a single-kana
// atom or a junk JA stem are dropped.
//
// Key bytes: srcDomain dstDomain 'D' srcRole dstRole srcAtom dstAtom
// (the atoms are concatenated without a separator). A new record gets
// form "src=dst", Count 1, StageTag srcDomain | GenDictionary<<4, and
// Extra[0]=dstRole, Extra[1]=dstDomain, Extra[4]=srcRole; no register
// coordinate is written.
func ingestDictAtomLinkInner(t *Tree, srcDomain, dstDomain uint8, srcAtom, dstAtom string, srcRole, dstRole int32) {
	if srcAtom == "" || dstAtom == "" ||
		isSingleKana(srcAtom) || isSingleKana(dstAtom) ||
		isJunkJAAtom(srcAtom) || isJunkJAAtom(dstAtom) {
		return
	}

	const headerLen = 5
	n := headerLen + len(srcAtom) + len(dstAtom)
	buf := []byte{:n:n}
	copy(buf, []byte{srcDomain, dstDomain, 'D', uint8(srcRole), uint8(dstRole)})
	copy(buf[headerLen:], []byte(srcAtom))
	copy(buf[headerLen+len(srcAtom):], []byte(dstAtom))
	key := lattice.HashKey(buf)

	ri := t.LookupRecIdx(lattice.Bpragmatic, key)
	if ri != lattice.NullRec {
		t.metaInc(ri)
		return
	}

	// First observation: create the record.
	var rec lattice.Record
	t.setFormOnRec(&rec, srcAtom | "=" | dstAtom)
	rec.Branch = uint8(lattice.Bpragmatic)
	ri = t.db.InsertRec(lattice.Bpragmatic, key, rec)
	t.metaSet(ri, MetaEntry{Count: 1, StageTag: srcDomain | (GenDictionary << 4)})

	meta := t.metaGet(ri)
	if meta == nil {
		return
	}
	meta.Extra[0] = uint8(dstRole)
	meta.Extra[1] = dstDomain
	meta.Extra[4] = uint8(srcRole)
	if t.BulkMetaStore != nil {
		t.BulkMetaStore.dirty[ri] = true
	}
}