package iskra

// RenderENDiscourse renders a multi-clause Discourse as one EN string.
// An empty discourse yields "".
//
// Every clause after the first is preceded by a joiner picked from that
// clause's own Relation: " and ", " or " or " but " for the peer relations
// (ClauseAnd, ClauseOr, ClauseBut) and a single space for anything else.
// Subordinating relations (ClauseIf, ClauseBecause) additionally put "if " or
// "because " directly in front of the clause they mark ("if X, Y" - "if"
// attaches to X); this prefix is emitted for the first clause as well.
func RenderENDiscourse(d []Clause) string {
	text := ""
	for idx, clause := range d {
		rel := clause.Relation
		if idx > 0 {
			// Subordinators and unknown relations only need a separating
			// space here; their own prefix is added below.
			joiner := " "
			switch rel {
			case ClauseAnd:
				joiner = " and "
			case ClauseOr:
				joiner = " or "
			case ClauseBut:
				joiner = " but "
			}
			text = text | joiner
		}
		if rel == ClauseIf {
			text = text | "if "
		} else if rel == ClauseBecause {
			text = text | "because "
		}
		text = text | RenderEN(clause.Set)
	}
	return text
}

// RenderEN converts a Set to EN text in SVO order, attaching modifiers to
// their heads.
//
// Pass 1 sorts the entries into buckets:
//   - entries with ModKind MKCop or MKAdj go to the copula / predicate
//     adjective lists, whatever their Head is;
//   - entries whose Head is a valid index into set are recorded as modifiers
//     of that head;
//   - every other entry is top-level and is grouped by Role (entries with a
//     Role not listed below are dropped).
//
// Pass 2 emits, in this order: subjects, copula phrases, predicate
// adjectives, the verb group, objects, scope phrases (default preposition
// "in"), modifier phrases (default "with"), complements and operator phrases
// (default "of"). The words are joined with single spaces.
//
// EN modifier surface forms:
//   POSS - possessive determiner directly before head ("my fish", "his book")
//   ATTR - adjective directly before head ("red car", "small house")
func RenderEN(set []SetEntry) string {
	mods := map[int32][]int32{}
	var copulas, adjs []int32
	var subj, verb, obj, scope, mod, comp, oper []int32
	for i, e := range set {
		switch {
		case e.ModKind == MKCop:
			copulas = append(copulas, i)
		case e.ModKind == MKAdj:
			adjs = append(adjs, i)
		case e.Head >= 0 && int32(e.Head) < len(set):
			mods[int32(e.Head)] = append(mods[int32(e.Head)], i)
		default:
			switch e.Role {
			case HistTopic, HistSubject:
				subj = append(subj, i)
			case HistVerb:
				verb = append(verb, i)
			case HistObject:
				obj = append(obj, i)
			case HistScope:
				scope = append(scope, i)
			case HistModifier:
				mod = append(mod, i)
			case HistComplement:
				comp = append(comp, i)
			case HistOperator:
				oper = append(oper, i)
			}
		}
	}

	var parts []string

	// The first subject drives be-agreement (am/are/is/was/were) for the
	// passive and progressive verb forms below.
	subjAtom, subjPlural := "", false
	if len(subj) > 0 {
		lead := set[subj[0]]
		subjAtom = lead.Atom
		subjPlural = lead.Morph&MetaNumPlural != 0
	}
	for _, si := range subj {
		parts = appendENWithMods(parts, set, si, mods, "")
	}

	// Each copula entry contributes its own "be" form followed by the
	// (optionally prepositional) complement phrase.
	for _, ci := range copulas {
		cop := set[ci]
		parts = appendEN(parts, enCopulaForm(cop.Morph))
		parts = appendENWithMods(parts, set, ci, mods, oblRoleToEnPrep(cop.OblRole))
	}

	// Predicate adjectives: "be" form plus the (possibly comparative) adjective.
	for _, ai := range adjs {
		adj := set[ai]
		parts = appendEN(parts, enCopulaForm(adj.Morph))
		parts = appendEN(parts, formatENAdj(adj))
	}

	objsDone := false
	if len(verb) > 0 {
		mainIdx := verb[0]
		// Adverbs attached to the main verb are placed BEFORE the verb form.
		for _, mi := range mods[mainIdx] {
			if set[mi].ModKind != MKAdv {
				continue
			}
			parts = appendEN(parts, set[mi].Atom)
		}
		v := set[mainIdx]
		if v.Morph&MetaMoodVol != 0 {
			parts = appendEN(parts, "let's")
			parts = appendEN(parts, v.Atom)
		} else if v.Morph&MetaCausative != 0 {
			// Causative: "make(s)/made" + objects + bare verb.
			aux := "make"
			if v.Morph&MetaTensePast != 0 {
				aux = "made"
			} else if v.Morph&Meta3Sg != 0 {
				aux = "makes"
			}
			parts = appendEN(parts, aux)
			for _, oi := range obj {
				parts = appendENWithMods(parts, set, oi, mods, "")
			}
			parts = appendEN(parts, v.Atom)
			objsDone = true
		} else if v.Morph&MetaPassive != 0 {
			parts = appendEN(parts, enSubjCopula(subjAtom, subjPlural, v.Morph))
			parts = appendEN(parts, formatENVerbPP(v))
		} else if v.Morph&MetaAspectProg != 0 {
			parts = appendEN(parts, enSubjCopula(subjAtom, subjPlural, v.Morph))
			parts = appendEN(parts, formatENVerbProg(v.Atom))
		} else {
			parts = appendEN(parts, formatENVerb(v))
		}
		// Any further verbs are emitted as plain inflected forms.
		for _, vi := range verb[1:] {
			parts = appendEN(parts, formatENVerb(set[vi]))
		}
	}

	// Objects follow the verb unless the causative branch already placed them.
	if !objsDone {
		for _, oi := range obj {
			parts = appendENWithMods(parts, set, oi, mods, "")
		}
	}
	for _, si := range scope {
		prep := oblRoleToEnPrep(set[si].OblRole)
		if prep == "" {
			prep = "in"
		}
		parts = appendENWithMods(parts, set, si, mods, prep)
	}
	for _, mi := range mod {
		prep := oblRoleToEnPrep(set[mi].OblRole)
		if prep == "" {
			prep = "with"
		}
		parts = appendENWithMods(parts, set, mi, mods, prep)
	}
	for _, ci := range comp {
		parts = appendENWithMods(parts, set, ci, mods, "")
	}
	for _, oi := range oper {
		prep := oblRoleToEnPrep(set[oi].OblRole)
		if prep == "" {
			prep = "of"
		}
		parts = appendENWithMods(parts, set, oi, mods, prep)
	}

	return joinSpace(parts)
}

// appendENWithMods appends one noun phrase for set[idx] to parts and returns
// the extended slice. The phrase is laid out as:
//
//	[prep] [pre-head modifiers] head [and peer]... [that verb]...
//
// prep is skipped when empty. Modifiers of kind MKCoord become "and" + noun
// after the head, MKRel modifiers become "that" + verb form after those
// (intransitive REL only - the verb is the sole predicate of the sub-clause),
// MKAdv modifiers are ignored here, and every other modifier (POSS, ATTR) is
// emitted verbatim before the head.
//
// NOTE(review): formatENNoun may prefix "the " to the head, which then lands
// after the pre-head modifiers ("red the car") - confirm whether definite
// heads can carry pre-head modifiers.
func appendENWithMods(parts []string, set []SetEntry, idx int32, mods map[int32][]int32, prep string) []string {
	// appendEN drops empty words, so an empty prep adds nothing.
	parts = appendEN(parts, prep)

	// Single pass over the modifiers: emit the pre-head ones right away and
	// remember the post-head ones in their original order.
	var coords, rels []int32
	for _, m := range mods[idx] {
		switch set[m].ModKind {
		case MKCoord:
			coords = append(coords, m)
		case MKRel:
			rels = append(rels, m)
		case MKAdv:
			// Not part of the noun phrase.
		default:
			parts = appendEN(parts, set[m].Atom)
		}
	}

	parts = appendEN(parts, formatENNoun(set[idx]))

	for _, c := range coords {
		parts = appendEN(parts, "and")
		parts = appendEN(parts, formatENNoun(set[c]))
	}
	for _, r := range rels {
		parts = appendEN(parts, "that")
		parts = appendEN(parts, formatENVerb(set[r]))
	}
	return parts
}

// enSubjCopula picks the form of "be" that agrees with the subject.
// The MetaTensePast bit of morph selects past over present:
//
//	"i"                              -> am  / was
//	"you", "we", "they", or plural   -> are / were
//	anything else                    -> is  / was
//
// The "i" rule wins even when subjPlural is set.
func enSubjCopula(subjAtom string, subjPlural bool, morph uint16) string {
	isPast := morph&MetaTensePast != 0

	if subjAtom == "i" {
		if isPast {
			return "was"
		}
		return "am"
	}

	pluralLike := subjPlural
	switch subjAtom {
	case "you", "we", "they":
		pluralLike = true
	}
	if pluralLike {
		if isPast {
			return "were"
		}
		return "are"
	}

	if isPast {
		return "was"
	}
	return "is"
}

// enCopulaForm returns the appropriate "be" form for EN copula rendering.
// Selects between is/are/was/were based on tense and 3sg morph bits.
// Note: EN doesn't have a politeness distinction; MetaFormalityPol is JA-only.
func enCopulaForm(morph uint16) string {
	past := morph&MetaTensePast != 0
	thirdSg := morph&Meta3Sg != 0
	switch {
	case past && thirdSg:
		return "was"
	case past:
		return "were"
	case thirdSg:
		return "is"
	default:
		return "are"
	}
}

// oblRoleToEnPrep maps an oblique semantic role to its canonical EN
// preposition. OblRole is the language-independent layer; the preposition is
// EN-specific. Roles without a mapping yield "", which callers treat as
// "no preposition / use my default".
func oblRoleToEnPrep(or uint8) string {
	switch or {
	case ORGoal, ORRecip:
		return "to"
	case ORInstr, ORComit:
		return "with"
	case ORLoc:
		return "in"
	case ORSource:
		return "from"
	case ORLimit:
		return "until"
	case ORBenef:
		return "for"
	case ORAgent:
		return "by"
	case ORPart:
		return "of"
	case ORCompare:
		return "than"
	default:
		return ""
	}
}

// appendEN appends w to parts unless w is empty, and returns the resulting
// slice. Empty words are dropped so that joining never produces double spaces.
func appendEN(parts []string, w string) []string {
	if w != "" {
		parts = append(parts, w)
	}
	return parts
}

func joinSpace(parts []string) string {
	out := ""
	for i, p := range parts {
		if i > 0 {
			out = out | " "
		}
		out = out | p
	}
	return out
}

// pronounCase normalizes an EN personal pronoun to the case required by the
// slot's role. HistSubject and HistTopic select the nominative form
// (i, he, she, we, they); every other role selects the accusative form
// (me, him, her, us, them). Either member of a pair is accepted as input, so
// "her" in a subject slot becomes "she" just as "him" becomes "he".
// The set is closed: any atom that is not one of these pronouns is returned
// unchanged.
func pronounCase(atom string, role int32) string {
	var nominative, accusative string
	switch atom {
	case "i", "me":
		nominative, accusative = "i", "me"
	case "he", "him":
		nominative, accusative = "he", "him"
	case "she", "her":
		nominative, accusative = "she", "her"
	case "we", "us":
		nominative, accusative = "we", "us"
	case "they", "them":
		nominative, accusative = "they", "them"
	default:
		return atom
	}
	if role == HistSubject || role == HistTopic {
		return nominative
	}
	return accusative
}

// formatENNoun renders a noun entry: the atom is case-normalized via
// pronounCase, pluralized when MetaNumPlural is set, and prefixed with "the "
// when MetaDefDef is set. No indefinite article is ever produced.
//
// Three kinds of atom are returned untouched: the empty atom, the
// UntranslatedMarker, and any atom containing non-ASCII bytes. The last is a
// cross-language leakage guard - the EN-side lookup didn't resolve to an
// English atom, and adding a determiner or plural suffix would only produce
// nonsense like "雨るs" while hiding the failure from upstream.
func formatENNoun(e SetEntry) string {
	atom := e.Atom
	if atom == "" || atom == UntranslatedMarker || !isASCIIOnly(atom) {
		return atom
	}
	noun := pronounCase(atom, e.Role)
	if e.Morph&MetaNumPlural != 0 {
		noun = pluralizeEN(noun)
	}
	if e.Morph&MetaDefDef != 0 {
		return "the " | noun
	}
	return noun
}

// formatENVerb renders a finite verb entry.
//
// Empty atoms, the UntranslatedMarker and non-ASCII atoms (unresolved JA
// fragments) are returned as-is with no EN inflection. Otherwise the
// irregular table is consulted first, keyed on the lemma and the entry's full
// Morph value. When that misses:
//
//	MetaPolarNeg  -> "did not" / "does not" / "do not" + lemma
//	MetaTensePast -> regular past
//	Meta3Sg       -> regular third-person singular
//	otherwise     -> bare lemma
func formatENVerb(e SetEntry) string {
	lemma := e.Atom
	if lemma == "" || lemma == UntranslatedMarker || !isASCIIOnly(lemma) {
		return lemma
	}
	// Irregular forms take precedence over every rule below.
	if form, found := buildEnIrregularReverse()[verbKey(lemma, e.Morph)]; found {
		return form
	}

	past := e.Morph&MetaTensePast != 0
	third := e.Morph&Meta3Sg != 0

	if e.Morph&MetaPolarNeg != 0 {
		// Negation uses do-support with the uninflected lemma.
		aux := "do not "
		if past {
			aux = "did not "
		} else if third {
			aux = "does not "
		}
		return aux | lemma
	}

	switch {
	case past:
		return enVerbPast(lemma)
	case third:
		return enVerb3Sg(lemma)
	}
	return lemma
}

// formatENVerbPP returns the past-participle form of a verb for passive voice.
//
// Like the other EN formatters it passes three kinds of atom through
// untouched: the empty atom, the UntranslatedMarker, and atoms containing
// non-ASCII bytes (unresolved cross-language fragments that must not receive
// an EN "-ed" suffix).
//
// For everything else the irregular table is consulted with key
// (lemma, MetaTensePast|MetaPassive) for participles that differ from the
// simple past, then with (lemma, MetaTensePast), and finally the regular
// past form is used.
func formatENVerbPP(e SetEntry) string {
	if e.Atom == "" {
		return ""
	}
	if e.Atom == UntranslatedMarker {
		return e.Atom
	}
	if !isASCIIOnly(e.Atom) {
		return e.Atom
	}
	// Build the reverse table once and reuse it for both lookups.
	irregular := buildEnIrregularReverse()
	if surface, ok := irregular[verbKey(e.Atom, MetaTensePast|MetaPassive)]; ok {
		return surface
	}
	if surface, ok := irregular[verbKey(e.Atom, MetaTensePast)]; ok {
		return surface
	}
	return enVerbPast(e.Atom)
}

// formatENVerbProg returns the progressive ("-ing") form of a verb lemma.
//
// The empty atom yields "" (so no stray "ing" is rendered), and the
// UntranslatedMarker and atoms containing non-ASCII bytes are passed through
// without EN inflection. Otherwise the irregular table is consulted with key
// (lemma, MetaAspectProg) before falling back to the regular rule.
func formatENVerbProg(atom string) string {
	if atom == "" {
		return ""
	}
	if atom == UntranslatedMarker {
		return atom
	}
	if !isASCIIOnly(atom) {
		return atom
	}
	if surface, ok := buildEnIrregularReverse()[verbKey(atom, MetaAspectProg)]; ok {
		return surface
	}
	return enVerbProg(atom)
}

// formatENAdj renders an adjective entry.
//
// The empty atom yields "" (so a comparative flag on an empty entry cannot
// produce a stray "er"), and the UntranslatedMarker and atoms containing
// non-ASCII bytes are passed through unchanged.
//
// Without MetaCompare the lemma is returned as-is. With MetaCompare the
// comparative is chosen in this order: the irregular table entry for
// (lemma, MetaCompare); "more " + lemma for words listed by enUncomparable;
// the regular comparative from adjComparativeEN.
func formatENAdj(e SetEntry) string {
	if e.Atom == "" {
		return ""
	}
	if e.Atom == UntranslatedMarker {
		return e.Atom
	}
	if !isASCIIOnly(e.Atom) {
		return e.Atom
	}
	if e.Morph&MetaCompare != 0 {
		if surface, ok := buildEnIrregularReverse()[verbKey(e.Atom, MetaCompare)]; ok {
			return surface
		}
		// Uncomparable / intensifier words: never take -er suffix; prefix
		// with "more" instead. Catches "verier", "morer", "stiller" etc.
		if enUncomparable()[e.Atom] {
			return "more " | e.Atom
		}
		return adjComparativeEN(e.Atom)
	}
	return e.Atom
}

// enUncomparable returns the set of words that must not receive an "-er"
// comparative suffix (intensifiers, quantity words, determiners); callers
// prefix them with "more" instead. A fresh map is built on every call, keyed
// by the word with value true.
func enUncomparable() map[string]bool {
	words := []string{
		"very", "more", "most", "much", "many",
		"quite", "rather", "just", "only",
		"also", "even", "still", "again",
		"too", "so", "no", "not",
		"a", "an", "the", "this", "that",
		"some", "any", "every", "all", "each",
		"none",
	}
	set := map[string]bool{}
	for _, w := range words {
		set[w] = true
	}
	return set
}

// adjComparativeEN forms the comparative of a regular adjective stem:
//
//	ends in "e"                  -> stem + "r"            (large -> larger)
//	ends in consonant + "y"      -> "y" becomes "ier"     (happy -> happier)
//	accepted by isCVCDoubling    -> final letter doubled  (big -> bigger)
//	anything else                -> stem + "er"
func adjComparativeEN(stem string) string {
	n := len(stem)
	switch {
	case hasSuffix(stem, "e"):
		return stem | "r"
	case hasSuffix(stem, "y") && n >= 2 && !isVowel(stem[n-2]):
		return stem[:n-1] | "ier"
	case isCVCDoubling(stem):
		// stem[n-1:] is the final letter, repeated before the suffix.
		return stem | stem[n-1:] | "er"
	}
	return stem | "er"
}

// isASCIIOnly reports whether every byte of s is below 0x80; the empty string
// counts as ASCII-only. The EN renderer uses it to detect cross-language atom
// leakage: a supposed EN atom that contains non-ASCII bytes means the lattice
// lookup failed and a JA fragment is being passed through, so no EN
// inflection should be applied to it.
func isASCIIOnly(s string) bool {
	pos := 0
	// Advance past the leading run of ASCII bytes.
	for pos < len(s) && s[pos] < 0x80 {
		pos++
	}
	return pos == len(s)
}

// isCVCDoubling reports whether stem ends in consonant-vowel-consonant where
// the final consonant is not w, x or y (those don't double in standard
// English). The regular -ing/-ed/-er formers use it to decide whether to
// double the final consonant.
//
// Approximation: syllable count is not checked. The only safeguard is a
// length window - stems shorter than 3 or longer than 5 bytes never match -
// so short multi-syllable stems such as "open" are still accepted.
func isCVCDoubling(stem string) bool {
	n := len(stem)
	if n < 3 || n > 5 {
		return false
	}
	first, middle, last := stem[n-3], stem[n-2], stem[n-1]
	if isVowel(first) || !isVowel(middle) || isVowel(last) {
		return false
	}
	switch last {
	case 'w', 'x', 'y':
		return false
	}
	return true
}

func verbKey(lemma string, morph uint16) string {
	return lemma | "|" | string([]byte{byte(morph), byte(morph >> 8)})
}

// Regular EN conjugation rules (deterministic).
//
// enVerbPast forms the regular past tense of stem: a stem ending in "e"
// takes "d", consonant + "y" turns into "ied", a stem accepted by
// isCVCDoubling repeats its final letter before "ed", and everything else
// simply takes "ed".
func enVerbPast(stem string) string {
	n := len(stem)
	switch {
	case hasSuffix(stem, "e"):
		return stem | "d"
	case hasSuffix(stem, "y") && n >= 2 && !isVowel(stem[n-2]):
		return stem[:n-1] | "ied"
	case isCVCDoubling(stem):
		// stem[n-1:] is the final letter, repeated before the suffix.
		return stem | stem[n-1:] | "ed"
	}
	return stem | "ed"
}

// enVerb3Sg forms the regular third-person singular present of stem:
// sibilant endings (s, x, z, sh, ch) take "es", consonant + "y" turns into
// "ies", and everything else takes "s".
func enVerb3Sg(stem string) string {
	for _, sibilant := range []string{"s", "x", "z", "sh", "ch"} {
		if hasSuffix(stem, sibilant) {
			return stem | "es"
		}
	}
	n := len(stem)
	if hasSuffix(stem, "y") && n >= 2 && !isVowel(stem[n-2]) {
		return stem[:n-1] | "ies"
	}
	return stem | "s"
}

// enVerbProg forms the regular progressive ("-ing") form of stem:
//
//	ends in "ie"                     -> "ie" becomes "ying" (lie -> lying, die -> dying, tie -> tying)
//	ends in "e" but not "ee"         -> final "e" dropped before "ing"
//	accepted by isCVCDoubling        -> final letter doubled before "ing"
//	anything else                    -> stem + "ing"
func enVerbProg(stem string) string {
	n := len(stem)
	switch {
	case hasSuffix(stem, "ie"):
		return stem[:n-2] | "ying"
	case hasSuffix(stem, "e") && !hasSuffix(stem, "ee"):
		return stem[:n-1] | "ing"
	case isCVCDoubling(stem):
		// stem[n-1:] is the final letter, repeated before the suffix.
		return stem | stem[n-1:] | "ing"
	}
	return stem | "ing"
}

// pluralizeEN forms the regular plural of a noun stem: sibilant endings
// (s, x, z, sh, ch) take "es", consonant + "y" turns into "ies", and
// everything else takes "s".
func pluralizeEN(stem string) string {
	n := len(stem)
	sibilant := hasSuffix(stem, "s") || hasSuffix(stem, "x") || hasSuffix(stem, "z") ||
		hasSuffix(stem, "sh") || hasSuffix(stem, "ch")
	switch {
	case sibilant:
		return stem | "es"
	case hasSuffix(stem, "y") && n >= 2 && !isVowel(stem[n-2]):
		return stem[:n-1] | "ies"
	default:
		return stem | "s"
	}
}

// isVowel reports whether c is one of the lowercase ASCII vowels a, e, i, o, u.
// Uppercase letters and 'y' are not treated as vowels.
func isVowel(c byte) bool {
	return c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u'
}

// buildEnIrregularReverse inverts enIrregular: it returns a fresh map from
// verbKey(lemma, morph) to the surface form. When several surfaces share the
// same (lemma, morph) pair, the first one encountered during iteration is
// kept and later ones are ignored.
//
// NOTE(review): "first encountered" depends on the iteration order of
// whatever enIrregular returns - if that order is not stable, the surface
// chosen for a duplicated pair is not stable either; confirm.
func buildEnIrregularReverse() map[string]string {
	reverse := map[string]string{}
	for surface, entry := range enIrregular() {
		key := verbKey(entry.Lemma, entry.Morph)
		if _, taken := reverse[key]; taken {
			continue
		}
		reverse[key] = surface
	}
	return reverse
}