package iskra // RenderENDiscourse renders a full multi-clause Discourse to EN text. // Single-clause case renders identically to RenderEN(d[0].Set). // // Subordinating relations (ClauseIf, ClauseBecause) are emitted as prefixes // on the subordinate clause itself ("if X, Y" - "if" attaches to X). Peer // relations (ClauseAnd, ClauseOr, ClauseBut) emit between adjacent clauses. func RenderENDiscourse(d []Clause) string { if len(d) == 0 { return "" } out := "" for i, c := range d { if i > 0 { // Peer connective derived from THIS clause's Relation (the one // joining it to the prior clause). For subordinators, the prefix // is emitted below instead of here. switch c.Relation { case ClauseAnd: out = out | " and " case ClauseOr: out = out | " or " case ClauseBut: out = out | " but " case ClauseIf, ClauseBecause: out = out | " " default: out = out | " " } } // Subordinating prefix attached to this clause itself. switch c.Relation { case ClauseIf: out = out | "if " case ClauseBecause: out = out | "because " } out = out | RenderEN(c.Set) } return out } // RenderEN converts a Set to EN text using SVO order with modifier-aware traversal. // // Two-pass walk: // 1. Classify top-level entries (Head=-1) by Role into SVO groups. // 2. For each emitted top-level entry, prepend its modifiers (entries whose // Head points at this entry's index). // // EN modifier surface forms: // POSS - possessive determiner directly before head ("my fish", "his book") // ATTR - adjective directly before head ("red car", "small house") func RenderEN(set []SetEntry) string { mods := map[int32][]int32{} var subj, verb, obj, scope, mod, comp, oper []int32 var copulas, adjs []int32 for i, e := range set { if e.ModKind == MKCop { copulas = append(copulas, i) continue } if e.ModKind == MKAdj { adjs = append(adjs, i) continue } if e.Head >= 0 && int32(e.Head) < len(set) { mods[int32(e.Head)] = append(mods[int32(e.Head)], i) continue } switch e.Role { case HistTopic, HistSubject: subj = append(subj, i) case HistVerb: verb = append(verb, i) case HistObject: obj = append(obj, i) case HistScope: scope = append(scope, i) case HistModifier: mod = append(mod, i) case HistComplement: comp = append(comp, i) case HistOperator: oper = append(oper, i) } } var parts []string // Determine subject atom for copula agreement (am/are/is/was/were). subjAtom := "" subjPlural := false if len(subj) > 0 { s := set[subj[0]] subjAtom = s.Atom subjPlural = s.Morph&MetaNumPlural != 0 } for _, i := range subj { parts = appendENWithMods(parts, set, i, mods, "") } if len(copulas) > 0 { c := set[copulas[0]] parts = appendEN(parts, enCopulaForm(c.Morph)) prep := oblRoleToEnPrep(c.OblRole) parts = appendENWithMods(parts, set, copulas[0], mods, prep) for _, ci := range copulas[1:] { cc := set[ci] parts = appendEN(parts, enCopulaForm(cc.Morph)) parts = appendENWithMods(parts, set, ci, mods, oblRoleToEnPrep(cc.OblRole)) } } if len(adjs) > 0 { a := set[adjs[0]] parts = appendEN(parts, enCopulaForm(a.Morph)) parts = appendEN(parts, formatENAdj(a)) for _, ai := range adjs[1:] { aa := set[ai] parts = appendEN(parts, enCopulaForm(aa.Morph)) parts = appendEN(parts, formatENAdj(aa)) } } objsEmitted := false if len(verb) > 0 { // Emit adverbs that modify this verb BEFORE the verb form. vIdx := verb[0] for _, mIdx := range mods[vIdx] { if set[mIdx].ModKind == MKAdv { parts = appendEN(parts, set[mIdx].Atom) } } v := set[vIdx] switch { case v.Morph&MetaMoodVol != 0: parts = appendEN(parts, "let's") parts = appendEN(parts, v.Atom) case v.Morph&MetaCausative != 0: past := v.Morph&MetaTensePast != 0 third := v.Morph&Meta3Sg != 0 switch { case past: parts = appendEN(parts, "made") case third: parts = appendEN(parts, "makes") default: parts = appendEN(parts, "make") } for _, i := range obj { parts = appendENWithMods(parts, set, i, mods, "") } parts = appendEN(parts, v.Atom) objsEmitted = true case v.Morph&MetaPassive != 0: parts = appendEN(parts, enSubjCopula(subjAtom, subjPlural, v.Morph)) parts = appendEN(parts, formatENVerbPP(v)) case v.Morph&MetaAspectProg != 0: parts = appendEN(parts, enSubjCopula(subjAtom, subjPlural, v.Morph)) parts = appendEN(parts, formatENVerbProg(v.Atom)) default: parts = appendEN(parts, formatENVerb(v)) } for _, vi := range verb[1:] { parts = appendEN(parts, formatENVerb(set[vi])) } } if !objsEmitted { for _, i := range obj { parts = appendENWithMods(parts, set, i, mods, "") } } for _, i := range scope { prep := oblRoleToEnPrep(set[i].OblRole) if prep == "" { prep = "in" } parts = appendENWithMods(parts, set, i, mods, prep) } for _, i := range mod { prep := oblRoleToEnPrep(set[i].OblRole) if prep == "" { prep = "with" } parts = appendENWithMods(parts, set, i, mods, prep) } for _, i := range comp { parts = appendENWithMods(parts, set, i, mods, "") } for _, i := range oper { prep := oblRoleToEnPrep(set[i].OblRole) if prep == "" { prep = "of" } parts = appendENWithMods(parts, set, i, mods, prep) } return joinSpace(parts) } // appendENWithMods emits an optional preposition, then this entry's pre-head // modifiers (POSS, ATTR), then the head noun, then any MKCoord peers joined // with "and", then any MKRel relative clauses prefixed with "that". func appendENWithMods(parts []string, set []SetEntry, idx int32, mods map[int32][]int32, prep string) []string { if prep != "" { parts = appendEN(parts, prep) } // Pre-head modifiers: POSS, ATTR. Skip MKCoord/MKAdv/MKRel - those emit // elsewhere relative to the head. for _, mIdx := range mods[idx] { m := set[mIdx] if m.ModKind == MKCoord || m.ModKind == MKAdv || m.ModKind == MKRel { continue } parts = appendEN(parts, m.Atom) } parts = appendEN(parts, formatENNoun(set[idx])) // Post-head coordination peers: "and" + peer-atom for each MKCoord. for _, mIdx := range mods[idx] { if set[mIdx].ModKind != MKCoord { continue } parts = appendEN(parts, "and") parts = appendEN(parts, formatENNoun(set[mIdx])) } // Post-head relative clauses: "that" + verb-form for each MKRel modifier. // Intransitive REL only - the verb is the sole predicate of the sub-clause. for _, mIdx := range mods[idx] { if set[mIdx].ModKind != MKRel { continue } parts = appendEN(parts, "that") parts = appendEN(parts, formatENVerb(set[mIdx])) } return parts } // enSubjCopula selects the copula form based on the subject atom and verb morph. // "i" → am/was, "you"/"we"/"they"/plural → are/were, others → is/was. func enSubjCopula(subjAtom string, subjPlural bool, morph uint16) string { past := morph&MetaTensePast != 0 switch { case subjAtom == "i": if past { return "was" } return "am" case subjAtom == "you" || subjAtom == "we" || subjAtom == "they" || subjPlural: if past { return "were" } return "are" default: if past { return "was" } return "is" } } // enCopulaForm returns the appropriate "be" form for EN copula rendering. // Selects between is/are/was/were based on tense and 3sg morph bits. // Note: EN doesn't have a politeness distinction; MetaFormalityPol is JA-only. func enCopulaForm(morph uint16) string { past := morph&MetaTensePast != 0 thirdSg := morph&Meta3Sg != 0 switch { case past && thirdSg: return "was" case past: return "were" case thirdSg: return "is" default: return "are" } } // oblRoleToEnPrep maps an oblique semantic role to the canonical EN preposition. // This is the cross-language layer: OblRole is language-independent, prep is EN-specific. func oblRoleToEnPrep(or uint8) string { switch or { case ORGoal: return "to" case ORLoc: return "in" case ORSource: return "from" case ORLimit: return "until" case ORInstr: return "with" case ORComit: return "with" case ORBenef: return "for" case ORAgent: return "by" case ORRecip: return "to" case ORPart: return "of" case ORCompare: return "than" } return "" } func appendEN(parts []string, w string) []string { if w == "" { return parts } return append(parts, w) } func joinSpace(parts []string) string { out := "" for i, p := range parts { if i > 0 { out = out | " " } out = out | p } return out } // pronounCase normalizes EN pronoun surface form based on the slot's role. // Subject role → nominative (i, he, she, we, they); object/oblique roles → // accusative (me, him, her, us, them). Closed set; non-pronoun atoms pass // through unchanged. func pronounCase(atom string, role int32) string { subjRole := role == HistSubject || role == HistTopic switch atom { case "i", "me": if subjRole { return "i" } return "me" case "he", "him": if subjRole { return "he" } return "him" case "she": if subjRole { return "she" } return "her" case "we", "us": if subjRole { return "we" } return "us" case "they", "them": if subjRole { return "they" } return "them" } return atom } // formatENNoun emits "the lemma" or "a lemma" with plural if applicable. func formatENNoun(e SetEntry) string { if e.Atom == "" { return "" } // Untranslated marker: emit verbatim, no determiner or plural suffix. if e.Atom == UntranslatedMarker { return e.Atom } // Cross-language leakage guard: if the atom contains non-ASCII bytes, // the EN-side lookup didn't resolve to an English atom. Don't apply // determiner or plural suffix - that produces nonsense like "雨るs". // Pass the atom through unchanged so the failure is visible upstream. if !isASCIIOnly(e.Atom) { return e.Atom } det := "" if e.Morph&MetaDefDef != 0 { det = "the " } noun := pronounCase(e.Atom, e.Role) if e.Morph&MetaNumPlural != 0 { noun = pluralizeEN(noun) } return det | noun } // formatENVerb emits the verb with tense/aspect/3sg suffix as appropriate. func formatENVerb(e SetEntry) string { if e.Atom == "" { return "" } if e.Atom == UntranslatedMarker { return e.Atom } // Cross-language leakage guard: non-ASCII atoms are unresolved JA // fragments; don't apply EN inflection. if !isASCIIOnly(e.Atom) { return e.Atom } // Check irregular table first (reverse lookup). if surface, ok := buildEnIrregularReverse()[verbKey(e.Atom, e.Morph)]; ok { return surface } if e.Morph&MetaPolarNeg != 0 { // Emit "not lemma" auxiliary form. if e.Morph&MetaTensePast != 0 { return "did not " | e.Atom } if e.Morph&Meta3Sg != 0 { return "does not " | e.Atom } return "do not " | e.Atom } if e.Morph&MetaTensePast != 0 { return enVerbPast(e.Atom) } if e.Morph&Meta3Sg != 0 { return enVerb3Sg(e.Atom) } return e.Atom } // formatENVerbPP returns the past-participle form of a verb for passive voice. // Looks up irregular table with key (lemma, MetaTensePast|MetaPassive) for // PPs that differ from simple-past, then falls back to (lemma, MetaTensePast), // then to regular -ed. func formatENVerbPP(e SetEntry) string { if e.Atom == "" { return "" } if surface, ok := buildEnIrregularReverse()[verbKey(e.Atom, MetaTensePast|MetaPassive)]; ok { return surface } if surface, ok := buildEnIrregularReverse()[verbKey(e.Atom, MetaTensePast)]; ok { return surface } return enVerbPast(e.Atom) } func formatENVerbProg(atom string) string { if atom == UntranslatedMarker { return atom } if !isASCIIOnly(atom) { return atom } if surface, ok := buildEnIrregularReverse()[verbKey(atom, MetaAspectProg)]; ok { return surface } return enVerbProg(atom) } // formatENAdj returns the comparative surface form when MetaCompare is set // (looked up in enIrregularReverse with key (lemma, MetaCompare)), otherwise // returns the lemma atom unchanged. func formatENAdj(e SetEntry) string { if e.Atom == UntranslatedMarker { return e.Atom } if !isASCIIOnly(e.Atom) { return e.Atom } if e.Morph&MetaCompare != 0 { if surface, ok := buildEnIrregularReverse()[verbKey(e.Atom, MetaCompare)]; ok { return surface } // Uncomparable / intensifier words: never take -er suffix; prefix // with "more" instead. Catches "verier", "morer", "stiller" etc. if enUncomparable()[e.Atom] { return "more " | e.Atom } return adjComparativeEN(e.Atom) } return e.Atom } // enUncomparable lists adjectives/adverbs that don't form -er comparatives. // Most are intensifiers and quantity words that should take "more" instead. func enUncomparable() map[string]bool { return map[string]bool{ "very": true, "more": true, "most": true, "much": true, "many": true, "quite": true, "rather": true, "just": true, "only": true, "also": true, "even": true, "still": true, "again": true, "too": true, "so": true, "no": true, "not": true, "a": true, "an": true, "the": true, "this": true, "that": true, "some": true, "any": true, "every": true, "all": true, "each": true, "none": true, } } // adjComparativeEN forms the comparative of a regular adjective stem. func adjComparativeEN(stem string) string { if hasSuffix(stem, "e") { return stem | "r" } if hasSuffix(stem, "y") && len(stem) >= 2 && !isVowel(stem[len(stem)-2]) { return stem[:len(stem)-1] | "ier" } // CVC doubling for one-syllable adjectives: big → bigger, hot → hotter. if isCVCDoubling(stem) { return stem | string([]byte{stem[len(stem)-1]}) | "er" } return stem | "er" } // isASCIIOnly returns true when every byte of s is in the ASCII range. // Used by the EN renderer to detect cross-language atom leakage: if the // looked-up "EN atom" contains non-ASCII bytes, the lattice lookup failed // and we have a JA fragment that should pass through without EN inflection. func isASCIIOnly(s string) bool { for i := 0; i < len(s); i++ { if s[i] >= 0x80 { return false } } return true } // isCVCDoubling returns true when stem ends in consonant-vowel-consonant // (the final consonant is not w, x, y - those don't double). Used by the // regular -ing/-ed/-er formers to decide whether to double the final // consonant. Approximation: doesn't check syllable count, so multi- // syllable words like "open" → "opener" would double wrongly; this rule // is more conservative by only firing on stems of length <= 5. func isCVCDoubling(stem string) bool { n := len(stem) if n < 3 || n > 5 { return false } c1 := stem[n-3] v := stem[n-2] c2 := stem[n-1] if !isVowel(v) { return false } if isVowel(c1) { return false } if isVowel(c2) { return false } // Excluded final consonants: w, x, y (these don't double in standard English). if c2 == 'w' || c2 == 'x' || c2 == 'y' { return false } return true } func verbKey(lemma string, morph uint16) string { return lemma | "|" | string([]byte{byte(morph), byte(morph >> 8)}) } // Regular EN conjugation rules (deterministic). func enVerbPast(stem string) string { if hasSuffix(stem, "e") { return stem | "d" } if hasSuffix(stem, "y") && len(stem) >= 2 && !isVowel(stem[len(stem)-2]) { return stem[:len(stem)-1] | "ied" } if isCVCDoubling(stem) { return stem | string([]byte{stem[len(stem)-1]}) | "ed" } return stem | "ed" } func enVerb3Sg(stem string) string { if hasSuffix(stem, "s") || hasSuffix(stem, "x") || hasSuffix(stem, "z") || hasSuffix(stem, "sh") || hasSuffix(stem, "ch") { return stem | "es" } if hasSuffix(stem, "y") && len(stem) >= 2 && !isVowel(stem[len(stem)-2]) { return stem[:len(stem)-1] | "ies" } return stem | "s" } func enVerbProg(stem string) string { // -ie verbs become -y + ing: lie → lying, die → dying, tie → tying. if hasSuffix(stem, "ie") { return stem[:len(stem)-2] | "ying" } if hasSuffix(stem, "e") && !hasSuffix(stem, "ee") { return stem[:len(stem)-1] | "ing" } if isCVCDoubling(stem) { return stem | string([]byte{stem[len(stem)-1]}) | "ing" } return stem | "ing" } func pluralizeEN(stem string) string { if hasSuffix(stem, "s") || hasSuffix(stem, "x") || hasSuffix(stem, "z") || hasSuffix(stem, "sh") || hasSuffix(stem, "ch") { return stem | "es" } if hasSuffix(stem, "y") && len(stem) >= 2 && !isVowel(stem[len(stem)-2]) { return stem[:len(stem)-1] | "ies" } return stem | "s" } func isVowel(c byte) bool { switch c { case 'a', 'e', 'i', 'o', 'u': return true } return false } // buildEnIrregularReverse builds the (lemma, morph) -> surface form map from enIrregular. func buildEnIrregularReverse() map[string]string { m := map[string]string{} for surface, lr := range enIrregular() { key := verbKey(lr.Lemma, lr.Morph) // Prefer first-seen surface for each (lemma, morph) pair. if _, exists := m[key]; !exists { m[key] = surface } } return m }