package iskra // RenderJA converts a Set (list of SetEntry) back to JA text. // The renderer is deterministic: same Set always produces the same JA output. // // Strategy: // - For non-verb slots: emit atom + particle (from Mark if preserved, else // from Role default). // - For verb slots: emit atom + conjugation suffix derived from Class + Morph. // - Iteration order matches the Set order (preserves slot positions). // RenderJADiscourse renders a multi-clause Discourse to JA text. // Single-clause case renders identically to RenderJA(d[0].Set). // // Subordinating prefixes (もし for ClauseIf, なぜなら for ClauseBecause) // attach to the subordinate clause itself. Peer connectives (、 for AND/OR/ // BUT) join adjacent clauses. func RenderJADiscourse(d []Clause) string { if len(d) == 0 { return "" } out := "" for i, c := range d { if i > 0 { switch c.Relation { case ClauseAnd: out = out | "、" case ClauseOr: out = out | "、または" case ClauseBut: out = out | "、しかし" case ClauseIf, ClauseBecause: out = out | "、" default: out = out | "、" } } // Subordinating prefix on this clause. switch c.Relation { case ClauseIf: out = out | "もし" case ClauseBecause: out = out | "なぜなら" } out = out | RenderJA(c.Set) } return out } // RenderJA emits the Set as JA text in SOV order with modifier-aware traversal. // // Three groups, emitted in order: // 1. Top-level non-verb arguments (with their POSS/ATTR modifiers) // 2. Top-level verbs (with their modifiers) // 3. Copular predicates (MKCop entries) - emitted last as sentence-final // predicate with the copula form (だ/だった/です/でした) per Morph. // // POSS/ATTR modifiers attach to non-verb/verb heads (Head points at head). // MKCop entries have Head pointing at the subject but render at end of clause. func RenderJA(set []SetEntry) string { var topNonVerbs, topVerbs, copulas, adjs []int32 mods := map[int32][]int32{} // non-COP/ADJ modifiers: headIdx -> [modIdx, ...] for i, e := range set { if e.ModKind == MKCop { copulas = append(copulas, i) continue } if e.ModKind == MKAdj { adjs = append(adjs, i) continue } if e.Head >= 0 && int32(e.Head) < len(set) { mods[int32(e.Head)] = append(mods[int32(e.Head)], i) continue } if e.Role == HistVerb { topVerbs = append(topVerbs, i) } else { topNonVerbs = append(topNonVerbs, i) } } // Pair each copula/adj predicate with its nearest preceding subject. // Head indices are unreliable in flattened multi-clause sets, so we // assign subjects to predicates by proximity: each predicate claims // the closest unclaimed subject that precedes it in set order. predSubj := map[int32]int32{} // predicate idx -> subject idx usedSubj := map[int32]bool{} allPreds := []int32{:0:len(copulas)+len(adjs)} allPreds = append(allPreds, copulas...) allPreds = append(allPreds, adjs...) for _, pidx := range allPreds { best := -1 for _, sidx := range topNonVerbs { if sidx >= pidx { break } r := set[sidx].Role if (r == HistSubject || r == HistTopic) && !usedSubj[sidx] { best = sidx } } if best >= 0 { predSubj[pidx] = best usedSubj[best] = true } } var out []byte sepIfNeeded := func() { if len(out) > 0 && !endsWithJAParticle(out) && out[len(out)-1] != ' ' { out = append(out, ' ') } } clauseSep := func() { if len(out) > 0 { out = append(out, 0xe3, 0x80, 0x81) // 、 } } for _, idx := range topNonVerbs { if usedSubj[idx] { continue } sepIfNeeded() emitJANonVerbWithMods(&out, set, idx, mods) } for _, idx := range topVerbs { sepIfNeeded() emitJAVerbWithMods(&out, set, idx, mods) } for i, idx := range copulas { if i > 0 { clauseSep() } else { sepIfNeeded() } if sidx, ok := predSubj[idx]; ok { emitJANonVerbWithMods(&out, set, sidx, mods) } emitJACopula(&out, set, idx, mods) } for i, idx := range adjs { if i > 0 || len(copulas) > 0 { clauseSep() } else { sepIfNeeded() } if sidx, ok := predSubj[idx]; ok { emitJANonVerbWithMods(&out, set, sidx, mods) } emitJAPredAdj(&out, set, idx, mods) } return string(out) } // emitJAPredAdj emits a predicative i-adjective as the sentence-final // predicate. The atom is normally the full i-adj form (面白い); when an // EN→JA translation returns a stem without the い suffix (e.g. red→赤), // append い to restore the predicative-adjective surface. Past/negative // forms would replace い with かった/くない but only non-past is handled. // Synthetic 3sg marker (◯) appended after the adjective. func emitJAPredAdj(out *[]byte, set []SetEntry, idx int32, mods map[int32][]int32) { emitJAModifiers(out, set, idx, mods) e := set[idx] *out = append(*out, []byte(e.Atom)...) if !endsInIKana(e.Atom) { *out = append(*out, 0xe3, 0x81, 0x84) // い } if e.Morph&Meta3Sg != 0 { *out = append(*out, []byte(markerToJA()[Mk3Sg])...) } } // emitJACopula emits a copular predicate: complement_atom + copula form + // trailing morph markers. The morph markers MUST come AFTER the copula form, // not between atom and copula, to avoid splitting the copula compound on // re-extraction (学生◯だ would tokenize as [学生, ◯, だ] with だ as a // separate slot; 学生だ◯ tokenizes as [学生だ, ◯] which strips cleanly). // // Locative variant: when the MKCop entry carries OblRole=ORLoc, emit as // 〜の中にいる (existence-locative) instead of 〜だ (nominal copula). func emitJACopula(out *[]byte, set []SetEntry, idx int32, mods map[int32][]int32) { emitJAModifiers(out, set, idx, mods) e := set[idx] // Definiteness and plural markers belong with the noun (before copula). *out = append(*out, []byte(e.Atom)...) if e.Morph&MetaDefDef != 0 { *out = append(*out, []byte(markerToJA()[MkDef])...) } if e.Morph&MetaNumPlural != 0 { *out = append(*out, []byte(markerToJA()[MkPlural])...) } if e.OblRole == ORLoc { // Locative-existence: 〜の中に + いる. 中 = E4 B8 AD, に = E3 81 AB, // いる = E3 81 84 E3 82 8B. *out = append(*out, 0xe3, 0x81, 0xae) // の *out = append(*out, 0xe4, 0xb8, 0xad) // 中 *out = append(*out, 0xe3, 0x81, 0xab) // に *out = append(*out, 0xe3, 0x81, 0x84, 0xe3, 0x82, 0x8b) // いる } else { *out = append(*out, []byte(jaCopulaForm(e.Morph))...) } // 3sg agreement marker AFTER the copula form so it doesn't split it. if e.Morph&Meta3Sg != 0 { *out = append(*out, []byte(markerToJA()[Mk3Sg])...) } } func jaCopulaForm(morph uint16) string { past := morph&MetaTensePast != 0 pol := morph&MetaFormalityPol != 0 switch { case past && pol: return "\xe3\x81\xa7\xe3\x81\x97\xe3\x81\x9f" // でした case past: return "\xe3\x81\xa0\xe3\x81\xa3\xe3\x81\x9f" // だった case pol: return "\xe3\x81\xa7\xe3\x81\x99" // です default: return "\xe3\x81\xa0" // だ } } // emitJAModifiers emits pre-head modifiers (POSS, ATTR, ADV) of the entry at // idx with the appropriate relation marker. MKCoord peers are emitted by the // caller AFTER the head, so they're skipped here. func emitJAModifiers(out *[]byte, set []SetEntry, idx int32, mods map[int32][]int32) { for _, mIdx := range mods[idx] { m := set[mIdx] if m.ModKind == MKCoord { continue } // MKRel: modifier is a verb forming a relative clause. JA renders the // conjugated verb BEFORE the head noun with no particle between. The // verb's morph drives the surface form; no synthetic morph markers // (★/☆/◯) because those would split the conjugation boundary. if m.ModKind == MKRel { form := renderJAVerb(m.Atom, m.Class, m.Morph) *out = append(*out, []byte(form)...) continue } *out = append(*out, []byte(m.Atom)...) emitMorphMarkersJA(out, m.Morph) switch m.ModKind { case MKPoss: *out = append(*out, []byte(markerToJA()[MkNo])...) // の case MKAttr: // い-adjective sits directly before the head, no particle. case MKAdv: // Adverbial: space delimiter so tokenizer separates adv from verb. *out = append(*out, ' ') } } } func emitJANonVerbWithMods(out *[]byte, set []SetEntry, idx int32, mods map[int32][]int32) { emitJAModifiers(out, set, idx, mods) e := set[idx] *out = append(*out, []byte(e.Atom)...) // Skip morph markers + particles when the atom is untranslated or // foreign-language leak; those would be meaningless decorations on a // placeholder. Particle is still emitted to preserve sentence shape. if e.Atom == UntranslatedMarker || containsASCIIByte(e.Atom) { // Still emit the particle so the sentence parses on round-trip. particle := "" if e.Mark != 0 && e.Mark <= 16 && e.Mark != MkTo { particle = markerToJA()[e.Mark] } if particle == "" && e.OblRole != ORNone { particle = oblRoleToJaParticle(e.OblRole) } if particle == "" { particle = renderJAParticle(e.Role, e.Mark) } if particle != "" { *out = append(*out, []byte(particle)...) } return } emitMorphMarkersJA(out, e.Morph) // Coordination peers: emit と + peer-atom for each MKCoord modifier of // this head. The peer's own morph markers stick to its atom; the head's // particle is emitted AFTER all coord members so it scopes the whole group. for _, mIdx := range mods[idx] { if set[mIdx].ModKind != MKCoord { continue } peer := set[mIdx] *out = append(*out, []byte(markerToJA()[MkTo])...) // と *out = append(*out, []byte(peer.Atom)...) emitMorphMarkersJA(out, peer.Morph) } // Priority for particle selection: Mark first, then OblRole, then Role default. // Mark==MkTo on the head is the coordination signal (consumed above), so // fall through to OblRole or Role for the actual scope-particle. var particle string if e.Mark != 0 && e.Mark <= 16 && e.Mark != MkTo { particle = markerToJA()[e.Mark] } if particle == "" && e.OblRole != ORNone { particle = oblRoleToJaParticle(e.OblRole) } if particle == "" { particle = renderJAParticle(e.Role, e.Mark) } if particle != "" { *out = append(*out, []byte(particle)...) } } func emitJAVerbWithMods(out *[]byte, set []SetEntry, idx int32, mods map[int32][]int32) { emitJAModifiers(out, set, idx, mods) e := set[idx] form := renderJAVerb(e.Atom, e.Class, e.Morph) *out = append(*out, []byte(form)...) // 3sg marker emitted via emitMorphMarkersJA semantics, but verb-side // places it AFTER the conjugated form rather than between atom and particle. if e.Morph&Meta3Sg != 0 { *out = append(*out, []byte(markerToJA()[Mk3Sg])...) } if isSentenceFinalParticle(e.Mark) { *out = append(*out, []byte(markerToJAString(e.Mark))...) } } // emitMorphMarkersJA appends synthetic morph markers (★/☆/◯) to the output // for morph bits that have no native JA surface form. EN-only features like // definiteness, plural, and 3sg agreement need explicit markers in JA to // survive the round-trip. func emitMorphMarkersJA(out *[]byte, morph uint16) { if morph&MetaDefDef != 0 { *out = append(*out, []byte(markerToJA()[MkDef])...) } if morph&MetaNumPlural != 0 { *out = append(*out, []byte(markerToJA()[MkPlural])...) } if morph&Meta3Sg != 0 { *out = append(*out, []byte(markerToJA()[Mk3Sg])...) } } func isSentenceFinalParticle(mk uint8) bool { switch mk { case MkYo, MkNe, MkKa, MkYori, MkKedo, MkMo: return true } return false } // renderJAParticle picks the JA particle for a slot. // Priority: OblRole (semantic) > Mark (within-language preserved) > Role default. func renderJAParticle(role int32, mark uint8) string { if mark != 0 && mark <= 16 { s := markerToJA()[mark] if s != "" { return s } } switch role { case HistTopic: return "\xe3\x81\xaf" // は case HistSubject: return "\xe3\x81\x8c" // が case HistObject: return "\xe3\x82\x92" // を case HistScope: return "\xe3\x81\xab" // に case HistModifier: return "\xe3\x81\xa7" // で case HistOperator: return "\xe3\x81\xae" // の case HistComplement: return "" } return "" } // oblRoleToJaParticle maps an oblique semantic role to the canonical JA particle. func oblRoleToJaParticle(or uint8) string { switch or { case ORGoal: return "\xe3\x81\xab" // に (motion goal) case ORLoc: return "\xe3\x81\xa7" // で (location of action) case ORSource: return "\xe3\x81\x8b\xe3\x82\x89" // から case ORLimit: return "\xe3\x81\xbe\xe3\x81\xa7" // まで case ORInstr: return "\xe3\x81\xa7" // で (instrumental) case ORComit: return "\xe3\x81\xa8" // と case ORBenef: return "\xe3\x81\xab" // に (default; could be のために) case ORAgent: return "\xe3\x81\xab" // に (passive agent) case ORRecip: return "\xe3\x81\xab" // に (dative) case ORPart: return "\xe3\x81\xae" // の case ORCompare: return "\xe3\x82\x88\xe3\x82\x8a" // より } return "" } func endsWithJAParticle(b []byte) bool { n := len(b) if n < 3 { return false } for i := 1; i < len(markerToJA()); i++ { p := markerToJA()[i] if p == "" { continue } pb := []byte(p) if n >= len(pb) && string(b[n-len(pb):]) == p { return true } } return false } func markerToJAString(mk uint8) string { if mk >= 1 && mk <= 16 { return markerToJA()[mk] } return "" } // renderJAVerb reconstructs the verb surface form from stem, class, and morph. // Class=0 means "no class detected" - this happens routinely for atoms // returned by cross-language translation (the lattice atom-link table // carries the lemma but not its class). Recover by inferring class from // the lemma's final character + a small lookup table of common verbs. // Falling back to godan-ru as default for unknown kanji stems produces // a parseable surface form that round-trips, even when the specific // class is wrong. func renderJAVerb(stem string, class uint8, morph uint16) string { if stem == UntranslatedMarker { return stem } if containsASCIIByte(stem) { return stem } if class == 0 { class = inferJAClass(stem) } if class == VClassBare { b := []byte(stem) if endsInIKana(stem) && len(b) > 3 && hasKanji(b) { return renderJAIAdjVerbSlot(stem, morph) } return stem } if class == VClassSuru { return conjugateSuru(stem, morph) } if class == VClassKuru { return conjugateKuru(stem, morph) } // Godan dict-form lemmas (行く, 読む, etc.) include the u-row ending. // Strip it before conjugating to avoid double-suffix (行く+く = 行くく). stem = stripGodanDictEnding(stem, class) connector, suffix := jaConjugate(class, morph) return stem | connector | suffix } func renderJAIAdjVerbSlot(stem string, morph uint16) string { b := []byte(stem) root := string(b[:len(b)-3]) neg := morph&MetaPolarNeg != 0 past := morph&MetaTensePast != 0 if neg && past { return root | "\xe3\x81\x8f\xe3\x81\xaa\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // くなかった } if neg { return root | "\xe3\x81\x8f\xe3\x81\xaa\xe3\x81\x84" // くない } if past { return root | "\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // かった } return stem } func conjugateSuru(stem string, morph uint16) string { // stem is "する" for standalone, or "勉強する" for compounds. // Strip する suffix to get the noun-stem (empty for standalone). base := "" sb := []byte(stem) suruBytes := []byte("\xe3\x81\x99\xe3\x82\x8b") // する if len(sb) > 6 && string(sb[len(sb)-6:]) == string(suruBytes) { base = string(sb[:len(sb)-6]) } past := morph&MetaTensePast != 0 prog := morph&MetaAspectProg != 0 neg := morph&MetaPolarNeg != 0 pol := morph&MetaFormalityPol != 0 pass := morph&MetaPassive != 0 caus := morph&MetaCausative != 0 if caus && pass { if past { return base | "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x81\x9f" // させられた } return base | "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x82\x8b" // させられる } if pass { if past { return base | "\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f" // された } return base | "\xe3\x81\x95\xe3\x82\x8c\xe3\x82\x8b" // される } if caus { if past { return base | "\xe3\x81\x95\xe3\x81\x9b\xe3\x81\x9f" // させた } return base | "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x8b" // させる } if pol { if past && neg { return base | "\xe3\x81\x97\xe3\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x81\xa7\xe3\x81\x97\xe3\x81\x9f" // しませんでした } if neg { return base | "\xe3\x81\x97\xe3\x81\xbe\xe3\x81\x9b\xe3\x82\x93" // しません } if past { return base | "\xe3\x81\x97\xe3\x81\xbe\xe3\x81\x97\xe3\x81\x9f" // しました } return base | "\xe3\x81\x97\xe3\x81\xbe\xe3\x81\x99" // します } if prog { if past { return base | "\xe3\x81\x97\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x9f" // していた } return base | "\xe3\x81\x97\xe3\x81\xa6\xe3\x81\x84\xe3\x82\x8b" // している } if neg { if past { return base | "\xe3\x81\x97\xe3\x81\xaa\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // しなかった } return base | "\xe3\x81\x97\xe3\x81\xaa\xe3\x81\x84" // しない } if past { return base | "\xe3\x81\x97\xe3\x81\x9f" // した } return base | "\xe3\x81\x99\xe3\x82\x8b" // する } func conjugateKuru(stem string, morph uint16) string { past := morph&MetaTensePast != 0 prog := morph&MetaAspectProg != 0 neg := morph&MetaPolarNeg != 0 pol := morph&MetaFormalityPol != 0 if pol { if past { return "\xe6\x9d\xa5\xe3\x81\xbe\xe3\x81\x97\xe3\x81\x9f" // 来ました } if neg { return "\xe6\x9d\xa5\xe3\x81\xbe\xe3\x81\x9b\xe3\x82\x93" // 来ません } return "\xe6\x9d\xa5\xe3\x81\xbe\xe3\x81\x99" // 来ます } if prog { if past { return "\xe6\x9d\xa5\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x9f" // 来ていた } return "\xe6\x9d\xa5\xe3\x81\xa6\xe3\x81\x84\xe3\x82\x8b" // 来ている } if neg { if past { return "\xe6\x9d\xa5\xe3\x81\xaa\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // 来なかった } return "\xe6\x9d\xa5\xe3\x81\xaa\xe3\x81\x84" // 来ない } if past { return "\xe6\x9d\xa5\xe3\x81\x9f" // 来た } return "\xe6\x9d\xa5\xe3\x82\x8b" // 来る } func stripGodanDictEnding(stem string, class uint8) string { s := godanDictSuffix(class) if s == "" { return stem } b := []byte(stem) sb := []byte(s) if len(b) > len(sb) && string(b[len(b)-len(sb):]) == s { return string(b[:len(b)-len(sb)]) } return stem } // containsASCIIByte returns true when at least one byte of s is in the // ASCII range. Used by the JA renderer to refuse inflection on atoms that // appear to be foreign (cross-language leakage). Mirrors the EN renderer's // isASCIIOnly guard but from the JA side. func containsASCIIByte(s string) bool { for i := 0; i < len(s); i++ { if s[i] < 0x80 { return true } } return false } // inferJAClass guesses verb class from the lemma. Lemma is now in dict form // for godan verbs (行く, 読む, etc.), so the trailing kana directly identifies // the class. Ichidan lemmas keep the え/い-row stem (食べ, 見, 起き). // // Rules: // 1. Ends in u-row kana (く,ぐ,す,つ,ぬ,ぶ,む,う) → godan of that class // 2. Ends in る: preceding kana is え/い-row → ichidan, else → godan-ru // 3. Ends in え/い-row (no る) → ichidan // 4. Kanji-final → legacy bare-stem, check table or default godan-ru func inferJAClass(lemma string) uint8 { b := []byte(lemma) if len(b) >= 6 && string(b[len(b)-6:]) == "\xe3\x81\x99\xe3\x82\x8b" { // ~する return VClassSuru } if lemma == "\xe6\x9d\xa5\xe3\x82\x8b" { // 来る return VClassKuru } if len(b) < 3 { return VClassBare } last := string(b[len(b)-3:]) switch last { case "\xe3\x81\x8f": // く return VClassGodanKu case "\xe3\x81\x90": // ぐ return VClassGodanGu case "\xe3\x81\x99": // す return VClassGodanSu case "\xe3\x81\xa4": // つ return VClassGodanTsu case "\xe3\x81\xac": // ぬ return VClassGodanNu case "\xe3\x81\xb6": // ぶ return VClassGodanBu case "\xe3\x82\x80": // む return VClassGodanMu case "\xe3\x81\x86": // う return VClassGodanU case "\xe3\x82\x8b": // る - ambiguous: ichidan or godan-ru if len(b) >= 6 { prev := string(b[len(b)-6 : len(b)-3]) if isERowOrIRowKana(prev) { return VClassIchidan } } return VClassGodanRu } if isERowOrIRowKana(last) { return VClassIchidan } if c, ok := jaVerbClassTable()[lemma]; ok { return c } return VClassBare } // jaVerbClassTable maps bare-kanji stems to verb class. Legacy fallback // for atoms that reach inferJAClass without a dict-form ending (e.g. // pre-existing data or non-verb tokens misclassified as verbs). // The primary path now infers class from the lemma's trailing kana. func jaVerbClassTable() map[string]uint8 { return map[string]uint8{ "\xe8\xa1\x8c": VClassGodanKu, // 行 "\xe6\x9b\xb8": VClassGodanKu, // 書 "\xe8\x81\x9e": VClassGodanKu, // 聞 "\xe9\x96\x8b": VClassGodanKu, // 開 "\xe5\x83\x8d": VClassGodanKu, // 働 "\xe7\xb6\x9a": VClassGodanKu, // 続 "\xe6\xad\xa9": VClassGodanKu, // 歩 "\xe7\x9d\x80": VClassGodanKu, // 着 "\xe6\x8f\x8f": VClassGodanKu, // 描 "\xe6\xb3\xb3": VClassGodanGu, // 泳 "\xe6\x80\xa5": VClassGodanGu, // 急 "\xe8\xa9\xb1": VClassGodanSu, // 話 "\xe6\x8a\xbc": VClassGodanSu, // 押 "\xe6\x8e\xa2": VClassGodanSu, // 探 "\xe7\x9b\xb4": VClassGodanSu, // 直 "\xe6\xb8\xa1": VClassGodanSu, // 渡 "\xe6\xb6\x88": VClassGodanSu, // 消 "\xe7\xab\x8b": VClassGodanTsu, // 立 "\xe5\xbe\x85": VClassGodanTsu, // 待 "\xe6\x8c\x81": VClassGodanTsu, // 持 "\xe6\x89\x93": VClassGodanTsu, // 打 "\xe5\x8b\x9d": VClassGodanTsu, // 勝 "\xe8\x82\xb2": VClassGodanTsu, // 育 "\xe6\xad\xbb": VClassGodanNu, // 死 "\xe9\xa3\x9b": VClassGodanBu, // 飛 "\xe5\x91\xbc": VClassGodanBu, // 呼 "\xe9\x81\x8a": VClassGodanBu, // 遊 "\xe7\xb5\x90": VClassGodanBu, // 結 "\xe5\xad\xa6": VClassGodanBu, // 学 "\xe5\x96\x9c": VClassGodanBu, // 喜 "\xe8\xaa\xad": VClassGodanMu, // 読 "\xe9\xa3\xb2": VClassGodanMu, // 飲 "\xe4\xbd\x8f": VClassGodanMu, // 住 "\xe9\x80\xb2": VClassGodanMu, // 進 "\xe5\x8c\x85": VClassGodanMu, // 包 "\xe4\xbc\x91": VClassGodanMu, // 休 "\xe6\x9c\x9b": VClassGodanMu, // 望 "\xe9\xa0\xbc": VClassGodanMu, // 頼 "\xe6\x88\xbb": VClassGodanRu, // 戻 "\xe7\x9f\xa5": VClassGodanRu, // 知 "\xe8\xb5\xb0": VClassGodanRu, // 走 "\xe5\x8f\x96": VClassGodanRu, // 取 "\xe5\x88\x87": VClassGodanRu, // 切 "\xe5\xa3\xb2": VClassGodanRu, // 売 "\xe5\x85\xa5": VClassGodanRu, // 入 "\xe5\xb8\xb0": VClassGodanRu, // 帰 "\xe7\xb5\x82": VClassGodanRu, // 終 "\xe5\xa7\x8b": VClassGodanRu, // 始 "\xe6\xae\x8b": VClassGodanRu, // 残 "\xe4\xb9\x97": VClassGodanRu, // 乗 "\xe6\x8c\xaf": VClassGodanRu, // 振 "\xe9\x80\x81": VClassGodanRu, // 送 "\xe4\xbd\x9c": VClassGodanRu, // 作 "\xe6\x80\x9d": VClassGodanU, // 思 "\xe8\xa8\x80": VClassGodanU, // 言 "\xe8\xb2\xb7": VClassGodanU, // 買 "\xe4\xbd\xbf": VClassGodanU, // 使 "\xe7\xac\x91": VClassGodanU, // 笑 "\xe6\x89\x95": VClassGodanU, // 払 "\xe9\xa1\x98": VClassGodanU, // 願 "\xe6\xad\x8c": VClassGodanU, // 歌 "\xe4\xbc\x9a": VClassGodanU, // 会 "\xe8\xbf\xbd": VClassGodanU, // 追 "\xe9\x80\x9a": VClassGodanU, // 通 "\xe6\x95\x91": VClassGodanU, // 救 "\xe9\x81\x95": VClassGodanU, // 違 "\xe6\x88\xa6": VClassGodanU, // 戦 } } // rawMorphSuffix emits a class-independent surface for morph bits. // Used when class detection failed (class=0) - includes the case of a // cross-language-translated atom that lost its JA class. func rawMorphSuffix(morph uint16) string { if morph&MetaMoodVol != 0 { // Default to ichidan よう - the only class-neutral volitional form // available without verb-class knowledge. Re-extraction recognizes // よう as ichidan, so this round-trips even if not original-faithful. return "\xe3\x82\x88\xe3\x81\x86" // よう } if morph&MetaPassive != 0 { if morph&MetaTensePast != 0 { return "\xe3\x82\x89\xe3\x82\x8c\xe3\x81\x9f" // られた } return "\xe3\x82\x89\xe3\x82\x8c\xe3\x82\x8b" // られる } if morph&MetaCausative != 0 { if morph&MetaTensePast != 0 { return "\xe3\x81\x95\xe3\x81\x9b\xe3\x81\x9f" // させた } return "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x8b" // させる } out := "" if morph&MetaAspectProg != 0 { if morph&MetaTensePast != 0 { out = out | "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x9f" // ていた } else { out = out | "\xe3\x81\xa6\xe3\x81\x84\xe3\x82\x8b" // ている } return out } if morph&MetaPolarNeg != 0 { if morph&MetaTensePast != 0 { out = out | "\xe3\x81\xaa\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // なかった } else { out = out | "\xe3\x81\xaa\xe3\x81\x84" // ない } return out } if morph&MetaFormalityPol != 0 { if morph&MetaTensePast != 0 { return "\xe3\x81\xbe\xe3\x81\x97\xe3\x81\x9f" // ました } return "\xe3\x81\xbe\xe3\x81\x99" // ます } if morph&MetaTensePast != 0 { return "\xe3\x81\x9f" // た } return "" } // jaConjugate returns (connector, suffix) for a verb of given class and morph. // connector is the kana between stem and suffix (e.g. for ichidan: empty for past, // for godan: i-row for polite, etc). func jaConjugate(class uint8, morph uint16) (string, string) { past := morph&MetaTensePast != 0 prog := morph&MetaAspectProg != 0 neg := morph&MetaPolarNeg != 0 pol := morph&MetaFormalityPol != 0 vol := morph&MetaMoodVol != 0 pass := morph&MetaPassive != 0 caus := morph&MetaCausative != 0 // Causative-passive (made to V): させられる if caus && pass { if past { return jaCausPassPast(class) } return jaCausPass(class) } // Passive alone if pass { if past { return jaPassivePast(class) } return jaPassive(class) } // Causative alone if caus { if past { return jaCausativePast(class) } return jaCausative(class) } if vol { return jaVolitional(class) } // Polite forms: ます, ました, ません if pol { istem := jaIStem(class) if past && neg { return istem, "\xe3\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x81\xa7\xe3\x81\x97\xe3\x81\x9f" // ませんでした (rare; default to ません) } if neg { return istem, "\xe3\x81\xbe\xe3\x81\x9b\xe3\x82\x93" // ません } if past { return istem, "\xe3\x81\xbe\xe3\x81\x97\xe3\x81\x9f" // ました } return istem, "\xe3\x81\xbe\xe3\x81\x99" // ます } // Progressive: ている, ていた if prog { teconn, te := jaTeForm(class) if past { return teconn, te | "\xe3\x81\x84\xe3\x81\x9f" // ていた / でいた } return teconn, te | "\xe3\x81\x84\xe3\x82\x8b" // ている / でいる } // Negative if neg { nstem := jaNegStem(class) if past { return nstem, "\xe3\x81\xaa\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // なかった } return nstem, "\xe3\x81\xaa\xe3\x81\x84" // ない } // Past (plain) if past { return jaPastForm(class) } // Dict form (non-past affirmative plain) return jaDictForm(class) } // jaDictForm returns the dict-form ending for each class. // For ichidan, the connector (え/い-row) stays in the stem, suffix is る. // For godan, the kanji-stem gets the u-row ending appended directly. func jaDictForm(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x82\x8b" // る case VClassGodanKu: return "", "\xe3\x81\x8f" // く case VClassGodanGu: return "", "\xe3\x81\x90" // ぐ case VClassGodanSu: return "", "\xe3\x81\x99" // す case VClassGodanTsu: return "", "\xe3\x81\xa4" // つ case VClassGodanNu: return "", "\xe3\x81\xac" // ぬ case VClassGodanBu: return "", "\xe3\x81\xb6" // ぶ case VClassGodanMu: return "", "\xe3\x82\x80" // む case VClassGodanRu: return "", "\xe3\x82\x8b" // る case VClassGodanU: return "", "\xe3\x81\x86" // う } return "", "\xe3\x82\x8b" // default る } // jaPastForm returns (connector, suffix) for past-tense plain form. // Ichidan: stem + た. Godan: contracted forms. func jaPastForm(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x81\x9f" // た case VClassGodanKu: return "\xe3\x81\x84", "\xe3\x81\x9f" // いた case VClassGodanGu: return "\xe3\x81\x84", "\xe3\x81\xa0" // いだ case VClassGodanSu: return "\xe3\x81\x97", "\xe3\x81\x9f" // した case VClassGodanTsu, VClassGodanRu, VClassGodanU: return "\xe3\x81\xa3", "\xe3\x81\x9f" // った case VClassGodanNu, VClassGodanBu, VClassGodanMu: return "\xe3\x82\x93", "\xe3\x81\xa0" // んだ } return "", "\xe3\x81\x9f" } // jaTeForm returns (connector, te-form-suffix) - basically same as past but te/で. func jaTeForm(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x81\xa6" // て case VClassGodanKu: return "\xe3\x81\x84", "\xe3\x81\xa6" // いて case VClassGodanGu: return "\xe3\x81\x84", "\xe3\x81\xa7" // いで case VClassGodanSu: return "\xe3\x81\x97", "\xe3\x81\xa6" // して case VClassGodanTsu, VClassGodanRu, VClassGodanU: return "\xe3\x81\xa3", "\xe3\x81\xa6" // って case VClassGodanNu, VClassGodanBu, VClassGodanMu: return "\xe3\x82\x93", "\xe3\x81\xa7" // んで } return "", "\xe3\x81\xa6" } // jaIStem returns the i-row stem connector for polite forms. func jaIStem(class uint8) string { switch class { case VClassIchidan: return "" // ichidan stem connects directly case VClassGodanKu: return "\xe3\x81\x8d" // き case VClassGodanGu: return "\xe3\x81\x8e" // ぎ case VClassGodanSu: return "\xe3\x81\x97" // し case VClassGodanTsu: return "\xe3\x81\xa1" // ち case VClassGodanNu: return "\xe3\x81\xab" // に case VClassGodanBu: return "\xe3\x81\xb3" // び case VClassGodanMu: return "\xe3\x81\xbf" // み case VClassGodanRu: return "\xe3\x82\x8a" // り case VClassGodanU: return "\xe3\x81\x84" // い } return "" } // jaNegStem returns the a-row stem connector for negative forms. func jaNegStem(class uint8) string { switch class { case VClassIchidan: return "" case VClassGodanKu: return "\xe3\x81\x8b" // か case VClassGodanGu: return "\xe3\x81\x8c" // が case VClassGodanSu: return "\xe3\x81\x95" // さ case VClassGodanTsu: return "\xe3\x81\x9f" // た - wait, this collides with た suffix. // Actually godan-tsu negative is た+ない = たない. But that conflicts with past suffix た. // Use 立つ → 立たない (the あ-row of つ is た). Need to be careful. case VClassGodanNu: return "\xe3\x81\xaa" // な case VClassGodanBu: return "\xe3\x81\xb0" // ば case VClassGodanMu: return "\xe3\x81\xbe" // ま case VClassGodanRu: return "\xe3\x82\x89" // ら case VClassGodanU: return "\xe3\x82\x8f" // わ } return "" } // jaVolitional returns (connector, suffix) for volitional form (let's...). func jaVolitional(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x82\x88\xe3\x81\x86" // よう case VClassGodanKu: return "\xe3\x81\x93", "\xe3\x81\x86" // こう case VClassGodanGu: return "\xe3\x81\x94", "\xe3\x81\x86" // ごう case VClassGodanSu: return "\xe3\x81\x9d", "\xe3\x81\x86" // そう case VClassGodanTsu: return "\xe3\x81\xa8", "\xe3\x81\x86" // とう case VClassGodanNu: return "\xe3\x81\xae", "\xe3\x81\x86" // のう case VClassGodanBu: return "\xe3\x81\xbc", "\xe3\x81\x86" // ぼう case VClassGodanMu: return "\xe3\x82\x82", "\xe3\x81\x86" // もう case VClassGodanRu: return "\xe3\x82\x8d", "\xe3\x81\x86" // ろう case VClassGodanU: return "\xe3\x81\x8a", "\xe3\x81\x86" // おう } return "", "\xe3\x82\x88\xe3\x81\x86" } // jaPassive: ichidan stem + られる, godan a-row stem + れる. func jaPassive(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x82\x89\xe3\x82\x8c\xe3\x82\x8b" // られる } // Godan: a-row connector + れる return jaNegStem(class), "\xe3\x82\x8c\xe3\x82\x8b" // a-row + れる } func jaPassivePast(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x82\x89\xe3\x82\x8c\xe3\x81\x9f" // られた } return jaNegStem(class), "\xe3\x82\x8c\xe3\x81\x9f" // a-row + れた } // jaCausative: ichidan stem + させる, godan a-row stem + せる. func jaCausative(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x8b" // させる } return jaNegStem(class), "\xe3\x81\x9b\xe3\x82\x8b" // a-row + せる } func jaCausativePast(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x81\x95\xe3\x81\x9b\xe3\x81\x9f" // させた } return jaNegStem(class), "\xe3\x81\x9b\xe3\x81\x9f" // a-row + せた } // jaCausPass: ichidan stem + させられる, godan a-row + せられる. func jaCausPass(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x82\x8b" // させられる } return jaNegStem(class), "\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x82\x8b" // a-row + せられる } func jaCausPassPast(class uint8) (string, string) { switch class { case VClassIchidan: return "", "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x81\x9f" // させられた } return jaNegStem(class), "\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x81\x9f" // a-row + せられた }