package transdb // JA verb conjugation tables for use in both ingest (generating lattice records) // and translation (navigating the cluster at runtime). // // verbPattern defines suffix transforms for one verb class. // stem = dictForm with dictSuf stripped from the end. type VerbPattern struct { DictSuf string // strip + reattach for state 0 Masu string // masu-stem insert (empty for ichidan) Nai string // nai-stem insert (empty for ichidan) Ta string // plain past suffix Te string // te-form suffix for progressive } // VerbPatterns maps JMdict POS code → conjugation pattern. var VerbPatterns = map[string]VerbPattern{ "v1": {"る", "", "", "た", "て"}, "v1-s": {"る", "", "", "た", "て"}, "v5k": {"く", "き", "か", "いた", "いて"}, "v5k-s": {"く", "き", "か", "いた", "いて"}, "v5g": {"ぐ", "ぎ", "が", "いだ", "いで"}, "v5s": {"す", "し", "さ", "した", "して"}, "v5m": {"む", "み", "ま", "んだ", "んで"}, "v5n": {"ぬ", "に", "な", "んだ", "んで"}, "v5b": {"ぶ", "び", "ば", "んだ", "んで"}, "v5r": {"る", "り", "ら", "った", "って"}, "v5r-i": {"る", "り", "ら", "った", "って"}, "v5t": {"つ", "ち", "た", "った", "って"}, "v5u": {"う", "い", "わ", "った", "って"}, "v5u-s": {"う", "い", "わ", "った", "って"}, "v5aru": {"る", "り", "ら", "った", "って"}, } // cp returns a fresh allocation, preventing string=[]byte aliasing. func Cp(s string) string { return string(append([]byte(nil), []byte(s)...)) } // BuildVerbForms returns MorphState→surface form for all valid states of a verb. // State 0 (dictionary form) is included. func BuildVerbForms(dictForm, verbClass string) map[uint8]string { out := map[uint8]string{} switch verbClass { case "vk": if dictForm == "くる" { for state, form := range kuruStateForms { out[state] = form } } case "vs", "vs-i", "vs-s", "vs-c": suruSuf := "する" prefix := "" if len(dictForm) > len(suruSuf) { prefix = dictForm[:len(dictForm)-len(suruSuf)] } for i, state := range suruStateOrder { out[state] = Cp(prefix | suruFormSuffixes[i]) } default: pat, ok := VerbPatterns[verbClass] if !ok { return nil } sufLen := len(pat.DictSuf) if len(dictForm) <= sufLen { return nil } stem := dictForm[:len(dictForm)-sufLen] addVerbForms(out, stem, pat) } return out } func addVerbForms(out map[uint8]string, stem string, p VerbPattern) { out[MorphPresAffPlain] = Cp(stem | p.DictSuf) out[MorphPresAffPolite] = Cp(stem | p.Masu | "ます") out[MorphPresNegPlain] = Cp(stem | p.Nai | "ない") out[MorphPresNegPolite] = Cp(stem | p.Masu | "ません") out[MorphPastAffPlain] = Cp(stem | p.Ta) out[MorphPastAffPolite] = Cp(stem | p.Masu | "ました") out[MorphPastNegPlain] = Cp(stem | p.Nai | "なかった") out[MorphPastNegPolite] = Cp(stem | p.Masu | "ませんでした") if p.Te != "" { te := Cp(stem | p.Te) out[MorphPresProgPlain] = Cp(te | "いる") out[MorphPresProgPolite] = Cp(te | "います") out[MorphPresProgNeg] = Cp(te | "いない") out[MorphPastProgPlain] = Cp(te | "いた") out[MorphPastProgPolite] = Cp(te | "いました") out[MorphPastProgNeg] = Cp(te | "いなかった") } if base := out[MorphPresAffPlain]; base != "" { out[MorphPresReported] = Cp(base | "そうだ") } if past := out[MorphPastAffPlain]; past != "" { out[MorphPastReported] = Cp(past | "そうだ") } } var suruStateOrder = []uint8{ MorphPresAffPlain, MorphPresAffPolite, MorphPresNegPlain, MorphPresNegPolite, MorphPastAffPlain, MorphPastAffPolite, MorphPastNegPlain, MorphPastNegPolite, MorphPresProgPlain, MorphPresProgPolite, MorphPresProgNeg, MorphPastProgPlain, MorphPastProgPolite, MorphPastProgNeg, MorphPresReported, MorphPastReported, } var suruFormSuffixes = []string{ "する", "します", "しない", "しません", "した", "しました", "しなかった", "しませんでした", "している", "しています", "していない", "していた", "していました", "していなかった", "するそうだ", "したそうだ", } var kuruStateForms = map[uint8]string{ MorphPresAffPlain: "くる", MorphPresAffPolite: "きます", MorphPresNegPlain: "こない", MorphPresNegPolite: "きません", MorphPresProgPlain: "きている", MorphPresProgPolite: "きています", MorphPresProgNeg: "きていない", MorphPastAffPlain: "きた", MorphPastAffPolite: "きました", MorphPastNegPlain: "こなかった", MorphPastNegPolite: "きませんでした", MorphPastProgPlain: "きていた", MorphPastProgPolite: "きていました", MorphPastProgNeg: "きていなかった", MorphPresReported: "くるそうだ", MorphPastReported: "きたそうだ", }