morph.mx raw
1 package transdb
2
// VerbPattern holds the suffix transforms for one Japanese verb class.
//
// The stem of a verb is its dictionary form with DictSuf removed from the
// end; each conjugated surface form is that stem followed by one of the
// inserts or suffixes below. The conjugation tables built from these
// patterns are used both by ingest (generating lattice records) and by
// translation (navigating the cluster at runtime).
type VerbPattern struct {
	DictSuf string // dictionary-form ending: stripped to get the stem, re-attached for state 0
	Masu    string // inserted between the stem and the polite endings; empty for ichidan
	Nai     string // inserted between the stem and the negative endings; empty for ichidan
	Ta      string // plain past suffix, appended to the stem
	Te      string // te-form suffix, appended to the stem; base of the progressive forms
}
15
16 // VerbPatterns maps JMdict POS code → conjugation pattern.
17 var VerbPatterns = map[string]VerbPattern{
18 "v1": {"る", "", "", "た", "て"},
19 "v1-s": {"る", "", "", "た", "て"},
20 "v5k": {"く", "き", "か", "いた", "いて"},
21 "v5k-s": {"く", "き", "か", "いた", "いて"},
22 "v5g": {"ぐ", "ぎ", "が", "いだ", "いで"},
23 "v5s": {"す", "し", "さ", "した", "して"},
24 "v5m": {"む", "み", "ま", "んだ", "んで"},
25 "v5n": {"ぬ", "に", "な", "んだ", "んで"},
26 "v5b": {"ぶ", "び", "ば", "んだ", "んで"},
27 "v5r": {"る", "り", "ら", "った", "って"},
28 "v5r-i": {"る", "り", "ら", "った", "って"},
29 "v5t": {"つ", "ち", "た", "った", "って"},
30 "v5u": {"う", "い", "わ", "った", "って"},
31 "v5u-s": {"う", "い", "わ", "った", "って"},
32 "v5aru": {"る", "り", "ら", "った", "って"},
33 }
34
// Cp returns a copy of s held in a freshly allocated backing array, so the
// result does not share memory with s or with any byte slice s may alias.
func Cp(s string) string {
	buf := make([]byte, len(s))
	copy(buf, s)
	return string(buf)
}
39
40 // BuildVerbForms returns MorphState→surface form for all valid states of a verb.
41 // State 0 (dictionary form) is included.
42 func BuildVerbForms(dictForm, verbClass string) map[uint8]string {
43 out := map[uint8]string{}
44
45 switch verbClass {
46 case "vk":
47 if dictForm == "くる" {
48 for state, form := range kuruStateForms {
49 out[state] = form
50 }
51 }
52
53 case "vs", "vs-i", "vs-s", "vs-c":
54 suruSuf := "する"
55 prefix := ""
56 if len(dictForm) > len(suruSuf) {
57 prefix = dictForm[:len(dictForm)-len(suruSuf)]
58 }
59 for i, state := range suruStateOrder {
60 out[state] = Cp(prefix | suruFormSuffixes[i])
61 }
62
63 default:
64 pat, ok := VerbPatterns[verbClass]
65 if !ok {
66 return nil
67 }
68 sufLen := len(pat.DictSuf)
69 if len(dictForm) <= sufLen {
70 return nil
71 }
72 stem := dictForm[:len(dictForm)-sufLen]
73 addVerbForms(out, stem, pat)
74 }
75
76 return out
77 }
78
79 func addVerbForms(out map[uint8]string, stem string, p VerbPattern) {
80 out[MorphPresAffPlain] = Cp(stem | p.DictSuf)
81 out[MorphPresAffPolite] = Cp(stem | p.Masu | "ます")
82 out[MorphPresNegPlain] = Cp(stem | p.Nai | "ない")
83 out[MorphPresNegPolite] = Cp(stem | p.Masu | "ません")
84 out[MorphPastAffPlain] = Cp(stem | p.Ta)
85 out[MorphPastAffPolite] = Cp(stem | p.Masu | "ました")
86 out[MorphPastNegPlain] = Cp(stem | p.Nai | "なかった")
87 out[MorphPastNegPolite] = Cp(stem | p.Masu | "ませんでした")
88 if p.Te != "" {
89 te := Cp(stem | p.Te)
90 out[MorphPresProgPlain] = Cp(te | "いる")
91 out[MorphPresProgPolite] = Cp(te | "います")
92 out[MorphPresProgNeg] = Cp(te | "いない")
93 out[MorphPastProgPlain] = Cp(te | "いた")
94 out[MorphPastProgPolite] = Cp(te | "いました")
95 out[MorphPastProgNeg] = Cp(te | "いなかった")
96 }
97 if base := out[MorphPresAffPlain]; base != "" {
98 out[MorphPresReported] = Cp(base | "そうだ")
99 }
100 if past := out[MorphPastAffPlain]; past != "" {
101 out[MorphPastReported] = Cp(past | "そうだ")
102 }
103 }
104
105 var suruStateOrder = []uint8{
106 MorphPresAffPlain, MorphPresAffPolite, MorphPresNegPlain, MorphPresNegPolite,
107 MorphPastAffPlain, MorphPastAffPolite, MorphPastNegPlain, MorphPastNegPolite,
108 MorphPresProgPlain, MorphPresProgPolite, MorphPresProgNeg,
109 MorphPastProgPlain, MorphPastProgPolite, MorphPastProgNeg,
110 MorphPresReported, MorphPastReported,
111 }
112
// suruFormSuffixes holds the conjugated forms of する, appended to a verb's
// prefix to build its surface forms. It is index-aligned with
// suruStateOrder: entry i is the form for the state suruStateOrder[i].
var suruFormSuffixes = []string{
	// present: plain, polite, negative plain, negative polite
	"する",
	"します",
	"しない",
	"しません",
	// past: plain, polite, negative plain, negative polite
	"した",
	"しました",
	"しなかった",
	"しませんでした",
	// present progressive: plain, polite, negative
	"している",
	"しています",
	"していない",
	// past progressive: plain, polite, negative
	"していた",
	"していました",
	"していなかった",
	// reported: present, past
	"するそうだ",
	"したそうだ",
}
120
121 var kuruStateForms = map[uint8]string{
122 MorphPresAffPlain: "くる", MorphPresAffPolite: "きます",
123 MorphPresNegPlain: "こない", MorphPresNegPolite: "きません",
124 MorphPresProgPlain: "きている", MorphPresProgPolite: "きています",
125 MorphPresProgNeg: "きていない",
126 MorphPastAffPlain: "きた", MorphPastAffPolite: "きました",
127 MorphPastNegPlain: "こなかった", MorphPastNegPolite: "きませんでした",
128 MorphPastProgPlain: "きていた", MorphPastProgPolite: "きていました",
129 MorphPastProgNeg: "きていなかった",
130 MorphPresReported: "くるそうだ", MorphPastReported: "きたそうだ",
131 }
132