lemma_en.mx raw
1 package iskra
2
// LemmaResult is the output of lemmatization: the base form of a word plus
// flags describing the inflection that was recognised on the surface form.
type LemmaResult struct {
	Lemma string // region center (stem/dict form)
	Morph uint16 // morph bits (see Meta* constants); 0 means no inflection recorded
	Class uint8  // verb class (for verbs); 0=non-verb. LemmatizeEN never sets it.
}
9
10 // Morph bit layout (16 bits).
11 // Low byte: cross-language inflection (tense/aspect/etc).
12 // High byte: derivational/voice morphology (passive, causative, etc).
13 //
14 // bit 0: tense 0=non-past 1=past
15 // bit 1: aspect 0=simple 1=progressive
16 // bit 2: polarity 0=affirm 1=negative
17 // bit 3: formality 0=plain 1=polite (JA ます-form)
18 // bit 4: number 0=singular 1=plural
19 // bit 5: definiteness 0=indef 1=def
20 // bit 6: mood 0=indicative 1=volitional (let's...)
21 // bit 7: person/3sg 0=unmarked 1=3rd-singular (EN -s on verbs)
22 // bit 8: voice 0=active 1=passive (JA れる/られる, EN be+V-ed)
23 // bit 9: voice 0=non-causa 1=causative (JA せる/させる, EN make/have)
// bit 10: degree 0=positive 1=comparative (EN -er, JA 〜より)
// bits 11-15: reserved
const (
	MetaTensePast    uint16 = 0x0001 // bit 0: past tense
	MetaAspectProg   uint16 = 0x0002 // bit 1: progressive aspect
	MetaPolarNeg     uint16 = 0x0004 // bit 2: negative polarity
	MetaFormalityPol uint16 = 0x0008 // bit 3: polite formality
	MetaNumPlural    uint16 = 0x0010 // bit 4: plural number
	MetaDefDef       uint16 = 0x0020 // bit 5: definite
	MetaMoodVol      uint16 = 0x0040 // bit 6: volitional mood
	Meta3Sg          uint16 = 0x0080 // bit 7: 3rd-person singular
	MetaPassive      uint16 = 0x0100 // bit 8: passive voice
	MetaCausative    uint16 = 0x0200 // bit 9: causative
	MetaCompare      uint16 = 0x0400 // bit 10: comparative (bigger / 〜より大きい)
)
38
// Verb classes (used for JA conjugation reconstruction).
// Values are consecutive starting at 0; the order below is the numbering.
const (
	VClassNone     uint8 = iota // 0
	VClassIchidan               // 1
	VClassGodanKu               // 2: 行く
	VClassGodanGu               // 3: 泳ぐ
	VClassGodanSu               // 4: 話す
	VClassGodanTsu              // 5: 立つ
	VClassGodanNu               // 6: 死ぬ
	VClassGodanBu               // 7: 遊ぶ
	VClassGodanMu               // 8: 飲む
	VClassGodanRu               // 9: 売る
	VClassGodanU                // 10: 買う
	VClassSuru                  // 11: する (irregular)
	VClassKuru                  // 12: 来る (irregular)
	VClassIAdj                  // 13: i-adjective (赤い, 楽しい)
	VClassBare                  // 14: not a verb - emit unchanged
)
57
// MetaPersonShift is the bit index of the person/3sg flag: its value 7 matches
// the shift used by Meta3Sg (1 << 7).
// Kept for backwards-compat; person field was previously bits 5-7.
const MetaPersonShift = 7
60
61 func (r *Registry) RegisterPhrasalVerb(s string) {
62 r.phrasalVerbs[s] = true
63 }
64
65 func (r *Registry) PhrasalVerbCount() int32 {
66 return len(r.phrasalVerbs)
67 }
68
69 func (r *Registry) HeadWordEN(atom string) string {
70 if r.phrasalVerbs[atom] {
71 return atom
72 }
73 for i := 0; i < len(atom); i++ {
74 if atom[i] == ' ' {
75 return atom[:i]
76 }
77 }
78 return atom
79 }
80
81 // LemmatizeEN reduces an EN surface form to its lemma and morph state.
82 // Only strips inflectional suffixes; derivational morphology preserved.
83 func LemmatizeEN(word string) LemmaResult {
84 // Untranslated-atom marker is a sealed sentinel; pass through unchanged.
85 if word == UntranslatedMarker {
86 return LemmaResult{Lemma: word, Morph: 0}
87 }
88 low := toLowerEN(word)
89
90 // Check irregular table first.
91 if entry, ok := enIrregular()[low]; ok {
92 return entry
93 }
94
95 // Progressive: -ing
96 if len(low) > 4 && hasSuffix(low, "ing") {
97 stem := stripIng(low)
98 if stem != "" {
99 return LemmaResult{Lemma: stem, Morph: MetaAspectProg}
100 }
101 }
102
103 // Past/participle: -ed
104 if len(low) > 3 && hasSuffix(low, "ed") {
105 stem := stripEd(low)
106 if stem != "" {
107 return LemmaResult{Lemma: stem, Morph: MetaTensePast}
108 }
109 }
110
111 // 3rd person singular: -es, -ies, -s
112 if len(low) > 3 && hasSuffix(low, "ies") {
113 stem := low[:len(low)-3] | "y"
114 return LemmaResult{Lemma: stem, Morph: Meta3Sg}
115 }
116 if len(low) > 3 && hasSuffix(low, "es") {
117 base := low[:len(low)-2]
118 last := base[len(base)-1]
119 if last == 's' || last == 'x' || last == 'z' || hasSuffix(base, "sh") || hasSuffix(base, "ch") {
120 return LemmaResult{Lemma: base, Morph: Meta3Sg}
121 }
122 return LemmaResult{Lemma: low[:len(low)-1], Morph: Meta3Sg}
123 }
124 if len(low) > 2 && low[len(low)-1] == 's' && low[len(low)-2] != 's' &&
125 low[len(low)-2] != '\'' {
126 // Avoid stripping from words like "less", "miss", "boss".
127 // Apostrophe-s contractions (let's, it's, he's) are not plurals.
128 c := low[len(low)-2]
129 if c != 'u' && c != 'i' { // avoid "bus" -> "bu", "this" -> "thi"
130 return LemmaResult{Lemma: low[:len(low)-1], Morph: MetaNumPlural}
131 }
132 }
133
134 return LemmaResult{Lemma: low, Morph: 0}
135 }
136
137 func stripIng(word string) string {
138 // word ends in "ing", len > 4
139 base := word[:len(word)-3]
140 if len(base) < 3 {
141 return word
142 }
143 last := base[len(base)-1]
144
145 // Double consonant: running -> run, sitting -> sit
146 if len(base) > 2 && base[len(base)-1] == base[len(base)-2] && isConsonant(last) {
147 return base[:len(base)-1]
148 }
149
150 if !isConsonant(last) || last == 'e' {
151 return base
152 }
153
154 // 'y' at word-end acts as vowel: play, stay, enjoy
155 if last == 'y' {
156 return base
157 }
158
159 // English words never end in bare 'c', 'v', or 'z' - always silent-e
160 if last == 'c' || last == 'v' || last == 'z' {
161 return base | "e"
162 }
163
164 // Two consonants at end: talk, walk, work, help, want, think
165 if len(base) >= 2 && isConsonant(base[len(base)-2]) {
166 return base
167 }
168
169 // Vowel-vowel-consonant (digraph): eat, beat, read, clean, meet
170 if len(base) >= 3 && !isConsonant(base[len(base)-2]) && !isConsonant(base[len(base)-3]) {
171 return base
172 }
173
174 // Single vowel + consonant (CVC): make, give, take, come, write
175 return base | "e"
176 }
177
178 func stripEd(word string) string {
179 // word ends in "ed", len > 3
180 base := word[:len(word)-2]
181 last := base[len(base)-1]
182
183 // -ied: tried -> try
184 if last == 'i' && len(base) > 1 {
185 return base[:len(base)-1] | "y"
186 }
187
188 // Double consonant: stopped -> stop
189 // Guard against natural double-s: guessed -> guess (not gues)
190 if len(base) > 2 && base[len(base)-1] == base[len(base)-2] && isConsonant(last) {
191 // If char before the double is a vowel, the double was likely added
192 // for -ed (stopped, dropped). If consonant, the double is natural
193 // (guessed, passed, missed).
194 if len(base) > 2 && !isConsonant(base[len(base)-3]) {
195 return base[:len(base)-1]
196 }
197 return base
198 }
199
200 // If already ends in 'e': danced -> dance (strip only 'd')
201 if last == 'e' {
202 return base
203 }
204
205 if !isConsonant(last) {
206 return base
207 }
208
209 // English words never end in bare 'c', 'v', or 'z' - always silent-e
210 if last == 'c' || last == 'v' || last == 'z' {
211 return base | "e"
212 }
213
214 // Two consonants at end: walked, helped, worked
215 if len(base) >= 2 && isConsonant(base[len(base)-2]) {
216 return base
217 }
218
219 // Vowel-vowel-consonant (digraph): cleaned, feared, appeared
220 if len(base) >= 3 && !isConsonant(base[len(base)-2]) && !isConsonant(base[len(base)-3]) {
221 return base
222 }
223
224 // Single vowel + consonant (CVC): hoped -> hope, used -> use
225 return base | "e"
226 }
227
// isConsonant reports whether c is a lower-case ASCII letter other than
// a, e, i, o, u. Upper-case letters, digits, punctuation and non-ASCII bytes
// all report false; 'y' counts as a consonant.
func isConsonant(c byte) bool {
	if c < 'a' || c > 'z' {
		return false
	}
	return c != 'a' && c != 'e' && c != 'i' && c != 'o' && c != 'u'
}
235
236 // enIrregular maps surface forms to their lemma + morph state.
237 func enIrregular() map[string]LemmaResult {
238 return map[string]LemmaResult{
239 // be
240 "am": {Lemma: "be"},
241 "is": {Lemma: "be", Morph: Meta3Sg},
242 "are": {Lemma: "be"},
243 "was": {Lemma: "be", Morph: MetaTensePast | Meta3Sg},
244 "were": {Lemma: "be", Morph: MetaTensePast},
245 "been": {Lemma: "be", Morph: MetaTensePast | MetaAspectProg},
246 "being": {Lemma: "be", Morph: MetaAspectProg},
247 // have
248 "has": {Lemma: "have", Morph: Meta3Sg},
249 "had": {Lemma: "have", Morph: MetaTensePast},
250 "having": {Lemma: "have", Morph: MetaAspectProg},
251 // do
252 "does": {Lemma: "do", Morph: Meta3Sg},
253 "did": {Lemma: "do", Morph: MetaTensePast},
254 "done": {Lemma: "do", Morph: MetaTensePast},
255 // go
256 "went": {Lemma: "go", Morph: MetaTensePast},
257 "gone": {Lemma: "go", Morph: MetaTensePast},
258 "goes": {Lemma: "go", Morph: Meta3Sg},
259 // get
260 "got": {Lemma: "get", Morph: MetaTensePast},
261 "gotten": {Lemma: "get", Morph: MetaTensePast},
262 // make
263 "made": {Lemma: "make", Morph: MetaTensePast},
264 // take
265 "took": {Lemma: "take", Morph: MetaTensePast},
266 "taken": {Lemma: "take", Morph: MetaTensePast},
267 // come
268 "came": {Lemma: "come", Morph: MetaTensePast},
269 // see
270 "saw": {Lemma: "see", Morph: MetaTensePast},
271 "seen": {Lemma: "see", Morph: MetaTensePast},
272 // know
273 "knew": {Lemma: "know", Morph: MetaTensePast},
274 "known": {Lemma: "know", Morph: MetaTensePast},
275 // give
276 "gave": {Lemma: "give", Morph: MetaTensePast},
277 "given": {Lemma: "give", Morph: MetaTensePast},
278 // say
279 "said": {Lemma: "say", Morph: MetaTensePast},
280 // tell
281 "told": {Lemma: "tell", Morph: MetaTensePast},
282 // think
283 "thought": {Lemma: "think", Morph: MetaTensePast},
284 // find
285 "found": {Lemma: "find", Morph: MetaTensePast},
286 // leave
287 "left": {Lemma: "leave", Morph: MetaTensePast},
288 // feel
289 "felt": {Lemma: "feel", Morph: MetaTensePast},
290 // become
291 "became": {Lemma: "become", Morph: MetaTensePast},
292 // keep
293 "kept": {Lemma: "keep", Morph: MetaTensePast},
294 // begin
295 "began": {Lemma: "begin", Morph: MetaTensePast},
296 "begun": {Lemma: "begin", Morph: MetaTensePast},
297 // hear
298 "heard": {Lemma: "hear", Morph: MetaTensePast},
299 // hold
300 "held": {Lemma: "hold", Morph: MetaTensePast},
301 // bring
302 "brought": {Lemma: "bring", Morph: MetaTensePast},
303 // write
304 "wrote": {Lemma: "write", Morph: MetaTensePast},
305 "written": {Lemma: "write", Morph: MetaTensePast},
306 // sit
307 "sat": {Lemma: "sit", Morph: MetaTensePast},
308 // stand
309 "stood": {Lemma: "stand", Morph: MetaTensePast},
310 // lose
311 "lost": {Lemma: "lose", Morph: MetaTensePast},
312 // pay
313 "paid": {Lemma: "pay", Morph: MetaTensePast},
314 // meet
315 "met": {Lemma: "meet", Morph: MetaTensePast},
316 // lead
317 "led": {Lemma: "lead", Morph: MetaTensePast},
318 // understand
319 "understood": {Lemma: "understand", Morph: MetaTensePast},
320 // speak
321 "spoke": {Lemma: "speak", Morph: MetaTensePast},
322 "spoken": {Lemma: "speak", Morph: MetaTensePast},
323 // grow
324 "grew": {Lemma: "grow", Morph: MetaTensePast},
325 "grown": {Lemma: "grow", Morph: MetaTensePast},
326 // win
327 "won": {Lemma: "win", Morph: MetaTensePast},
328 // teach
329 "taught": {Lemma: "teach", Morph: MetaTensePast},
330 // buy
331 "bought": {Lemma: "buy", Morph: MetaTensePast},
332 // send
333 "sent": {Lemma: "send", Morph: MetaTensePast},
334 // build
335 "built": {Lemma: "build", Morph: MetaTensePast},
336 // fall
337 "fell": {Lemma: "fall", Morph: MetaTensePast},
338 "fallen": {Lemma: "fall", Morph: MetaTensePast},
339 // sell
340 "sold": {Lemma: "sell", Morph: MetaTensePast},
341 // run
342 "ran": {Lemma: "run", Morph: MetaTensePast},
343 // mean
344 "meant": {Lemma: "mean", Morph: MetaTensePast},
345 // spend
346 "spent": {Lemma: "spend", Morph: MetaTensePast},
347 // catch
348 "caught": {Lemma: "catch", Morph: MetaTensePast},
349 // fly
350 "flew": {Lemma: "fly", Morph: MetaTensePast},
351 "flown": {Lemma: "fly", Morph: MetaTensePast},
352 // eat
353 "ate": {Lemma: "eat", Morph: MetaTensePast},
354 "eaten": {Lemma: "eat", Morph: MetaTensePast},
355 // bite (PP "bitten" gets MetaPassive to distinguish from simple-past "bit")
356 "bit": {Lemma: "bite", Morph: MetaTensePast},
357 "bitten": {Lemma: "bite", Morph: MetaTensePast | MetaPassive},
358 // drink
359 "drank": {Lemma: "drink", Morph: MetaTensePast},
360 "drunk": {Lemma: "drink", Morph: MetaTensePast},
361 // drive
362 "drove": {Lemma: "drive", Morph: MetaTensePast},
363 "driven": {Lemma: "drive", Morph: MetaTensePast},
364 // sing
365 "sang": {Lemma: "sing", Morph: MetaTensePast},
366 "sung": {Lemma: "sing", Morph: MetaTensePast},
367 // swim
368 "swam": {Lemma: "swim", Morph: MetaTensePast},
369 "swum": {Lemma: "swim", Morph: MetaTensePast},
370 // break
371 "broke": {Lemma: "break", Morph: MetaTensePast},
372 "broken": {Lemma: "break", Morph: MetaTensePast},
373 // choose
374 "chose": {Lemma: "choose", Morph: MetaTensePast},
375 "chosen": {Lemma: "choose", Morph: MetaTensePast},
376 // draw
377 "drew": {Lemma: "draw", Morph: MetaTensePast},
378 "drawn": {Lemma: "draw", Morph: MetaTensePast},
379 // forget
380 "forgot": {Lemma: "forget", Morph: MetaTensePast},
381 "forgotten": {Lemma: "forget", Morph: MetaTensePast},
382 // freeze
383 "froze": {Lemma: "freeze", Morph: MetaTensePast},
384 "frozen": {Lemma: "freeze", Morph: MetaTensePast},
385 // hide
386 "hid": {Lemma: "hide", Morph: MetaTensePast},
387 "hidden": {Lemma: "hide", Morph: MetaTensePast},
388 // lie (recline)
389 "lay": {Lemma: "lie", Morph: MetaTensePast},
390 "lain": {Lemma: "lie", Morph: MetaTensePast},
391 // ride
392 "rode": {Lemma: "ride", Morph: MetaTensePast},
393 "ridden": {Lemma: "ride", Morph: MetaTensePast},
394 // rise
395 "rose": {Lemma: "rise", Morph: MetaTensePast},
396 "risen": {Lemma: "rise", Morph: MetaTensePast},
397 // shake
398 "shook": {Lemma: "shake", Morph: MetaTensePast},
399 "shaken": {Lemma: "shake", Morph: MetaTensePast},
400 // steal
401 "stole": {Lemma: "steal", Morph: MetaTensePast},
402 "stolen": {Lemma: "steal", Morph: MetaTensePast},
403 // tear
404 "tore": {Lemma: "tear", Morph: MetaTensePast},
405 "torn": {Lemma: "tear", Morph: MetaTensePast},
406 // throw
407 "threw": {Lemma: "throw", Morph: MetaTensePast},
408 "thrown": {Lemma: "throw", Morph: MetaTensePast},
409 // wake
410 "woke": {Lemma: "wake", Morph: MetaTensePast},
411 "woken": {Lemma: "wake", Morph: MetaTensePast},
412 // wear
413 "wore": {Lemma: "wear", Morph: MetaTensePast},
414 "worn": {Lemma: "wear", Morph: MetaTensePast},
415 // irregular nouns
416 "children": {Lemma: "child", Morph: MetaNumPlural},
417 "men": {Lemma: "man", Morph: MetaNumPlural},
418 "women": {Lemma: "woman", Morph: MetaNumPlural},
419 "people": {Lemma: "person", Morph: MetaNumPlural},
420 "mice": {Lemma: "mouse", Morph: MetaNumPlural},
421 "feet": {Lemma: "foot", Morph: MetaNumPlural},
422 "teeth": {Lemma: "tooth", Morph: MetaNumPlural},
423 "geese": {Lemma: "goose", Morph: MetaNumPlural},
424 "lives": {Lemma: "life", Morph: MetaNumPlural},
425 "wives": {Lemma: "wife", Morph: MetaNumPlural},
426 "knives": {Lemma: "knife", Morph: MetaNumPlural},
427 "wolves": {Lemma: "wolf", Morph: MetaNumPlural},
428 "halves": {Lemma: "half", Morph: MetaNumPlural},
429 "leaves": {Lemma: "leaf", Morph: MetaNumPlural},
430 // modals/negative contractions
431 "can't": {Lemma: "can", Morph: MetaPolarNeg},
432 "won't": {Lemma: "will", Morph: MetaPolarNeg},
433 "don't": {Lemma: "do", Morph: MetaPolarNeg},
434 "doesn't": {Lemma: "do", Morph: MetaPolarNeg | Meta3Sg},
435 "didn't": {Lemma: "do", Morph: MetaTensePast | MetaPolarNeg},
436 "isn't": {Lemma: "be", Morph: MetaPolarNeg | Meta3Sg},
437 "aren't": {Lemma: "be", Morph: MetaPolarNeg},
438 "wasn't": {Lemma: "be", Morph: MetaTensePast | MetaPolarNeg},
439 "weren't": {Lemma: "be", Morph: MetaTensePast | MetaPolarNeg},
440 "haven't": {Lemma: "have", Morph: MetaPolarNeg},
441 "hasn't": {Lemma: "have", Morph: MetaPolarNeg | Meta3Sg},
442 "hadn't": {Lemma: "have", Morph: MetaTensePast | MetaPolarNeg},
443 "couldn't": {Lemma: "can", Morph: MetaTensePast | MetaPolarNeg},
444 "wouldn't": {Lemma: "will", Morph: MetaTensePast | MetaPolarNeg},
445 "shouldn't": {Lemma: "shall", Morph: MetaTensePast | MetaPolarNeg},
446
447 // CVC-rule overrides: multi-syllable verbs whose -ed/-ing forms
448 // produce wrong lemma because the CVC heuristic adds a spurious -e.
449 // (happen -> happene, open -> opene, etc.)
450 "happened": {Lemma: "happen", Morph: MetaTensePast},
451 "happening": {Lemma: "happen", Morph: MetaAspectProg},
452 "opened": {Lemma: "open", Morph: MetaTensePast},
453 "opening": {Lemma: "open", Morph: MetaAspectProg},
454 "listened": {Lemma: "listen", Morph: MetaTensePast},
455 "listening": {Lemma: "listen", Morph: MetaAspectProg},
456 "offered": {Lemma: "offer", Morph: MetaTensePast},
457 "offering": {Lemma: "offer", Morph: MetaAspectProg},
458 "differed": {Lemma: "differ", Morph: MetaTensePast},
459 "suffered": {Lemma: "suffer", Morph: MetaTensePast},
460 "suffering": {Lemma: "suffer", Morph: MetaAspectProg},
461 "wandered": {Lemma: "wander", Morph: MetaTensePast},
462 "wandering": {Lemma: "wander", Morph: MetaAspectProg},
463 "wondered": {Lemma: "wonder", Morph: MetaTensePast},
464 "wondering": {Lemma: "wonder", Morph: MetaAspectProg},
465 "ordered": {Lemma: "order", Morph: MetaTensePast},
466 "ordering": {Lemma: "order", Morph: MetaAspectProg},
467 "murdered": {Lemma: "murder", Morph: MetaTensePast},
468 "answered": {Lemma: "answer", Morph: MetaTensePast},
469 "answering": {Lemma: "answer", Morph: MetaAspectProg},
470 "entered": {Lemma: "enter", Morph: MetaTensePast},
471 "entering": {Lemma: "enter", Morph: MetaAspectProg},
472 "remembered": {Lemma: "remember", Morph: MetaTensePast},
473 "remembering": {Lemma: "remember", Morph: MetaAspectProg},
474 "considered": {Lemma: "consider", Morph: MetaTensePast},
475 "considering": {Lemma: "consider", Morph: MetaAspectProg},
476 "discovered": {Lemma: "discover", Morph: MetaTensePast},
477 "discovering": {Lemma: "discover", Morph: MetaAspectProg},
478 "delivered": {Lemma: "deliver", Morph: MetaTensePast},
479 "developed": {Lemma: "develop", Morph: MetaTensePast},
480 "developing": {Lemma: "develop", Morph: MetaAspectProg},
481 "gathered": {Lemma: "gather", Morph: MetaTensePast},
482 "gathering": {Lemma: "gather", Morph: MetaAspectProg},
483 "threatened": {Lemma: "threaten", Morph: MetaTensePast},
484 "threatening": {Lemma: "threaten", Morph: MetaAspectProg},
485 "strengthened":{Lemma: "strengthen", Morph: MetaTensePast},
486 "loosened": {Lemma: "loosen", Morph: MetaTensePast},
487 "sharpened": {Lemma: "sharpen", Morph: MetaTensePast},
488 "flattened": {Lemma: "flatten", Morph: MetaTensePast},
489 "softened": {Lemma: "soften", Morph: MetaTensePast},
490 "modeled": {Lemma: "model", Morph: MetaTensePast},
491 "modeling": {Lemma: "model", Morph: MetaAspectProg},
492 "traveled": {Lemma: "travel", Morph: MetaTensePast},
493 "traveling": {Lemma: "travel", Morph: MetaAspectProg},
494 "cancelled": {Lemma: "cancel", Morph: MetaTensePast},
495 "canceling": {Lemma: "cancel", Morph: MetaAspectProg},
496
497 // Pronoun objective case: collapse to nominative.
498 // Only objective forms, not possessives (my/your/his/her/their/its serve as
499 // modifiers and are distinct atoms from the subject pronoun).
500 "me": {Lemma: "i"},
501 "myself": {Lemma: "i"},
502 "him": {Lemma: "he"},
503 "himself": {Lemma: "he"},
504 "herself": {Lemma: "she"},
505 "us": {Lemma: "we"},
506 "ourselves": {Lemma: "we"},
507 "them": {Lemma: "they"},
508 "themselves": {Lemma: "they"},
509 "yourself": {Lemma: "you"},
510 "itself": {Lemma: "it"},
511 // Pronoun contractions: collapse to the pronoun. The verb part is
512 // auxiliary (be/have/will) whose morph belongs on the clause's main verb.
513 // The pronoun IS the atom; the auxiliary is structural, not lexical.
514 "i'm": {Lemma: "i"},
515 "i've": {Lemma: "i"},
516 "i'll": {Lemma: "i"},
517 "i'd": {Lemma: "i"},
518 "it's": {Lemma: "it"},
519 "he's": {Lemma: "he"},
520 "she's": {Lemma: "she"},
521 "we're": {Lemma: "we"},
522 "we've": {Lemma: "we"},
523 "we'll": {Lemma: "we"},
524 "they're": {Lemma: "they"},
525 "they've": {Lemma: "they"},
526 "they'll": {Lemma: "they"},
527 "you're": {Lemma: "you"},
528 "you've": {Lemma: "you"},
529 "you'll": {Lemma: "you"},
530 "let's": {Lemma: "let", Morph: MetaMoodVol},
531 "that's": {Lemma: "that"},
532 "there's": {Lemma: "there"},
533 "what's": {Lemma: "what"},
534 "who's": {Lemma: "who"},
535 "here's": {Lemma: "here"},
536
537 // -ing irregulars: ie->y before -ing, and digraph+silent-e cases
538 "dying": {Lemma: "die", Morph: MetaAspectProg},
539 "lying": {Lemma: "lie", Morph: MetaAspectProg},
540 "tying": {Lemma: "tie", Morph: MetaAspectProg},
541 "choosing": {Lemma: "choose", Morph: MetaAspectProg},
542 "going": {Lemma: "go", Morph: MetaAspectProg},
543 "doing": {Lemma: "do", Morph: MetaAspectProg},
544 // Informal contractions
545 "gonna": {Lemma: "go", Morph: MetaMoodVol},
546 "wanna": {Lemma: "want", Morph: MetaMoodVol},
547 "gotta": {Lemma: "get", Morph: MetaMoodVol},
548 "kinda": {Lemma: "kind"},
549 "sorta": {Lemma: "sort"},
550 "lemme": {Lemma: "let"},
551 "gimme": {Lemma: "give"},
552 // Words naturally ending in -ing (not progressive forms)
553 "thing": {Lemma: "thing"},
554 "nothing": {Lemma: "nothing"},
555 "something": {Lemma: "something"},
556 "anything": {Lemma: "anything"},
557 "everything": {Lemma: "everything"},
558 "morning": {Lemma: "morning"},
559 "evening": {Lemma: "evening"},
560 "spring": {Lemma: "spring"},
561 "string": {Lemma: "string"},
562 "during": {Lemma: "during"},
563 "ceiling": {Lemma: "ceiling"},
564 "king": {Lemma: "king"},
565 // Adverbs ending in -s (not plurals)
566 "always": {Lemma: "always"},
567 "sometimes": {Lemma: "sometimes"},
568 "perhaps": {Lemma: "perhaps"},
569 "thus": {Lemma: "thus"},
570 "besides": {Lemma: "besides"},
571 "nowadays": {Lemma: "nowadays"},
572 "towards": {Lemma: "towards"},
573 "upstairs": {Lemma: "upstairs"},
574 "downstairs": {Lemma: "downstairs"},
575 "outdoors": {Lemma: "outdoors"},
576 // Common -ed words with natural double consonants
577 "guessed": {Lemma: "guess", Morph: MetaTensePast},
578 "passed": {Lemma: "pass", Morph: MetaTensePast},
579 "missed": {Lemma: "miss", Morph: MetaTensePast},
580 "kissed": {Lemma: "kiss", Morph: MetaTensePast},
581 "crossed": {Lemma: "cross", Morph: MetaTensePast},
582 "dressed": {Lemma: "dress", Morph: MetaTensePast},
583 "pressed": {Lemma: "press", Morph: MetaTensePast},
584 "stressed": {Lemma: "stress", Morph: MetaTensePast},
585 // -ed words needing silent-e restoration
586 "phrased": {Lemma: "phrase", Morph: MetaTensePast},
587 "increased": {Lemma: "increase", Morph: MetaTensePast},
588 "decreased": {Lemma: "decrease", Morph: MetaTensePast},
589 "pleased": {Lemma: "please", Morph: MetaTensePast},
590 "released": {Lemma: "release", Morph: MetaTensePast},
591 "surprised": {Lemma: "surprise", Morph: MetaTensePast},
592
593 // Comparative forms (Adj-er) mapped to base + MetaCompare. The renderer
594 // reverse-lookup by (lemma, MetaCompare) produces the surface form.
595 "bigger": {Lemma: "big", Morph: MetaCompare},
596 "smaller": {Lemma: "small", Morph: MetaCompare},
597 "taller": {Lemma: "tall", Morph: MetaCompare},
598 "shorter": {Lemma: "short", Morph: MetaCompare},
599 "longer": {Lemma: "long", Morph: MetaCompare},
600 "wider": {Lemma: "wide", Morph: MetaCompare},
601 "thicker": {Lemma: "thick", Morph: MetaCompare},
602 "thinner": {Lemma: "thin", Morph: MetaCompare},
603 "higher": {Lemma: "high", Morph: MetaCompare},
604 "lower": {Lemma: "low", Morph: MetaCompare},
605 "hotter": {Lemma: "hot", Morph: MetaCompare},
606 "colder": {Lemma: "cold", Morph: MetaCompare},
607 "warmer": {Lemma: "warm", Morph: MetaCompare},
608 "cooler": {Lemma: "cool", Morph: MetaCompare},
609 "faster": {Lemma: "fast", Morph: MetaCompare},
610 "slower": {Lemma: "slow", Morph: MetaCompare},
611 "older": {Lemma: "old", Morph: MetaCompare},
612 "younger": {Lemma: "young", Morph: MetaCompare},
613 "happier": {Lemma: "happy", Morph: MetaCompare},
614 "sadder": {Lemma: "sad", Morph: MetaCompare},
615 "easier": {Lemma: "easy", Morph: MetaCompare},
616 "harder": {Lemma: "hard", Morph: MetaCompare},
617 "larger": {Lemma: "large", Morph: MetaCompare},
618 "better": {Lemma: "good", Morph: MetaCompare},
619 "worse": {Lemma: "bad", Morph: MetaCompare},
620 "stronger": {Lemma: "strong", Morph: MetaCompare},
621 "weaker": {Lemma: "weak", Morph: MetaCompare},
622 "heavier": {Lemma: "heavy", Morph: MetaCompare},
623 "lighter": {Lemma: "light", Morph: MetaCompare},
624 "cheaper": {Lemma: "cheap", Morph: MetaCompare},
625 "richer": {Lemma: "rich", Morph: MetaCompare},
626 "poorer": {Lemma: "poor", Morph: MetaCompare},
627 "nicer": {Lemma: "nice", Morph: MetaCompare},
628 "newer": {Lemma: "new", Morph: MetaCompare},
629 }
630 }
631