lemma_en.mx raw

   1  package iskra
   2  
// LemmaResult is the output of lemmatization: the reduced form of a word
// together with the inflection information that was removed from it.
type LemmaResult struct {
	Lemma string // region center (stem/dict form)
	Morph uint16 // morph bits: bitwise OR of Meta* constants; 0 when nothing is marked
	Class uint8  // verb class (for verbs), see the VClass* constants; 0=non-verb
}
   9  
  10  // Morph bit layout (16 bits).
  11  // Low byte: cross-language inflection (tense/aspect/etc).
  12  // High byte: derivational/voice morphology (passive, causative, etc).
  13  //
  14  //   bit  0: tense        0=non-past    1=past
  15  //   bit  1: aspect       0=simple      1=progressive
  16  //   bit  2: polarity     0=affirm      1=negative
  17  //   bit  3: formality    0=plain       1=polite (JA ます-form)
  18  //   bit  4: number       0=singular    1=plural
  19  //   bit  5: definiteness 0=indef       1=def
  20  //   bit  6: mood         0=indicative  1=volitional (let's...)
  21  //   bit  7: person/3sg   0=unmarked    1=3rd-singular (EN -s on verbs)
  22  //   bit  8: voice        0=active      1=passive (JA れる/られる, EN be+V-ed)
  23  //   bit  9: voice        0=non-causa   1=causative (JA せる/させる, EN make/have)
  24  //   bits 10-15: reserved
  25  const (
  26  	MetaTensePast    uint16 = 1 << 0
  27  	MetaAspectProg   uint16 = 1 << 1
  28  	MetaPolarNeg     uint16 = 1 << 2
  29  	MetaFormalityPol uint16 = 1 << 3
  30  	MetaNumPlural    uint16 = 1 << 4
  31  	MetaDefDef       uint16 = 1 << 5
  32  	MetaMoodVol      uint16 = 1 << 6
  33  	Meta3Sg          uint16 = 1 << 7
  34  	MetaPassive      uint16 = 1 << 8
  35  	MetaCausative    uint16 = 1 << 9
  36  	MetaCompare      uint16 = 1 << 10 // comparative (bigger / 〜より大きい)
  37  )
  38  
  39  // Verb classes (used for JA conjugation reconstruction).
  40  const (
  41  	VClassNone     uint8 = 0
  42  	VClassIchidan  uint8 = 1
  43  	VClassGodanKu  uint8 = 2 // 行く
  44  	VClassGodanGu  uint8 = 3 // 泳ぐ
  45  	VClassGodanSu  uint8 = 4 // 話す
  46  	VClassGodanTsu uint8 = 5 // 立つ
  47  	VClassGodanNu  uint8 = 6 // 死ぬ
  48  	VClassGodanBu  uint8 = 7 // 遊ぶ
  49  	VClassGodanMu  uint8 = 8 // 飲む
  50  	VClassGodanRu  uint8 = 9 // 売る
  51  	VClassGodanU   uint8 = 10 // 買う
  52  	VClassSuru     uint8 = 11 // する (irregular)
  53  	VClassKuru     uint8 = 12 // 来る (irregular)
  54  	VClassIAdj     uint8 = 13 // i-adjective (赤い, 楽しい)
  55  	VClassBare     uint8 = 14 // not a verb - emit unchanged
  56  )
  57  
// MetaPersonShift is kept for backwards-compat; the person field was
// previously bits 5-7. Its value, 7, is the bit index of Meta3Sg.
// NOTE(review): a field spanning bits 5-7 would normally start at shift 5,
// not 7 — confirm which of the two legacy callers actually rely on.
const MetaPersonShift = 7
  60  
// RegisterPhrasalVerb marks s in the registry's phrasal-verb set, so that
// HeadWordEN returns it whole instead of cutting it at the first space.
// NOTE(review): this assumes r.phrasalVerbs is an already-allocated map;
// writing into a nil map panics — confirm the Registry constructor creates it.
func (r *Registry) RegisterPhrasalVerb(s string) {
	r.phrasalVerbs[s] = true
}
  64  
  65  func (r *Registry) PhrasalVerbCount() int32 {
  66  	return len(r.phrasalVerbs)
  67  }
  68  
  69  func (r *Registry) HeadWordEN(atom string) string {
  70  	if r.phrasalVerbs[atom] {
  71  		return atom
  72  	}
  73  	for i := 0; i < len(atom); i++ {
  74  		if atom[i] == ' ' {
  75  			return atom[:i]
  76  		}
  77  	}
  78  	return atom
  79  }
  80  
  81  // LemmatizeEN reduces an EN surface form to its lemma and morph state.
  82  // Only strips inflectional suffixes; derivational morphology preserved.
  83  func LemmatizeEN(word string) LemmaResult {
  84  	// Untranslated-atom marker is a sealed sentinel; pass through unchanged.
  85  	if word == UntranslatedMarker {
  86  		return LemmaResult{Lemma: word, Morph: 0}
  87  	}
  88  	low := toLowerEN(word)
  89  
  90  	// Check irregular table first.
  91  	if entry, ok := enIrregular()[low]; ok {
  92  		return entry
  93  	}
  94  
  95  	// Progressive: -ing
  96  	if len(low) > 4 && hasSuffix(low, "ing") {
  97  		stem := stripIng(low)
  98  		if stem != "" {
  99  			return LemmaResult{Lemma: stem, Morph: MetaAspectProg}
 100  		}
 101  	}
 102  
 103  	// Past/participle: -ed
 104  	if len(low) > 3 && hasSuffix(low, "ed") {
 105  		stem := stripEd(low)
 106  		if stem != "" {
 107  			return LemmaResult{Lemma: stem, Morph: MetaTensePast}
 108  		}
 109  	}
 110  
 111  	// 3rd person singular: -es, -ies, -s
 112  	if len(low) > 3 && hasSuffix(low, "ies") {
 113  		stem := low[:len(low)-3] | "y"
 114  		return LemmaResult{Lemma: stem, Morph: Meta3Sg}
 115  	}
 116  	if len(low) > 3 && hasSuffix(low, "es") {
 117  		base := low[:len(low)-2]
 118  		last := base[len(base)-1]
 119  		if last == 's' || last == 'x' || last == 'z' || hasSuffix(base, "sh") || hasSuffix(base, "ch") {
 120  			return LemmaResult{Lemma: base, Morph: Meta3Sg}
 121  		}
 122  		return LemmaResult{Lemma: low[:len(low)-1], Morph: Meta3Sg}
 123  	}
 124  	if len(low) > 2 && low[len(low)-1] == 's' && low[len(low)-2] != 's' &&
 125  		low[len(low)-2] != '\'' {
 126  		// Avoid stripping from words like "less", "miss", "boss".
 127  		// Apostrophe-s contractions (let's, it's, he's) are not plurals.
 128  		c := low[len(low)-2]
 129  		if c != 'u' && c != 'i' { // avoid "bus" -> "bu", "this" -> "thi"
 130  			return LemmaResult{Lemma: low[:len(low)-1], Morph: MetaNumPlural}
 131  		}
 132  	}
 133  
 134  	return LemmaResult{Lemma: low, Morph: 0}
 135  }
 136  
 137  func stripIng(word string) string {
 138  	// word ends in "ing", len > 4
 139  	base := word[:len(word)-3]
 140  	if len(base) < 3 {
 141  		return word
 142  	}
 143  	last := base[len(base)-1]
 144  
 145  	// Double consonant: running -> run, sitting -> sit
 146  	if len(base) > 2 && base[len(base)-1] == base[len(base)-2] && isConsonant(last) {
 147  		return base[:len(base)-1]
 148  	}
 149  
 150  	if !isConsonant(last) || last == 'e' {
 151  		return base
 152  	}
 153  
 154  	// 'y' at word-end acts as vowel: play, stay, enjoy
 155  	if last == 'y' {
 156  		return base
 157  	}
 158  
 159  	// English words never end in bare 'c', 'v', or 'z' - always silent-e
 160  	if last == 'c' || last == 'v' || last == 'z' {
 161  		return base | "e"
 162  	}
 163  
 164  	// Two consonants at end: talk, walk, work, help, want, think
 165  	if len(base) >= 2 && isConsonant(base[len(base)-2]) {
 166  		return base
 167  	}
 168  
 169  	// Vowel-vowel-consonant (digraph): eat, beat, read, clean, meet
 170  	if len(base) >= 3 && !isConsonant(base[len(base)-2]) && !isConsonant(base[len(base)-3]) {
 171  		return base
 172  	}
 173  
 174  	// Single vowel + consonant (CVC): make, give, take, come, write
 175  	return base | "e"
 176  }
 177  
 178  func stripEd(word string) string {
 179  	// word ends in "ed", len > 3
 180  	base := word[:len(word)-2]
 181  	last := base[len(base)-1]
 182  
 183  	// -ied: tried -> try
 184  	if last == 'i' && len(base) > 1 {
 185  		return base[:len(base)-1] | "y"
 186  	}
 187  
 188  	// Double consonant: stopped -> stop
 189  	// Guard against natural double-s: guessed -> guess (not gues)
 190  	if len(base) > 2 && base[len(base)-1] == base[len(base)-2] && isConsonant(last) {
 191  		// If char before the double is a vowel, the double was likely added
 192  		// for -ed (stopped, dropped). If consonant, the double is natural
 193  		// (guessed, passed, missed).
 194  		if len(base) > 2 && !isConsonant(base[len(base)-3]) {
 195  			return base[:len(base)-1]
 196  		}
 197  		return base
 198  	}
 199  
 200  	// If already ends in 'e': danced -> dance (strip only 'd')
 201  	if last == 'e' {
 202  		return base
 203  	}
 204  
 205  	if !isConsonant(last) {
 206  		return base
 207  	}
 208  
 209  	// English words never end in bare 'c', 'v', or 'z' - always silent-e
 210  	if last == 'c' || last == 'v' || last == 'z' {
 211  		return base | "e"
 212  	}
 213  
 214  	// Two consonants at end: walked, helped, worked
 215  	if len(base) >= 2 && isConsonant(base[len(base)-2]) {
 216  		return base
 217  	}
 218  
 219  	// Vowel-vowel-consonant (digraph): cleaned, feared, appeared
 220  	if len(base) >= 3 && !isConsonant(base[len(base)-2]) && !isConsonant(base[len(base)-3]) {
 221  		return base
 222  	}
 223  
 224  	// Single vowel + consonant (CVC): hoped -> hope, used -> use
 225  	return base | "e"
 226  }
 227  
 228  func isConsonant(c byte) bool {
 229  	switch c {
 230  	case 'a', 'e', 'i', 'o', 'u':
 231  		return false
 232  	}
 233  	return c >= 'a' && c <= 'z'
 234  }
 235  
 236  // enIrregular maps surface forms to their lemma + morph state.
 237  func enIrregular() map[string]LemmaResult {
 238  	return map[string]LemmaResult{
 239  	// be
 240  	"am": {Lemma: "be"},
 241  	"is": {Lemma: "be", Morph: Meta3Sg},
 242  	"are": {Lemma: "be"},
 243  	"was": {Lemma: "be", Morph: MetaTensePast | Meta3Sg},
 244  	"were": {Lemma: "be", Morph: MetaTensePast},
 245  	"been": {Lemma: "be", Morph: MetaTensePast | MetaAspectProg},
 246  	"being": {Lemma: "be", Morph: MetaAspectProg},
 247  	// have
 248  	"has": {Lemma: "have", Morph: Meta3Sg},
 249  	"had": {Lemma: "have", Morph: MetaTensePast},
 250  	"having": {Lemma: "have", Morph: MetaAspectProg},
 251  	// do
 252  	"does": {Lemma: "do", Morph: Meta3Sg},
 253  	"did": {Lemma: "do", Morph: MetaTensePast},
 254  	"done": {Lemma: "do", Morph: MetaTensePast},
 255  	// go
 256  	"went": {Lemma: "go", Morph: MetaTensePast},
 257  	"gone": {Lemma: "go", Morph: MetaTensePast},
 258  	"goes": {Lemma: "go", Morph: Meta3Sg},
 259  	// get
 260  	"got": {Lemma: "get", Morph: MetaTensePast},
 261  	"gotten": {Lemma: "get", Morph: MetaTensePast},
 262  	// make
 263  	"made": {Lemma: "make", Morph: MetaTensePast},
 264  	// take
 265  	"took": {Lemma: "take", Morph: MetaTensePast},
 266  	"taken": {Lemma: "take", Morph: MetaTensePast},
 267  	// come
 268  	"came": {Lemma: "come", Morph: MetaTensePast},
 269  	// see
 270  	"saw": {Lemma: "see", Morph: MetaTensePast},
 271  	"seen": {Lemma: "see", Morph: MetaTensePast},
 272  	// know
 273  	"knew": {Lemma: "know", Morph: MetaTensePast},
 274  	"known": {Lemma: "know", Morph: MetaTensePast},
 275  	// give
 276  	"gave": {Lemma: "give", Morph: MetaTensePast},
 277  	"given": {Lemma: "give", Morph: MetaTensePast},
 278  	// say
 279  	"said": {Lemma: "say", Morph: MetaTensePast},
 280  	// tell
 281  	"told": {Lemma: "tell", Morph: MetaTensePast},
 282  	// think
 283  	"thought": {Lemma: "think", Morph: MetaTensePast},
 284  	// find
 285  	"found": {Lemma: "find", Morph: MetaTensePast},
 286  	// leave
 287  	"left": {Lemma: "leave", Morph: MetaTensePast},
 288  	// feel
 289  	"felt": {Lemma: "feel", Morph: MetaTensePast},
 290  	// become
 291  	"became": {Lemma: "become", Morph: MetaTensePast},
 292  	// keep
 293  	"kept": {Lemma: "keep", Morph: MetaTensePast},
 294  	// begin
 295  	"began": {Lemma: "begin", Morph: MetaTensePast},
 296  	"begun": {Lemma: "begin", Morph: MetaTensePast},
 297  	// hear
 298  	"heard": {Lemma: "hear", Morph: MetaTensePast},
 299  	// hold
 300  	"held": {Lemma: "hold", Morph: MetaTensePast},
 301  	// bring
 302  	"brought": {Lemma: "bring", Morph: MetaTensePast},
 303  	// write
 304  	"wrote": {Lemma: "write", Morph: MetaTensePast},
 305  	"written": {Lemma: "write", Morph: MetaTensePast},
 306  	// sit
 307  	"sat": {Lemma: "sit", Morph: MetaTensePast},
 308  	// stand
 309  	"stood": {Lemma: "stand", Morph: MetaTensePast},
 310  	// lose
 311  	"lost": {Lemma: "lose", Morph: MetaTensePast},
 312  	// pay
 313  	"paid": {Lemma: "pay", Morph: MetaTensePast},
 314  	// meet
 315  	"met": {Lemma: "meet", Morph: MetaTensePast},
 316  	// lead
 317  	"led": {Lemma: "lead", Morph: MetaTensePast},
 318  	// understand
 319  	"understood": {Lemma: "understand", Morph: MetaTensePast},
 320  	// speak
 321  	"spoke": {Lemma: "speak", Morph: MetaTensePast},
 322  	"spoken": {Lemma: "speak", Morph: MetaTensePast},
 323  	// grow
 324  	"grew": {Lemma: "grow", Morph: MetaTensePast},
 325  	"grown": {Lemma: "grow", Morph: MetaTensePast},
 326  	// win
 327  	"won": {Lemma: "win", Morph: MetaTensePast},
 328  	// teach
 329  	"taught": {Lemma: "teach", Morph: MetaTensePast},
 330  	// buy
 331  	"bought": {Lemma: "buy", Morph: MetaTensePast},
 332  	// send
 333  	"sent": {Lemma: "send", Morph: MetaTensePast},
 334  	// build
 335  	"built": {Lemma: "build", Morph: MetaTensePast},
 336  	// fall
 337  	"fell": {Lemma: "fall", Morph: MetaTensePast},
 338  	"fallen": {Lemma: "fall", Morph: MetaTensePast},
 339  	// sell
 340  	"sold": {Lemma: "sell", Morph: MetaTensePast},
 341  	// run
 342  	"ran": {Lemma: "run", Morph: MetaTensePast},
 343  	// mean
 344  	"meant": {Lemma: "mean", Morph: MetaTensePast},
 345  	// spend
 346  	"spent": {Lemma: "spend", Morph: MetaTensePast},
 347  	// catch
 348  	"caught": {Lemma: "catch", Morph: MetaTensePast},
 349  	// fly
 350  	"flew": {Lemma: "fly", Morph: MetaTensePast},
 351  	"flown": {Lemma: "fly", Morph: MetaTensePast},
 352  	// eat
 353  	"ate": {Lemma: "eat", Morph: MetaTensePast},
 354  	"eaten": {Lemma: "eat", Morph: MetaTensePast},
 355  	// bite (PP "bitten" gets MetaPassive to distinguish from simple-past "bit")
 356  	"bit":    {Lemma: "bite", Morph: MetaTensePast},
 357  	"bitten": {Lemma: "bite", Morph: MetaTensePast | MetaPassive},
 358  	// drink
 359  	"drank": {Lemma: "drink", Morph: MetaTensePast},
 360  	"drunk": {Lemma: "drink", Morph: MetaTensePast},
 361  	// drive
 362  	"drove": {Lemma: "drive", Morph: MetaTensePast},
 363  	"driven": {Lemma: "drive", Morph: MetaTensePast},
 364  	// sing
 365  	"sang": {Lemma: "sing", Morph: MetaTensePast},
 366  	"sung": {Lemma: "sing", Morph: MetaTensePast},
 367  	// swim
 368  	"swam": {Lemma: "swim", Morph: MetaTensePast},
 369  	"swum": {Lemma: "swim", Morph: MetaTensePast},
 370  	// break
 371  	"broke": {Lemma: "break", Morph: MetaTensePast},
 372  	"broken": {Lemma: "break", Morph: MetaTensePast},
 373  	// choose
 374  	"chose": {Lemma: "choose", Morph: MetaTensePast},
 375  	"chosen": {Lemma: "choose", Morph: MetaTensePast},
 376  	// draw
 377  	"drew": {Lemma: "draw", Morph: MetaTensePast},
 378  	"drawn": {Lemma: "draw", Morph: MetaTensePast},
 379  	// forget
 380  	"forgot": {Lemma: "forget", Morph: MetaTensePast},
 381  	"forgotten": {Lemma: "forget", Morph: MetaTensePast},
 382  	// freeze
 383  	"froze": {Lemma: "freeze", Morph: MetaTensePast},
 384  	"frozen": {Lemma: "freeze", Morph: MetaTensePast},
 385  	// hide
 386  	"hid": {Lemma: "hide", Morph: MetaTensePast},
 387  	"hidden": {Lemma: "hide", Morph: MetaTensePast},
 388  	// lie (recline)
 389  	"lay": {Lemma: "lie", Morph: MetaTensePast},
 390  	"lain": {Lemma: "lie", Morph: MetaTensePast},
 391  	// ride
 392  	"rode": {Lemma: "ride", Morph: MetaTensePast},
 393  	"ridden": {Lemma: "ride", Morph: MetaTensePast},
 394  	// rise
 395  	"rose": {Lemma: "rise", Morph: MetaTensePast},
 396  	"risen": {Lemma: "rise", Morph: MetaTensePast},
 397  	// shake
 398  	"shook": {Lemma: "shake", Morph: MetaTensePast},
 399  	"shaken": {Lemma: "shake", Morph: MetaTensePast},
 400  	// steal
 401  	"stole": {Lemma: "steal", Morph: MetaTensePast},
 402  	"stolen": {Lemma: "steal", Morph: MetaTensePast},
 403  	// tear
 404  	"tore": {Lemma: "tear", Morph: MetaTensePast},
 405  	"torn": {Lemma: "tear", Morph: MetaTensePast},
 406  	// throw
 407  	"threw": {Lemma: "throw", Morph: MetaTensePast},
 408  	"thrown": {Lemma: "throw", Morph: MetaTensePast},
 409  	// wake
 410  	"woke": {Lemma: "wake", Morph: MetaTensePast},
 411  	"woken": {Lemma: "wake", Morph: MetaTensePast},
 412  	// wear
 413  	"wore": {Lemma: "wear", Morph: MetaTensePast},
 414  	"worn": {Lemma: "wear", Morph: MetaTensePast},
 415  	// irregular nouns
 416  	"children": {Lemma: "child", Morph: MetaNumPlural},
 417  	"men": {Lemma: "man", Morph: MetaNumPlural},
 418  	"women": {Lemma: "woman", Morph: MetaNumPlural},
 419  	"people": {Lemma: "person", Morph: MetaNumPlural},
 420  	"mice": {Lemma: "mouse", Morph: MetaNumPlural},
 421  	"feet": {Lemma: "foot", Morph: MetaNumPlural},
 422  	"teeth": {Lemma: "tooth", Morph: MetaNumPlural},
 423  	"geese": {Lemma: "goose", Morph: MetaNumPlural},
 424  	"lives": {Lemma: "life", Morph: MetaNumPlural},
 425  	"wives": {Lemma: "wife", Morph: MetaNumPlural},
 426  	"knives": {Lemma: "knife", Morph: MetaNumPlural},
 427  	"wolves": {Lemma: "wolf", Morph: MetaNumPlural},
 428  	"halves": {Lemma: "half", Morph: MetaNumPlural},
 429  	"leaves": {Lemma: "leaf", Morph: MetaNumPlural},
 430  	// modals/negative contractions
 431  	"can't": {Lemma: "can", Morph: MetaPolarNeg},
 432  	"won't": {Lemma: "will", Morph: MetaPolarNeg},
 433  	"don't": {Lemma: "do", Morph: MetaPolarNeg},
 434  	"doesn't": {Lemma: "do", Morph: MetaPolarNeg | Meta3Sg},
 435  	"didn't": {Lemma: "do", Morph: MetaTensePast | MetaPolarNeg},
 436  	"isn't": {Lemma: "be", Morph: MetaPolarNeg | Meta3Sg},
 437  	"aren't": {Lemma: "be", Morph: MetaPolarNeg},
 438  	"wasn't": {Lemma: "be", Morph: MetaTensePast | MetaPolarNeg},
 439  	"weren't": {Lemma: "be", Morph: MetaTensePast | MetaPolarNeg},
 440  	"haven't": {Lemma: "have", Morph: MetaPolarNeg},
 441  	"hasn't": {Lemma: "have", Morph: MetaPolarNeg | Meta3Sg},
 442  	"hadn't": {Lemma: "have", Morph: MetaTensePast | MetaPolarNeg},
 443  	"couldn't": {Lemma: "can", Morph: MetaTensePast | MetaPolarNeg},
 444  	"wouldn't": {Lemma: "will", Morph: MetaTensePast | MetaPolarNeg},
 445  	"shouldn't": {Lemma: "shall", Morph: MetaTensePast | MetaPolarNeg},
 446  
 447  	// CVC-rule overrides: multi-syllable verbs whose -ed/-ing forms
 448  	// produce wrong lemma because the CVC heuristic adds a spurious -e.
 449  	// (happen -> happene, open -> opene, etc.)
 450  	"happened":    {Lemma: "happen", Morph: MetaTensePast},
 451  	"happening":   {Lemma: "happen", Morph: MetaAspectProg},
 452  	"opened":      {Lemma: "open", Morph: MetaTensePast},
 453  	"opening":     {Lemma: "open", Morph: MetaAspectProg},
 454  	"listened":    {Lemma: "listen", Morph: MetaTensePast},
 455  	"listening":   {Lemma: "listen", Morph: MetaAspectProg},
 456  	"offered":     {Lemma: "offer", Morph: MetaTensePast},
 457  	"offering":    {Lemma: "offer", Morph: MetaAspectProg},
 458  	"differed":    {Lemma: "differ", Morph: MetaTensePast},
 459  	"suffered":    {Lemma: "suffer", Morph: MetaTensePast},
 460  	"suffering":   {Lemma: "suffer", Morph: MetaAspectProg},
 461  	"wandered":    {Lemma: "wander", Morph: MetaTensePast},
 462  	"wandering":   {Lemma: "wander", Morph: MetaAspectProg},
 463  	"wondered":    {Lemma: "wonder", Morph: MetaTensePast},
 464  	"wondering":   {Lemma: "wonder", Morph: MetaAspectProg},
 465  	"ordered":     {Lemma: "order", Morph: MetaTensePast},
 466  	"ordering":    {Lemma: "order", Morph: MetaAspectProg},
 467  	"murdered":    {Lemma: "murder", Morph: MetaTensePast},
 468  	"answered":    {Lemma: "answer", Morph: MetaTensePast},
 469  	"answering":   {Lemma: "answer", Morph: MetaAspectProg},
 470  	"entered":     {Lemma: "enter", Morph: MetaTensePast},
 471  	"entering":    {Lemma: "enter", Morph: MetaAspectProg},
 472  	"remembered":  {Lemma: "remember", Morph: MetaTensePast},
 473  	"remembering": {Lemma: "remember", Morph: MetaAspectProg},
 474  	"considered":  {Lemma: "consider", Morph: MetaTensePast},
 475  	"considering": {Lemma: "consider", Morph: MetaAspectProg},
 476  	"discovered":  {Lemma: "discover", Morph: MetaTensePast},
 477  	"discovering": {Lemma: "discover", Morph: MetaAspectProg},
 478  	"delivered":   {Lemma: "deliver", Morph: MetaTensePast},
 479  	"developed":   {Lemma: "develop", Morph: MetaTensePast},
 480  	"developing":  {Lemma: "develop", Morph: MetaAspectProg},
 481  	"gathered":    {Lemma: "gather", Morph: MetaTensePast},
 482  	"gathering":   {Lemma: "gather", Morph: MetaAspectProg},
 483  	"threatened":  {Lemma: "threaten", Morph: MetaTensePast},
 484  	"threatening": {Lemma: "threaten", Morph: MetaAspectProg},
 485  	"strengthened":{Lemma: "strengthen", Morph: MetaTensePast},
 486  	"loosened":    {Lemma: "loosen", Morph: MetaTensePast},
 487  	"sharpened":   {Lemma: "sharpen", Morph: MetaTensePast},
 488  	"flattened":   {Lemma: "flatten", Morph: MetaTensePast},
 489  	"softened":    {Lemma: "soften", Morph: MetaTensePast},
 490  	"modeled":     {Lemma: "model", Morph: MetaTensePast},
 491  	"modeling":    {Lemma: "model", Morph: MetaAspectProg},
 492  	"traveled":    {Lemma: "travel", Morph: MetaTensePast},
 493  	"traveling":   {Lemma: "travel", Morph: MetaAspectProg},
 494  	"cancelled":   {Lemma: "cancel", Morph: MetaTensePast},
 495  	"canceling":   {Lemma: "cancel", Morph: MetaAspectProg},
 496  
 497  	// Pronoun objective case: collapse to nominative.
 498  	// Only objective forms, not possessives (my/your/his/her/their/its serve as
 499  	// modifiers and are distinct atoms from the subject pronoun).
 500  	"me":      {Lemma: "i"},
 501  	"myself":  {Lemma: "i"},
 502  	"him":     {Lemma: "he"},
 503  	"himself": {Lemma: "he"},
 504  	"herself": {Lemma: "she"},
 505  	"us":      {Lemma: "we"},
 506  	"ourselves": {Lemma: "we"},
 507  	"them":    {Lemma: "they"},
 508  	"themselves": {Lemma: "they"},
 509  	"yourself": {Lemma: "you"},
 510  	"itself":  {Lemma: "it"},
 511  	// Pronoun contractions: collapse to the pronoun. The verb part is
 512  	// auxiliary (be/have/will) whose morph belongs on the clause's main verb.
 513  	// The pronoun IS the atom; the auxiliary is structural, not lexical.
 514  	"i'm":     {Lemma: "i"},
 515  	"i've":    {Lemma: "i"},
 516  	"i'll":    {Lemma: "i"},
 517  	"i'd":     {Lemma: "i"},
 518  	"it's":    {Lemma: "it"},
 519  	"he's":    {Lemma: "he"},
 520  	"she's":   {Lemma: "she"},
 521  	"we're":   {Lemma: "we"},
 522  	"we've":   {Lemma: "we"},
 523  	"we'll":   {Lemma: "we"},
 524  	"they're": {Lemma: "they"},
 525  	"they've": {Lemma: "they"},
 526  	"they'll": {Lemma: "they"},
 527  	"you're":  {Lemma: "you"},
 528  	"you've":  {Lemma: "you"},
 529  	"you'll":  {Lemma: "you"},
 530  	"let's":   {Lemma: "let", Morph: MetaMoodVol},
 531  	"that's":  {Lemma: "that"},
 532  	"there's": {Lemma: "there"},
 533  	"what's":  {Lemma: "what"},
 534  	"who's":   {Lemma: "who"},
 535  	"here's":  {Lemma: "here"},
 536  
 537  	// -ing irregulars: ie->y before -ing, and digraph+silent-e cases
 538  	"dying":    {Lemma: "die", Morph: MetaAspectProg},
 539  	"lying":    {Lemma: "lie", Morph: MetaAspectProg},
 540  	"tying":    {Lemma: "tie", Morph: MetaAspectProg},
 541  	"choosing": {Lemma: "choose", Morph: MetaAspectProg},
 542  	"going":    {Lemma: "go", Morph: MetaAspectProg},
 543  	"doing":    {Lemma: "do", Morph: MetaAspectProg},
 544  	// Informal contractions
 545  	"gonna":  {Lemma: "go", Morph: MetaMoodVol},
 546  	"wanna":  {Lemma: "want", Morph: MetaMoodVol},
 547  	"gotta":  {Lemma: "get", Morph: MetaMoodVol},
 548  	"kinda":  {Lemma: "kind"},
 549  	"sorta":  {Lemma: "sort"},
 550  	"lemme":  {Lemma: "let"},
 551  	"gimme":  {Lemma: "give"},
 552  	// Words naturally ending in -ing (not progressive forms)
 553  	"thing":      {Lemma: "thing"},
 554  	"nothing":    {Lemma: "nothing"},
 555  	"something":  {Lemma: "something"},
 556  	"anything":   {Lemma: "anything"},
 557  	"everything": {Lemma: "everything"},
 558  	"morning":    {Lemma: "morning"},
 559  	"evening":    {Lemma: "evening"},
 560  	"spring":     {Lemma: "spring"},
 561  	"string":     {Lemma: "string"},
 562  	"during":     {Lemma: "during"},
 563  	"ceiling":    {Lemma: "ceiling"},
 564  	"king":       {Lemma: "king"},
 565  	// Adverbs ending in -s (not plurals)
 566  	"always":    {Lemma: "always"},
 567  	"sometimes": {Lemma: "sometimes"},
 568  	"perhaps":   {Lemma: "perhaps"},
 569  	"thus":      {Lemma: "thus"},
 570  	"besides":   {Lemma: "besides"},
 571  	"nowadays":  {Lemma: "nowadays"},
 572  	"towards":   {Lemma: "towards"},
 573  	"upstairs":  {Lemma: "upstairs"},
 574  	"downstairs": {Lemma: "downstairs"},
 575  	"outdoors":  {Lemma: "outdoors"},
 576  	// Common -ed words with natural double consonants
 577  	"guessed":   {Lemma: "guess", Morph: MetaTensePast},
 578  	"passed":    {Lemma: "pass", Morph: MetaTensePast},
 579  	"missed":    {Lemma: "miss", Morph: MetaTensePast},
 580  	"kissed":    {Lemma: "kiss", Morph: MetaTensePast},
 581  	"crossed":   {Lemma: "cross", Morph: MetaTensePast},
 582  	"dressed":   {Lemma: "dress", Morph: MetaTensePast},
 583  	"pressed":   {Lemma: "press", Morph: MetaTensePast},
 584  	"stressed":  {Lemma: "stress", Morph: MetaTensePast},
 585  	// -ed words needing silent-e restoration
 586  	"phrased":   {Lemma: "phrase", Morph: MetaTensePast},
 587  	"increased": {Lemma: "increase", Morph: MetaTensePast},
 588  	"decreased": {Lemma: "decrease", Morph: MetaTensePast},
 589  	"pleased":   {Lemma: "please", Morph: MetaTensePast},
 590  	"released":  {Lemma: "release", Morph: MetaTensePast},
 591  	"surprised": {Lemma: "surprise", Morph: MetaTensePast},
 592  
 593  	// Comparative forms (Adj-er) mapped to base + MetaCompare. The renderer
 594  	// reverse-lookup by (lemma, MetaCompare) produces the surface form.
 595  	"bigger":   {Lemma: "big", Morph: MetaCompare},
 596  	"smaller":  {Lemma: "small", Morph: MetaCompare},
 597  	"taller":   {Lemma: "tall", Morph: MetaCompare},
 598  	"shorter":  {Lemma: "short", Morph: MetaCompare},
 599  	"longer":   {Lemma: "long", Morph: MetaCompare},
 600  	"wider":    {Lemma: "wide", Morph: MetaCompare},
 601  	"thicker":  {Lemma: "thick", Morph: MetaCompare},
 602  	"thinner":  {Lemma: "thin", Morph: MetaCompare},
 603  	"higher":   {Lemma: "high", Morph: MetaCompare},
 604  	"lower":    {Lemma: "low", Morph: MetaCompare},
 605  	"hotter":   {Lemma: "hot", Morph: MetaCompare},
 606  	"colder":   {Lemma: "cold", Morph: MetaCompare},
 607  	"warmer":   {Lemma: "warm", Morph: MetaCompare},
 608  	"cooler":   {Lemma: "cool", Morph: MetaCompare},
 609  	"faster":   {Lemma: "fast", Morph: MetaCompare},
 610  	"slower":   {Lemma: "slow", Morph: MetaCompare},
 611  	"older":    {Lemma: "old", Morph: MetaCompare},
 612  	"younger":  {Lemma: "young", Morph: MetaCompare},
 613  	"happier":  {Lemma: "happy", Morph: MetaCompare},
 614  	"sadder":   {Lemma: "sad", Morph: MetaCompare},
 615  	"easier":   {Lemma: "easy", Morph: MetaCompare},
 616  	"harder":   {Lemma: "hard", Morph: MetaCompare},
 617  	"larger":   {Lemma: "large", Morph: MetaCompare},
 618  	"better":   {Lemma: "good", Morph: MetaCompare},
 619  	"worse":    {Lemma: "bad", Morph: MetaCompare},
 620  	"stronger": {Lemma: "strong", Morph: MetaCompare},
 621  	"weaker":   {Lemma: "weak", Morph: MetaCompare},
 622  	"heavier":  {Lemma: "heavy", Morph: MetaCompare},
 623  	"lighter":  {Lemma: "light", Morph: MetaCompare},
 624  	"cheaper":  {Lemma: "cheap", Morph: MetaCompare},
 625  	"richer":   {Lemma: "rich", Morph: MetaCompare},
 626  	"poorer":   {Lemma: "poor", Morph: MetaCompare},
 627  	"nicer":    {Lemma: "nice", Morph: MetaCompare},
 628  	"newer":    {Lemma: "new", Morph: MetaCompare},
 629  	}
 630  }
 631