morph.mx raw

   1  package transdb
   2  
   3  // JA verb conjugation tables for use in both ingest (generating lattice records)
   4  // and translation (navigating the cluster at runtime).
   5  //
   6  // verbPattern defines suffix transforms for one verb class.
   7  // stem = dictForm with dictSuf stripped from the end.
   8  type VerbPattern struct {
   9  	DictSuf string // strip + reattach for state 0
  10  	Masu    string // masu-stem insert (empty for ichidan)
  11  	Nai     string // nai-stem insert  (empty for ichidan)
  12  	Ta      string // plain past suffix
  13  	Te      string // te-form suffix for progressive
  14  }
  15  
  16  // VerbPatterns maps JMdict POS code → conjugation pattern.
  17  var VerbPatterns = map[string]VerbPattern{
  18  	"v1":    {"る", "", "", "た", "て"},
  19  	"v1-s":  {"る", "", "", "た", "て"},
  20  	"v5k":   {"く", "き", "か", "いた", "いて"},
  21  	"v5k-s": {"く", "き", "か", "いた", "いて"},
  22  	"v5g":   {"ぐ", "ぎ", "が", "いだ", "いで"},
  23  	"v5s":   {"す", "し", "さ", "した", "して"},
  24  	"v5m":   {"む", "み", "ま", "んだ", "んで"},
  25  	"v5n":   {"ぬ", "に", "な", "んだ", "んで"},
  26  	"v5b":   {"ぶ", "び", "ば", "んだ", "んで"},
  27  	"v5r":   {"る", "り", "ら", "った", "って"},
  28  	"v5r-i": {"る", "り", "ら", "った", "って"},
  29  	"v5t":   {"つ", "ち", "た", "った", "って"},
  30  	"v5u":   {"う", "い", "わ", "った", "って"},
  31  	"v5u-s": {"う", "い", "わ", "った", "って"},
  32  	"v5aru": {"る", "り", "ら", "った", "って"},
  33  }
  34  
  35  // cp returns a fresh allocation, preventing string=[]byte aliasing.
  36  func Cp(s string) string {
  37  	return string(append([]byte(nil), []byte(s)...))
  38  }
  39  
  40  // BuildVerbForms returns MorphState→surface form for all valid states of a verb.
  41  // State 0 (dictionary form) is included.
  42  func BuildVerbForms(dictForm, verbClass string) map[uint8]string {
  43  	out := map[uint8]string{}
  44  
  45  	switch verbClass {
  46  	case "vk":
  47  		if dictForm == "くる" {
  48  			for state, form := range kuruStateForms {
  49  				out[state] = form
  50  			}
  51  		}
  52  
  53  	case "vs", "vs-i", "vs-s", "vs-c":
  54  		suruSuf := "する"
  55  		prefix := ""
  56  		if len(dictForm) > len(suruSuf) {
  57  			prefix = dictForm[:len(dictForm)-len(suruSuf)]
  58  		}
  59  		for i, state := range suruStateOrder {
  60  			out[state] = Cp(prefix | suruFormSuffixes[i])
  61  		}
  62  
  63  	default:
  64  		pat, ok := VerbPatterns[verbClass]
  65  		if !ok {
  66  			return nil
  67  		}
  68  		sufLen := len(pat.DictSuf)
  69  		if len(dictForm) <= sufLen {
  70  			return nil
  71  		}
  72  		stem := dictForm[:len(dictForm)-sufLen]
  73  		addVerbForms(out, stem, pat)
  74  	}
  75  
  76  	return out
  77  }
  78  
  79  func addVerbForms(out map[uint8]string, stem string, p VerbPattern) {
  80  	out[MorphPresAffPlain]  = Cp(stem | p.DictSuf)
  81  	out[MorphPresAffPolite] = Cp(stem | p.Masu | "ます")
  82  	out[MorphPresNegPlain]  = Cp(stem | p.Nai | "ない")
  83  	out[MorphPresNegPolite] = Cp(stem | p.Masu | "ません")
  84  	out[MorphPastAffPlain]  = Cp(stem | p.Ta)
  85  	out[MorphPastAffPolite] = Cp(stem | p.Masu | "ました")
  86  	out[MorphPastNegPlain]  = Cp(stem | p.Nai | "なかった")
  87  	out[MorphPastNegPolite] = Cp(stem | p.Masu | "ませんでした")
  88  	if p.Te != "" {
  89  		te := Cp(stem | p.Te)
  90  		out[MorphPresProgPlain]  = Cp(te | "いる")
  91  		out[MorphPresProgPolite] = Cp(te | "います")
  92  		out[MorphPresProgNeg]    = Cp(te | "いない")
  93  		out[MorphPastProgPlain]  = Cp(te | "いた")
  94  		out[MorphPastProgPolite] = Cp(te | "いました")
  95  		out[MorphPastProgNeg]    = Cp(te | "いなかった")
  96  	}
  97  	if base := out[MorphPresAffPlain]; base != "" {
  98  		out[MorphPresReported] = Cp(base | "そうだ")
  99  	}
 100  	if past := out[MorphPastAffPlain]; past != "" {
 101  		out[MorphPastReported] = Cp(past | "そうだ")
 102  	}
 103  }
 104  
 105  var suruStateOrder = []uint8{
 106  	MorphPresAffPlain, MorphPresAffPolite, MorphPresNegPlain, MorphPresNegPolite,
 107  	MorphPastAffPlain, MorphPastAffPolite, MorphPastNegPlain, MorphPastNegPolite,
 108  	MorphPresProgPlain, MorphPresProgPolite, MorphPresProgNeg,
 109  	MorphPastProgPlain, MorphPastProgPolite, MorphPastProgNeg,
 110  	MorphPresReported, MorphPastReported,
 111  }
 112  
 113  var suruFormSuffixes = []string{
 114  	"する", "します", "しない", "しません",
 115  	"した", "しました", "しなかった", "しませんでした",
 116  	"している", "しています", "していない",
 117  	"していた", "していました", "していなかった",
 118  	"するそうだ", "したそうだ",
 119  }
 120  
 121  var kuruStateForms = map[uint8]string{
 122  	MorphPresAffPlain: "くる", MorphPresAffPolite: "きます",
 123  	MorphPresNegPlain: "こない", MorphPresNegPolite: "きません",
 124  	MorphPresProgPlain: "きている", MorphPresProgPolite: "きています",
 125  	MorphPresProgNeg: "きていない",
 126  	MorphPastAffPlain: "きた", MorphPastAffPolite: "きました",
 127  	MorphPastNegPlain: "こなかった", MorphPastNegPolite: "きませんでした",
 128  	MorphPastProgPlain: "きていた", MorphPastProgPolite: "きていました",
 129  	MorphPastProgNeg: "きていなかった",
 130  	MorphPresReported: "くるそうだ", MorphPastReported: "きたそうだ",
 131  }
 132