package iskra

// volClassFromSuffix returns the godan class implied by a volitional compound
// ending. Returns VClassIchidan for よう (and for unknown patterns).
func volClassFromSuffix(suffix []byte) uint8 {
	switch string(suffix) {
	case "\xe3\x81\x93\xe3\x81\x86":
		return VClassGodanKu
	case "\xe3\x81\x94\xe3\x81\x86":
		return VClassGodanGu
	case "\xe3\x81\x9d\xe3\x81\x86":
		return VClassGodanSu
	case "\xe3\x81\xa8\xe3\x81\x86":
		return VClassGodanTsu
	case "\xe3\x81\xae\xe3\x81\x86":
		return VClassGodanNu
	case "\xe3\x81\xbc\xe3\x81\x86":
		return VClassGodanBu
	case "\xe3\x82\x82\xe3\x81\x86":
		return VClassGodanMu
	case "\xe3\x82\x8d\xe3\x81\x86":
		return VClassGodanRu
	case "\xe3\x81\x8a\xe3\x81\x86":
		return VClassGodanU
	}
	return VClassIchidan
}

// LemmatizeJA reduces a JA surface form to a lemma plus morph bits and a
// conjugation class.
//
// Handling by input kind:
//   - UntranslatedMarker is returned untouched with zero morph and class.
//   - Non-verbs (isVerb=false) are tried as inflected i-adjectives via
//     stripJAIAdj (Class: VClassIAdj); everything else passes through.
//   - Verbs shorter than one 3-byte character pass through unchanged.
//   - Other verbs have their conjugation suffix stripped, their class
//     detected, and the remaining stem normalized by canonicalizeJAStem.
//
// The lemma shape follows canonicalizeJAStem: ichidan verbs keep their
// え/い-row stem (食べる/食べた → 食べ), godan verbs are rebuilt to a
// dictionary form from the kanji root plus the class ending.
func LemmatizeJA(word string, isVerb bool) LemmaResult {
	passthrough := LemmaResult{Lemma: word, Morph: 0, Class: 0}

	switch {
	case word == UntranslatedMarker:
		return passthrough
	case !isVerb:
		adjStem, adjMorph, ok := stripJAIAdj([]byte(word))
		if !ok {
			return passthrough
		}
		return LemmaResult{Lemma: string(adjStem), Morph: adjMorph, Class: VClassIAdj}
	}

	raw := []byte(word)
	if len(raw) < 3 {
		return passthrough
	}

	stem, morph, stripped := stripJAConjugation(raw)
	class := detectJAClass(raw, stem, stripped, morph)

	// For volitional forms the stripped ending itself names the class
	// (こう/もう/... for godan, よう for ichidan), so it overrides detection.
	// NOTE(review): stripJAConjugation also sets MetaMoodVol for the
	// desiderative たい family; those endings fall into volClassFromSuffix's
	// VClassIchidan default here — confirm that is intended.
	if stripped && morph&MetaMoodVol != 0 {
		if ending := raw[len(stem):]; len(ending) >= 6 {
			class = volClassFromSuffix(ending)
		}
	}

	// A lone kana left after stripping (e.g. bare っ from った/って) is a
	// tokenizer artifact rather than a verb, so the word is passed through.
	// Stems of two or more kana (くれ, つけ, かけ) are real ichidan stems
	// and are kept.
	if stripped && len(stem) <= 3 && isBareSuffix(stem) {
		return passthrough
	}

	lemma := word
	if canon := canonicalizeJAStem(stem, stripped, class); len(canon) > 0 {
		lemma = string(canon)
	}
	return LemmaResult{Lemma: lemma, Morph: morph, Class: class}
}

// detectJAClass attempts to determine the verb class from the original surface
// form and what suffix was stripped.
func detectJAClass(orig, stem []byte, stripped bool, morph uint16) uint8 {
	if !stripped {
		// Dict form: look at last kana. 食べる → ichidan if pre-る is え/い-row.
		return classFromDictForm(orig)
	}
	// Suffix was stripped. Use residual to infer class.
	return classFromInflectedStem(stem, morph)
}

// classFromDictForm classifies a dictionary-form verb (no suffix stripped)
// from its final kana. A final る is ichidan when the kana before it is
// え/い-row and godan-ru otherwise; the other う-row endings map directly to
// their godan class. Anything else, or input shorter than one 3-byte
// character, yields VClassNone.
func classFromDictForm(b []byte) uint8 {
	n := len(b)
	if n < 3 || b[n-3] != 0xe3 {
		return VClassNone
	}
	mid, last := b[n-2], b[n-1]

	if mid == 0x82 {
		switch last {
		case 0x8b: // る: ambiguous between ichidan and godan-ru
			if n >= 6 && isERowOrIRowKana(string(b[n-6:n-3])) {
				return VClassIchidan
			}
			return VClassGodanRu
		case 0x80: // む
			return VClassGodanMu
		}
		return VClassNone
	}
	if mid != 0x81 {
		return VClassNone
	}

	switch last {
	case 0x8f: // く
		return VClassGodanKu
	case 0x90: // ぐ
		return VClassGodanGu
	case 0x99: // す
		return VClassGodanSu
	case 0xa4: // つ
		return VClassGodanTsu
	case 0xac: // ぬ
		return VClassGodanNu
	case 0xb6: // ぶ
		return VClassGodanBu
	case 0x86: // う
		return VClassGodanU
	}
	return VClassNone
}

// classFromInflectedStem guesses the verb class from the stem that remains
// after a conjugation suffix was removed, using the last 3-byte character of
// the stem and the morph bits of the removed suffix:
//
//   - negative/passive/causative: an あ-row kana selects the godan class
//     (走らない → 走ら → godan-ru, 噛まれた → 噛ま → godan-mu,
//     待たせた → 待た → godan-tsu);
//   - polite (ます family): an i-stem kana selects the godan class;
//   - otherwise (て/た/で/だ and the rest): the contraction kana decides —
//     っ is resolved by classTsuContract, ん maps to godan-mu, い to
//     godan-ku, し to godan-su.
//
// In every branch a stem ending in え/い-row kana that was not claimed above
// is treated as a bare ichidan stem; anything else is VClassNone.
func classFromInflectedStem(stem []byte, morph uint16) uint8 {
	if len(stem) < 3 {
		return VClassNone
	}
	tail := string(stem[len(stem)-3:])

	switch {
	case morph&(MetaPolarNeg|MetaPassive|MetaCausative) != 0:
		if godan := classFromARowKana(tail); godan != VClassNone {
			return godan
		}
	case morph&MetaFormalityPol != 0:
		// classFromIStem reports VClassIchidan when the kana is not a
		// godan i-stem.
		if godan := classFromIStem(tail); godan != VClassIchidan {
			return godan
		}
	default:
		switch tail {
		case "\xe3\x81\xa3": // っ: godan-u/-tsu/-ru, disambiguated by root
			return classTsuContract(stem)
		case "\xe3\x82\x93": // ん: voiced contraction (nu/bu/mu), reported as mu
			return VClassGodanMu
		case "\xe3\x81\x84": // い: ku/gu past, reported as ku
			return VClassGodanKu
		case "\xe3\x81\x97": // し: su past
			return VClassGodanSu
		}
	}

	if isERowKana(tail) || isIRowKana(tail) {
		return VClassIchidan
	}
	return VClassNone
}

// classFromARowKana maps the あ-row kana found before ない to the godan class.
func classFromARowKana(ls string) uint8 {
	switch ls {
	case "\xe3\x81\x8b": // か
		return VClassGodanKu
	case "\xe3\x81\x8c": // が
		return VClassGodanGu
	case "\xe3\x81\x95": // さ
		return VClassGodanSu
	case "\xe3\x81\x9f": // た
		return VClassGodanTsu
	case "\xe3\x81\xaa": // な
		return VClassGodanNu
	case "\xe3\x81\xb0": // ば
		return VClassGodanBu
	case "\xe3\x81\xbe": // ま
		return VClassGodanMu
	case "\xe3\x82\x89": // ら
		return VClassGodanRu
	case "\xe3\x82\x8f": // わ
		return VClassGodanU
	}
	return VClassNone
}

// classFromIStem maps an i-row kana to the corresponding godan class.
func classFromIStem(ls string) uint8 {
	switch ls {
	case "\xe3\x81\x8d": // き
		return VClassGodanKu
	case "\xe3\x81\x8e": // ぎ
		return VClassGodanGu
	case "\xe3\x81\x97": // し
		return VClassGodanSu
	case "\xe3\x81\xa1": // ち
		return VClassGodanTsu
	case "\xe3\x81\xab": // に
		return VClassGodanNu
	case "\xe3\x81\xb3": // び
		return VClassGodanBu
	case "\xe3\x81\xbf": // み
		return VClassGodanMu
	case "\xe3\x82\x8a": // り
		return VClassGodanRu
	case "\xe3\x81\x84": // い (from godan-う i-stem like 買い)
		return VClassGodanU
	}
	return VClassIchidan
}

// classTsuContract resolves the っ contraction, which godan-u (買う→買っ),
// godan-tsu (持つ→持っ) and godan-ru (知る→知っ) verbs share, by looking up
// the character that precedes っ in a small closed table. Characters not in
// the table, and stems with nothing before っ, default to godan-ru.
func classTsuContract(stem []byte) uint8 {
	const sokuon = "\xe3\x81\xa3" // っ

	root := stem
	if n := len(root); n >= 3 && string(root[n-3:]) == sokuon {
		root = root[:n-3]
	}
	n := len(root)
	if n == 0 {
		return VClassGodanRu
	}

	// Width of the final character, judged from the candidate lead byte:
	// >= 0xE0 three bytes back means a 3-byte sequence (kanji), >= 0xC0 two
	// bytes back a 2-byte sequence, otherwise a single byte.
	width := 1
	switch {
	case n >= 3 && root[n-3] >= 0xe0:
		width = 3
	case n >= 2 && root[n-2] >= 0xc0:
		width = 2
	}

	switch string(root[n-width:]) {
	// Godan-tsu (dictionary form ends in つ).
	case "\xe6\x8c\x81", // 持 (motsu)
		"\xe7\xab\x8b", // 立 (tatsu)
		"\xe5\xbe\x85", // 待 (matsu)
		"\xe6\x89\x93", // 打 (utsu)
		"\xe5\x8b\x9d", // 勝 (katsu)
		"\xe8\x82\xb2", // 育 (sodatsu)
		"\xe7\xb5\x8c", // 経 (tatsu, time passes)
		"\xe4\xbf\x9d", // 保 (motsu, to last)
		"\xe7\x99\xba", // 発 (tatsu, depart)
		"\xe5\xbb\xba": // 建 (tatsu, build)
		return VClassGodanTsu

	// Godan-u (dictionary form ends in う).
	case "\xe8\xb2\xb7", // 買 (kau)
		"\xe4\xbc\x9a", // 会 (au)
		"\xe6\xad\x8c", // 歌 (utau)
		"\xe8\xa8\x80", // 言 (iu)
		"\xe4\xbd\xbf", // 使 (tsukau)
		"\xe6\x89\x95", // 払 (harau)
		"\xe6\xb4\x97", // 洗 (arau)
		"\xe7\xac\x91", // 笑 (warau)
		"\xe9\x81\x95", // 違 (chigau)
		"\xe6\x80\x9d", // 思 (omou)
		"\xe8\xbf\xbd", // 追 (ou)
		"\xe8\xaa\x98", // 誘 (sasou)
		"\xe6\x8b\xbe", // 拾 (hirou)
		"\xe9\xa3\xbc", // 飼 (kau, keep/raise)
		"\xe5\x90\xb8", // 吸 (suu)
		"\xe5\x90\x88", // 合 (au, match)
		"\xe6\x95\x91", // 救 (sukuu)
		"\xe6\x8b\x89": // 拉 (non-standard, but appears)
		return VClassGodanU
	}

	// Everything else: godan-ru (知る, 取る, 送る, 作る, 乗る, 売る, ...).
	return VClassGodanRu
}

// canonicalizeJAStem normalizes the stem to its canonical form per class.
// For ichidan: keep the え/い-row kana (食べ stays 食べ).
// For godan: canonical form is the dict form (kanji root + u-row ending).
//   Dict form 行く: keep as-is.
//   Inflected 行った: strip to kanji root, re-append く.
// This preserves lexical identity: 行く (iku, go) vs 行う (okonau, do)
// produce distinct lemmas instead of both collapsing to 行.
func canonicalizeJAStem(stem []byte, stripped bool, class uint8) []byte {
	if class == VClassIchidan {
		if !stripped {
			return stripTrailingHiragana(stem)
		}
		return stem // 食べ is already canonical for ichidan
	}
	// Godan: strip to kanji root, then append the dict-form ending.
	root := stem
	if !stripped {
		root = stripTrailingHiragana(stem)
	} else {
		root = stripAllTrailingHiragana(stem)
	}
	suffix := godanDictSuffix(class)
	if suffix == "" {
		return root
	}
	out := []byte{:0:len(root) + 3}
	out = append(out, root...)
	out = append(out, []byte(suffix)...)
	return out
}

// godanDictSuffix returns the う-row kana that ends the dictionary form of
// the given godan class, or "" when class is not one of the godan classes.
func godanDictSuffix(class uint8) string {
	var ending string
	switch class {
	case VClassGodanKu:
		ending = "\xe3\x81\x8f" // く
	case VClassGodanGu:
		ending = "\xe3\x81\x90" // ぐ
	case VClassGodanSu:
		ending = "\xe3\x81\x99" // す
	case VClassGodanTsu:
		ending = "\xe3\x81\xa4" // つ
	case VClassGodanNu:
		ending = "\xe3\x81\xac" // ぬ
	case VClassGodanBu:
		ending = "\xe3\x81\xb6" // ぶ
	case VClassGodanMu:
		ending = "\xe3\x82\x80" // む
	case VClassGodanRu:
		ending = "\xe3\x82\x8b" // る
	case VClassGodanU:
		ending = "\xe3\x81\x86" // う
	}
	return ending
}

// stripTrailingHiragana removes exactly one trailing hiragana character from
// b and returns the shortened slice (sharing b's backing array). b is
// returned unchanged when it is shorter than 3 bytes or its last three bytes
// are not in the hiragana block.
//
// The hiragana block U+3040–U+309F encodes as E3 81 80..BF and E3 82 80..9F.
// E3 82 A0..BF is already katakana (ァ…タ) and must not be stripped, or a
// katakana root such as ググ would lose a character.
func stripTrailingHiragana(b []byte) []byte {
	n := len(b)
	if n < 3 || b[n-3] != 0xe3 {
		return b
	}
	mid, last := b[n-2], b[n-1]
	if mid == 0x81 || (mid == 0x82 && last < 0xa0) {
		return b[:n-3]
	}
	return b
}

// stripAllTrailingHiragana removes every trailing hiragana character from b
// but always leaves at least 3 bytes, so a pure-kana stem keeps its first
// character. The result shares b's backing array.
//
// Only the hiragana block U+3040–U+309F (E3 81 80..BF, E3 82 80..9F) is
// stripped. E3 82 A0..BF is katakana (ァ…タ) and is kept, so ググっ reduces
// to ググ rather than グ and agrees with the dictionary form ググる.
func stripAllTrailingHiragana(b []byte) []byte {
	for len(b) > 3 {
		n := len(b)
		if b[n-3] != 0xe3 {
			break
		}
		mid, last := b[n-2], b[n-1]
		if mid != 0x81 && !(mid == 0x82 && last < 0xa0) {
			break
		}
		b = b[:n-3]
	}
	return b
}

// isBareSuffix reports whether stem is non-empty and consists solely of
// 3-byte sequences E3 81..83 xx, i.e. hiragana (U+3040–U+309F) and katakana
// (U+30A0–U+30FF) with no kanji root. Such stems come from tokenizer splits
// (思った -> 思 + った) and should not be canonicalized as standalone verbs.
func isBareSuffix(stem []byte) bool {
	// Kana are always 3 bytes, so any other length cannot be all-kana.
	if len(stem) == 0 || len(stem)%3 != 0 {
		return false
	}
	for i := 0; i < len(stem); i += 3 {
		if stem[i] != 0xe3 || stem[i+1] < 0x81 || stem[i+1] > 0x83 {
			return false // not kana: the stem has some other root character
		}
	}
	return true
}

// isERowKana reports whether s is exactly one え-row hiragana:
// え け せ て ね へ め れ, or voiced/semi-voiced げ ぜ で べ ぺ.
// The archaic ゑ is not matched.
func isERowKana(s string) bool {
	if len(s) != 3 || s[0] != 0xe3 {
		return false
	}
	switch s[1] {
	case 0x81:
		switch s[2] {
		case 0x88, // え
			0x91, 0x92, // け げ
			0x9b, 0x9c, // せ ぜ
			0xa6, 0xa7, // て で
			0xad,             // ね
			0xb8, 0xb9, 0xba: // へ べ ぺ
			return true
		}
	case 0x82:
		return s[2] == 0x81 || s[2] == 0x8c // め, れ
	}
	return false
}

// isIRowKana reports whether s is exactly one い-row hiragana:
// い き し ち に ひ み り, or voiced/semi-voiced ぎ じ ぢ び ぴ.
func isIRowKana(s string) bool {
	if len(s) != 3 || s[0] != 0xe3 {
		return false
	}
	switch s[1] {
	case 0x81:
		switch s[2] {
		case 0x84, // い
			0x8d, 0x8e, // き ぎ
			0x97, 0x98, // し じ
			0xa1, 0xa2, // ち ぢ
			0xab,             // に
			0xb2, 0xb3, 0xb4, // ひ び ぴ
			0xbf: // み
			return true
		}
	case 0x82:
		return s[2] == 0x8a // り
	}
	return false
}

// isERowOrIRowKana reports whether s is a single え-row or い-row hiragana,
// the kana that precede る in an ichidan dictionary form.
func isERowOrIRowKana(s string) bool {
	if isERowKana(s) {
		return true
	}
	return isIRowKana(s)
}

// stripJACopula detects a sentence-final copula on a noun-final token
// (彼は学生だ, 学生でした) and removes it. It returns
// (token without the copula, true, morph bits) on a match and
// (word, false, 0) otherwise. A token that consists of the copula alone is
// not treated as a match.
//
// Forms are tried longest first so a short form never pre-empts a long one:
//
//	でした → past + polite
//	だった → past + plain
//	です   → non-past + polite
//	だ     → non-past + plain
//
// The caller must make sure the token is not a verb: verb past-tense た/だ is
// a separate suffix on conjugated verbs, and this function is only meant for
// noun-final tokens.
func stripJACopula(word string) (string, bool, uint16) {
	// trim returns the text before suffix when word ends in suffix and at
	// least one byte precedes it.
	trim := func(suffix string) (string, bool) {
		cut := len(word) - len(suffix)
		if cut <= 0 || word[cut:] != suffix {
			return "", false
		}
		return word[:cut], true
	}

	if head, ok := trim("\xe3\x81\xa7\xe3\x81\x97\xe3\x81\x9f"); ok { // でした
		return head, true, MetaTensePast | MetaFormalityPol
	}
	if head, ok := trim("\xe3\x81\xa0\xe3\x81\xa3\xe3\x81\x9f"); ok { // だった
		return head, true, MetaTensePast
	}
	if head, ok := trim("\xe3\x81\xa7\xe3\x81\x99"); ok { // です
		return head, true, MetaFormalityPol
	}

	head, ok := trim("\xe3\x81\xa0") // だ
	if !ok {
		return word, false, 0
	}
	// Bare だ collides with verb past contractions (噛んだ, 飛んだ, 死んだ):
	// a preceding ん or っ signals a verb, so the token is left alone.
	if len(head) >= 3 {
		switch head[len(head)-3:] {
		case "\xe3\x82\x93", "\xe3\x81\xa3": // ん, っ
			return word, false, 0
		}
	}
	return head, true, 0
}

// stripJAIAdj strips an i-adjective inflection suffix and returns
// (stem, morph, true) on success, or (nil, 0, false) when no suffix matches.
// The stem is a prefix of b (shared backing array) and must contain at least
// one kanji, which avoids false positives on pure-kana words.
//
// Recognized forms, tried longest first:
//
//	くなかった -> neg + past            (赤くなかった -> 赤)
//	かった    -> past                  (赤かった -> 赤)
//	くない    -> neg                   (赤くない -> 赤)
//	ければ    -> conditional, morph 0  (赤ければ -> 赤)
//	くて      -> te-form, MetaAspectProg (赤くて -> 赤)
//
// Bare い/く/さ endings are deliberately NOT stripped: they collide with
// godan-ku verbs (行く, 書く) and other non-adjective words.
func stripJAIAdj(b []byte) ([]byte, uint16, bool) {
	var (
		cut   int // byte length of the matched suffix
		morph uint16
	)

	// Each guard requires at least one byte of stem before the suffix.
	switch {
	case len(b) > 15 && hasSuffix(b, "\xe3\x81\x8f\xe3\x81\xaa\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f"):
		// くなかった: 5 kana = 15 bytes.
		cut, morph = 15, MetaPolarNeg|MetaTensePast
	case len(b) > 9 && hasSuffix(b, "\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f"):
		// かった: 3 kana = 9 bytes. The guard matches the suffix length so
		// single-kanji stems (赤かった, 高かった) are accepted like they are
		// for every other form.
		cut, morph = 9, MetaTensePast
	case len(b) > 9 && hasSuffix(b, "\xe3\x81\x8f\xe3\x81\xaa\xe3\x81\x84"):
		// くない: 3 kana = 9 bytes.
		cut, morph = 9, MetaPolarNeg
	case len(b) > 9 && hasSuffix(b, "\xe3\x81\x91\xe3\x82\x8c\xe3\x81\xb0"):
		// ければ: 3 kana = 9 bytes. No morph bit is recorded.
		cut, morph = 9, 0
	case len(b) > 6 && hasSuffix(b, "\xe3\x81\x8f\xe3\x81\xa6"):
		// くて: 2 kana = 6 bytes.
		cut, morph = 6, MetaAspectProg
	default:
		return nil, 0, false
	}

	stem := b[:len(b)-cut]
	if len(stem) < 3 || !hasKanji(stem) {
		return nil, 0, false
	}
	return stem, morph, true
}

// hasKanji reports whether b contains a 3-byte UTF-8 sequence whose lead byte
// is in E4..E9 (U+4000–U+9FFF, which covers the common CJK ideographs). The
// scan advances by the length implied by each lead byte and stops once fewer
// than three bytes remain.
func hasKanji(b []byte) bool {
	i := 0
	for i+2 < len(b) {
		switch lead := b[i]; {
		case lead < 0x80: // ASCII
			i++
		case lead < 0xe0: // 2-byte lead (stray continuation bytes land here too)
			i += 2
		case lead < 0xe4: // E0..E3: 3-byte, below the kanji range (kana live here)
			i += 3
		case lead <= 0xe9: // E4..E9: kanji range
			return true
		case lead < 0xf0: // EA..EF: 3-byte, above the kanji range
			i += 3
		default: // 4-byte lead
			i += 4
		}
	}
	return false
}

// endsInIKana reports whether s ends with the hiragana い (UTF-8 E3 81 84).
// It is a heuristic for i-adjective detection (赤い, 楽しい) and has known
// false positives, e.g. な-adjectives that end in い (きれい). The
// positional constraint at the call site (modifier immediately before a
// noun, no intervening particle) is expected to filter most of them.
func endsInIKana(s string) bool {
	const hiraganaI = "\xe3\x81\x84" // い
	n := len(s)
	return n >= len(hiraganaI) && s[n-len(hiraganaI):] == hiraganaI
}

// isJADitransitive reports whether lemma is a JA ditransitive verb taking a
// に-marked recipient and a を-marked patient: 彼に本をあげる (give him a
// book). When the clause's verb matches, に-marked slots flip from the
// ORGoal/ORLoc default to ORRecip.
//
// The set is closed and keyed by lemma. Ichidan verbs appear as their
// え/い-row stem (あげ, くれ, 教え, 見せ, 与え). Godan verbs appear both as
// the dictionary form that canonicalizeJAStem produces for them (やる, 渡す,
// 送る, 売る, 買う, 貸す, 払う) and as the bare root (や, 渡, 送, 売, 買, 貸,
// 払), so lemmas in either shape are recognized.
func isJADitransitive(lemma string) bool {
	switch lemma {
	// Ichidan stems.
	case "\xe3\x81\x82\xe3\x81\x92", // あげ (give)
		"\xe3\x81\x8f\xe3\x82\x8c", // くれ (give to me)
		"\xe6\x95\x99\xe3\x81\x88", // 教え (teach/tell)
		"\xe8\xa6\x8b\xe3\x81\x9b", // 見せ (show)
		"\xe4\xb8\x8e\xe3\x81\x88": // 与え (give/grant)
		return true

	// Godan verbs, bare root.
	case "\xe3\x82\x84", // や (やる, give)
		"\xe6\xb8\xa1", // 渡 (hand)
		"\xe9\x80\x81", // 送 (send)
		"\xe5\xa3\xb2", // 売 (sell)
		"\xe8\xb2\xb7", // 買 (buy)
		"\xe8\xb2\xb8", // 貸 (lend)
		"\xe6\x89\x95": // 払 (pay)
		return true

	// Godan verbs, dictionary form (root + う-row ending).
	case "\xe3\x82\x84\xe3\x82\x8b", // やる
		"\xe6\xb8\xa1\xe3\x81\x99", // 渡す
		"\xe9\x80\x81\xe3\x82\x8b", // 送る
		"\xe5\xa3\xb2\xe3\x82\x8b", // 売る
		"\xe8\xb2\xb7\xe3\x81\x86", // 買う
		"\xe8\xb2\xb8\xe3\x81\x99", // 貸す
		"\xe6\x89\x95\xe3\x81\x86": // 払う
		return true
	}
	return false
}

// isJATemporalNoun reports whether s is a temporal noun that works as a
// sentence-initial adverbial (today, yesterday, tomorrow, ...). Such words
// surface as bare nouns without a particle but semantically modify the
// clause's verb; extraction treats them as MKAdv adverbials. The set is
// closed.
func isJATemporalNoun(s string) bool {
	switch len(s) {
	case 3: // one character
		switch s {
		case "\xe4\xbb\x8a", // 今 (now)
			"\xe6\x9c\x9d", // 朝 (morning)
			"\xe6\x98\xbc", // 昼 (noon)
			"\xe5\xa4\x9c": // 夜 (night)
			return true
		}
	case 6: // two characters
		switch s {
		case "\xe6\x98\xa8\xe6\x97\xa5", // 昨日 (yesterday)
			"\xe4\xbb\x8a\xe6\x97\xa5", // 今日 (today)
			"\xe6\x98\x8e\xe6\x97\xa5", // 明日 (tomorrow)
			"\xe6\x98\xa8\xe5\xa4\x9c", // 昨夜 (last night)
			"\xe4\xbb\x8a\xe6\x9c\x9d", // 今朝 (this morning)
			"\xe4\xbb\x8a\xe6\x99\xa9", // 今晩 (tonight)
			"\xe4\xbb\x8a\xe5\xa4\x9c", // 今夜 (tonight)
			"\xe6\x9d\xa5\xe9\x80\xb1", // 来週 (next week)
			"\xe5\x85\x88\xe9\x80\xb1", // 先週 (last week)
			"\xe6\x9d\xa5\xe6\x9c\x88", // 来月 (next month)
			"\xe5\x85\x88\xe6\x9c\x88", // 先月 (last month)
			"\xe6\x9d\xa5\xe5\xb9\xb4", // 来年 (next year)
			"\xe5\x8e\xbb\xe5\xb9\xb4", // 去年 (last year)
			"\xe6\xaf\x8e\xe6\x97\xa5", // 毎日 (every day)
			"\xe6\xaf\x8e\xe9\x80\xb1", // 毎週 (every week)
			"\xe6\xaf\x8e\xe6\x9c\x88", // 毎月 (every month)
			"\xe6\xaf\x8e\xe5\xb9\xb4", // 毎年 (every year)
			"\xe5\xa4\x95\xe6\x96\xb9", // 夕方 (evening)
			"\xe3\x81\x84\xe3\x81\xa4": // いつ (when)
			return true
		}
	}
	return false
}

// isJARelationalNoun reports whether s is a relational noun used in the
// の-relational-noun-に locative compound pattern (箱の中に = "in the box").
// The set is closed; expansions belong here.
func isJARelationalNoun(s string) bool {
	// Each member is a single 3-byte kanji, laid out back to back:
	// 中 (inside), 内 (within), 外 (outside), 上 (on/above), 下 (under),
	// 前 (in front), 後 (behind), 横 (beside), 隣 (next to), 間 (between).
	const members = "\xe4\xb8\xad" + "\xe5\x86\x85" + "\xe5\xa4\x96" +
		"\xe4\xb8\x8a" + "\xe4\xb8\x8b" + "\xe5\x89\x8d" + "\xe5\xbe\x8c" +
		"\xe6\xa8\xaa" + "\xe9\x9a\xa3" + "\xe9\x96\x93"

	if len(s) != 3 {
		return false
	}
	for i := 0; i < len(members); i += 3 {
		if members[i:i+3] == s {
			return true
		}
	}
	return false
}

// jaRelationalNounToOblRole maps a relational noun to its oblique role:
// ORLoc for every member of isJARelationalNoun, ORNone for anything else.
// Position-specific meanings (on, under, in front) would need a
// finer-grained representation; for now all locative compounds collapse to
// ORLoc.
func jaRelationalNounToOblRole(s string) uint8 {
	if !isJARelationalNoun(s) {
		return ORNone
	}
	return ORLoc
}

// endsInNaiSuffix reports whether s ends in ない (negative verb suffix,
// UTF-8 E3 81 AA E3 81 84). It is used to keep negative verb forms out of
// predicative-i-adjective detection, since ない ends in い and would
// otherwise satisfy endsInIKana.
func endsInNaiSuffix(s string) bool {
	const nai = "\xe3\x81\xaa\xe3\x81\x84" // ない
	n := len(s)
	return n >= len(nai) && s[n-len(nai):] == nai
}

// endsInTaiSuffix reports whether s ends in たい (desiderative verb suffix,
// UTF-8 E3 81 9F E3 81 84). It is used to keep desiderative verb forms
// (食べたい / want to eat) out of predicative-i-adjective detection.
func endsInTaiSuffix(s string) bool {
	const tai = "\xe3\x81\x9f\xe3\x81\x84" // たい
	n := len(s)
	return n >= len(tai) && s[n-len(tai):] == tai
}

// isJABareKanjiAdj reports whether s is exactly one of the single-kanji
// color/size adjective roots 赤, 白, 黒, 大, 小, 新, 古, 長, 高, 低. These
// appear as bare-kanji ATTR modifiers in rendered JA when the い-suffix was
// not included.
func isJABareKanjiAdj(s string) bool {
	switch s {
	case "\xe8\xb5\xa4", // 赤
		"\xe7\x99\xbd", // 白
		"\xe9\xbb\x92", // 黒
		"\xe5\xa4\xa7", // 大
		"\xe5\xb0\x8f", // 小
		"\xe6\x96\xb0", // 新
		"\xe5\x8f\xa4", // 古
		"\xe9\x95\xb7", // 長
		"\xe9\xab\x98", // 高
		"\xe4\xbd\x8e": // 低
		return true
	}
	return false
}

// endsInTaKana reports whether s ends with the hiragana た (UTF-8 E3 81 9F).
// It is used to spot a past-tense verb form in front of a head noun (REL
// clause): 食べた猫 (the cat that ate), 見た本 (the book I saw). The
// morphological splitter exposes the boundary; this only checks that the
// preceding slot looks like a past-tense verb. Nouns ending in た are
// uncommon, so false positives are rare.
func endsInTaKana(s string) bool {
	const ta = "\xe3\x81\x9f" // た
	n := len(s)
	return n >= len(ta) && s[n-len(ta):] == ta
}

// endsInKuKana reports whether s ends with the hiragana く (UTF-8 E3 81 8F).
// It is a heuristic for the く-form adverbial of an i-adjective, which binds
// to the following verb: 速く (hayaku, quickly), 高く (takaku, highly),
// 楽しく (tanoshiku, enjoyably).
func endsInKuKana(s string) bool {
	const ku = "\xe3\x81\x8f" // く
	n := len(s)
	return n >= len(ku) && s[n-len(ku):] == ku
}

// stripJAConjugation matches the longest known conjugation suffix.
// Returns (remaining bytes, morph bits, stripped flag).
//
// Order matters: longest forms first. Passive (れる/られる) and causative
// (せる/させる) must be checked before simpler suffix endings to avoid
// premature shorter matches.
func stripJAConjugation(b []byte) ([]byte, uint16, bool) {
	// Passive/causative past combinations.
	// Guard: if the stem after stripping is bare kana (isBareSuffix), the
	// match is likely an ichidan verb whose stem ends in れ/せ colliding
	// with the passive/causative suffix (くれた = くれ+た, not く+れた).
	// Reject and fall through to simpler suffixes.
	if rest, ok := stripJASuffix(b, jaSeraretaPast); ok && !isBareSuffix(rest) {
		return rest, MetaCausative | MetaPassive | MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaRaretaPast); ok && !isBareSuffix(rest) {
		return rest, MetaPassive | MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaSasetaPast); ok && !isBareSuffix(rest) {
		return rest, MetaCausative | MetaTensePast, true
	}
	// Passive/causative non-past
	if rest, ok := stripJASuffix(b, jaSerareru); ok && !isBareSuffix(rest) {
		return rest, MetaCausative | MetaPassive, true
	}
	if rest, ok := stripJASuffix(b, jaRareru); ok && !isBareSuffix(rest) {
		return rest, MetaPassive, true
	}
	if rest, ok := stripJASuffix(b, jaSaseru); ok && !isBareSuffix(rest) {
		return rest, MetaCausative, true
	}
	if rest, ok := stripJASuffix(b, jaReru); ok && !isBareSuffix(rest) {
		return rest, MetaPassive, true
	}
	if rest, ok := stripJASuffix(b, jaSeru); ok && !isBareSuffix(rest) {
		return rest, MetaCausative, true
	}
	if rest, ok := stripJASuffix(b, jaRetaPast); ok && !isBareSuffix(rest) {
		return rest, MetaPassive | MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaSetaPast); ok && !isBareSuffix(rest) {
		return rest, MetaCausative | MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaNakatta); ok {
		return rest, MetaPolarNeg | MetaTensePast, true
	}
	// Desiderative (たい family) - before ない/た to avoid partial match.
	if rest, ok := stripJASuffix(b, jaTakunai); ok {
		return rest, MetaMoodVol | MetaPolarNeg, true
	}
	if rest, ok := stripJASuffix(b, jaTakatta); ok {
		return rest, MetaMoodVol | MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaTai); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaMashita); ok {
		return rest, MetaTensePast | MetaFormalityPol, true
	}
	if rest, ok := stripJASuffix(b, jaMasen); ok {
		return rest, MetaPolarNeg | MetaFormalityPol, true
	}
	if rest, ok := stripJASuffix(b, jaTeiru); ok {
		return rest, MetaAspectProg, true
	}
	if rest, ok := stripJASuffix(b, jaTeita); ok {
		return rest, MetaAspectProg | MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaDeiru); ok {
		return rest, MetaAspectProg, true
	}
	if rest, ok := stripJASuffix(b, jaDeita); ok {
		return rest, MetaAspectProg | MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaMasu); ok {
		return rest, MetaFormalityPol, true
	}
	if rest, ok := stripJASuffix(b, jaNai); ok {
		return rest, MetaPolarNeg, true
	}
	// Godan volitional compounds (longest first; each is お-row kana + う).
	// These must match before bare う suffixes or particles.
	if rest, ok := stripJASuffix(b, jaKoU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaGoU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaSoU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaToU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaNoU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaBoU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaMoU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaRoU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaOU); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaYou); ok {
		return rest, MetaMoodVol, true
	}
	if rest, ok := stripJASuffix(b, jaTe); ok {
		return rest, MetaAspectProg, true
	}
	if rest, ok := stripJASuffix(b, jaDe); ok {
		return rest, MetaAspectProg, true
	}
	if rest, ok := stripJASuffix(b, jaTa); ok {
		return rest, MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaDa); ok {
		return rest, MetaTensePast, true
	}
	if rest, ok := stripJASuffix(b, jaBa); ok {
		// ば: provisional conditional. Strip from lemma; the ClauseIf
		// relation lives at the discourse layer, not on the verb morph.
		return rest, 0, true
	}
	return b, 0, false
}

// Conjugation endings matched by stripJAConjugation, as raw UTF-8 bytes.
const (
	// Progressive: て/で + いる/いた.
	jaTeiru = "\xe3\x81\xa6\xe3\x81\x84\xe3\x82\x8b" // ている
	jaTeita = "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x9f" // ていた
	jaDeiru = "\xe3\x81\xa7\xe3\x81\x84\xe3\x82\x8b" // でいる
	jaDeita = "\xe3\x81\xa7\xe3\x81\x84\xe3\x81\x9f" // でいた

	// Desiderative: たい family.
	jaTai     = "\xe3\x81\x9f\xe3\x81\x84"                         // たい
	jaTakatta = "\xe3\x81\x9f\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // たかった
	jaTakunai = "\xe3\x81\x9f\xe3\x81\x8f\xe3\x81\xaa\xe3\x81\x84" // たくない

	// Negative.
	jaNai     = "\xe3\x81\xaa\xe3\x81\x84"                         // ない
	jaNakatta = "\xe3\x81\xaa\xe3\x81\x8b\xe3\x81\xa3\xe3\x81\x9f" // なかった

	// Polite: ます family.
	jaMasu    = "\xe3\x81\xbe\xe3\x81\x99"             // ます
	jaMashita = "\xe3\x81\xbe\xe3\x81\x97\xe3\x81\x9f" // ました
	jaMasen   = "\xe3\x81\xbe\xe3\x81\x9b\xe3\x82\x93" // ません

	// Single-kana endings.
	jaTe = "\xe3\x81\xa6" // て
	jaDe = "\xe3\x81\xa7" // で
	jaTa = "\xe3\x81\x9f" // た
	jaDa = "\xe3\x81\xa0" // だ
	jaBa = "\xe3\x81\xb0" // ば (conditional)

	// Passive: れる/られる.
	jaRareru     = "\xe3\x82\x89\xe3\x82\x8c\xe3\x82\x8b" // られる
	jaReru       = "\xe3\x82\x8c\xe3\x82\x8b"             // れる
	jaRaretaPast = "\xe3\x82\x89\xe3\x82\x8c\xe3\x81\x9f" // られた
	jaRetaPast   = "\xe3\x82\x8c\xe3\x81\x9f"             // れた

	// Causative: せる/させる.
	jaSaseru     = "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x8b" // させる
	jaSeru       = "\xe3\x81\x9b\xe3\x82\x8b"             // せる
	jaSasetaPast = "\xe3\x81\x95\xe3\x81\x9b\xe3\x81\x9f" // させた
	jaSetaPast   = "\xe3\x81\x9b\xe3\x81\x9f"             // せた

	// Causative-passive: させられる. Note that the names say "Sera" while
	// the values are させられる/させられた.
	jaSerareru     = "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x82\x8b" // させられる
	jaSeraretaPast = "\xe3\x81\x95\xe3\x81\x9b\xe3\x82\x89\xe3\x82\x8c\xe3\x81\x9f" // させられた

	// Volitional: お-row kana + う for godan, よう for ichidan.
	jaKoU = "\xe3\x81\x93\xe3\x81\x86" // こう (godan-ku)
	jaGoU = "\xe3\x81\x94\xe3\x81\x86" // ごう (godan-gu)
	jaSoU = "\xe3\x81\x9d\xe3\x81\x86" // そう (godan-su)
	jaToU = "\xe3\x81\xa8\xe3\x81\x86" // とう (godan-tsu)
	jaNoU = "\xe3\x81\xae\xe3\x81\x86" // のう (godan-nu)
	jaBoU = "\xe3\x81\xbc\xe3\x81\x86" // ぼう (godan-bu)
	jaMoU = "\xe3\x82\x82\xe3\x81\x86" // もう (godan-mu)
	jaRoU = "\xe3\x82\x8d\xe3\x81\x86" // ろう (godan-ru)
	jaOU  = "\xe3\x81\x8a\xe3\x81\x86" // おう (godan-u)
	jaYou = "\xe3\x82\x88\xe3\x81\x86" // よう (ichidan)
)

// stripJASuffix removes suffix from the end of word. It returns the part of
// word before the suffix (sharing word's backing array) and true when word
// ends in suffix and is strictly longer than it; otherwise it returns
// (nil, false). A word that consists of the suffix alone is therefore not a
// match.
//
// suffix may be a string (including the untyped string constants used for
// the ja* conjugation endings) or a []byte, so both call styles compile.
func stripJASuffix[S ~string | ~[]byte](word []byte, suffix S) ([]byte, bool) {
	cut := len(word) - len(suffix)
	if cut <= 0 || string(word[cut:]) != string(suffix) {
		return nil, false
	}
	return word[:cut], true
}