key.mx raw
1 package transdb
2
3 import (
4 "git.mleku.dev/iskra"
5 "git.smesh.lol/iskradb/lattice"
6 )
7
// Language domain codes. These are the values passed as the lang argument
// of MakeKey and interpreted by LangName.
const (
	LangUnknown uint8 = 0x00 // unknown language; also what KeyLang returns
	LangEN      uint8 = 0x01 // rendered as "EN" by LangName
	LangJA      uint8 = 0x02 // rendered as "JA" by LangName
)
14
15 // Coord layout constants — re-exported from iskra for package-internal use.
16 const (
17 CoordSemanticShift = iskra.CoordSemanticShift
18 CoordGrammaticalShift = iskra.CoordGrammaticalShift
19 CoordCooccurShift = iskra.CoordCooccurShift
20 CoordMorphShift = iskra.CoordMorphShift
21 CoordPragmaticShift = iskra.CoordPragmaticShift
22 CoordValencyShift = iskra.CoordValencyShift
23 CoordRegisterShift = iskra.CoordRegisterShift
24 )
25
26 // Semantic bitfield constants — re-exported from iskra.
27 const (
28 SemanticHumanSubj = iskra.SemanticHumanSubj
29 SemanticHumanObj = iskra.SemanticHumanObj
30 SemanticAnimSubj = iskra.SemanticAnimSubj
31 SemanticAnimObj = iskra.SemanticAnimObj
32 SemanticAbstSubj = iskra.SemanticAbstSubj
33 SemanticAbstObj = iskra.SemanticAbstObj
34 SemanticPlaceSubj = iskra.SemanticPlaceSubj
35 SemanticPlaceObj = iskra.SemanticPlaceObj
36 SemanticArtiSubj = iskra.SemanticArtiSubj
37 SemanticArtiObj = iskra.SemanticArtiObj
38 SemanticNatSubj = iskra.SemanticNatSubj
39 SemanticNatObj = iskra.SemanticNatObj
40 SemanticEventSubj = iskra.SemanticEventSubj
41 SemanticEventObj = iskra.SemanticEventObj
42 SemanticCollSubj = iskra.SemanticCollSubj
43 SemanticCollObj = iskra.SemanticCollObj
44 )
45
46 // Co-occurrence type constants — re-exported from iskra.
47 const (
48 CooccurNone = iskra.CooccurNone
49 CooccurNominal = iskra.CooccurNominal
50 CooccurVerbal = iskra.CooccurVerbal
51 CooccurFunction = iskra.CooccurFunction
52 )
53
54 // Coord functions delegated to iskra.
55 func PackCoord(semantic, grammatical, cooccur, morph, pragmatic, valency, register uint64) uint64 {
56 return iskra.PackCoord(semantic, grammatical, cooccur, morph, pragmatic, valency, register)
57 }
58
59 func RelaxCoord(coord uint64) []uint64 { return iskra.RelaxCoord(coord) }
60
61 func CoordSemantic(coord uint64) uint64 { return iskra.CoordSemantic(coord) }
62
63 func CoordMorph(coord uint64) uint8 { return iskra.CoordMorph(coord) }
64
65 func CoordCooccur(prevType, nextType uint8) uint64 { return iskra.CoordCooccur(prevType, nextType) }
66
67 func CoordPrevType(coord uint64) uint8 { return iskra.CoordPrevType(coord) }
68
69 func CoordNextType(coord uint64) uint8 { return iskra.CoordNextType(coord) }
70
71 // MakeKey returns the 128-bit SipHash key for (lang, coord, word).
72 // Delegates to iskra.MakeKey with lang as the domain byte.
73 func MakeKey(lang uint8, coord uint64, word string) lattice.Key {
74 return iskra.MakeKey(lang, coord, word)
75 }
76
77 // ActiveBranches are the 3 branches used for JA/EN lattice data.
78 var ActiveBranches = [3]lattice.Branch{lattice.Bnoun, lattice.Bverb, lattice.Bmodifier}
79
80 // POSForWord returns the POS type of a word at coord=0 (JMdict baseline).
81 // Returns 1=nominal, 2=verbal, 3=modifier, 0=unknown.
82 func POSForWord(tree *lattice.Tree, lang uint8, word string) uint8 {
83 key := MakeKey(lang, 0, word)
84 for i, b := range ActiveBranches {
85 if tree.LookupRecIdx(b, key) != lattice.NullRec {
86 return uint8(i + 1)
87 }
88 }
89 return 0
90 }
91
92 // POSTypeFor maps POSForWord result to CooccurNominal/Verbal/Function.
93 func POSTypeFor(posResult uint8) uint8 {
94 switch posResult {
95 case 1:
96 return CooccurNominal
97 case 2:
98 return CooccurVerbal
99 case 3:
100 return CooccurFunction
101 }
102 return CooccurNone
103 }
104
105 // branchOrderJA returns branch search order from coord's cooccurrence axis.
106 func branchOrderJA(coord uint64) [3]uint8 {
107 bMod := uint8(lattice.Bmodifier)
108 bNou := uint8(lattice.Bnoun)
109 bVer := uint8(lattice.Bverb)
110 prevType := CoordPrevType(coord)
111 nextType := CoordNextType(coord)
112 switch {
113 case prevType == CooccurNominal:
114 return [3]uint8{bMod, bNou, bVer}
115 case prevType == CooccurVerbal:
116 return [3]uint8{bMod, bVer, bNou}
117 case nextType == CooccurVerbal:
118 return [3]uint8{bNou, bMod, bVer}
119 default:
120 return [3]uint8{bNou, bVer, bMod}
121 }
122 }
123
// PackCtx — legacy wrapper kept for callers during migration.
//
// It packs three 2-bit POS codes into the old 6-bit ctx byte:
// prev in bits 5-4, cur in bits 3-2 and next in bits 1-0. This is the layout
// CtxToCoord decodes.
//
// Only the low two bits of each argument are used. Without the masks an
// argument above 3 would spill into the neighbouring field (or above bit 5)
// and decode as a different context. Results for arguments in 0..3 are
// unchanged.
func PackCtx(prev, cur, next uint8) uint8 {
	const fieldMask = 0x3 // each field is two bits wide
	return ((prev & fieldMask) << 4) | ((cur & fieldMask) << 2) | (next & fieldMask)
}
128
129 // CtxToCoord converts old 6-bit ctx to new coord cooccurrence field.
130 func CtxToCoord(ctx uint8) uint64 {
131 prevPOS := (ctx >> 4) & 3
132 nextPOS := ctx & 3
133 return CoordCooccur(prevPOS, nextPOS) << CoordCooccurShift
134 }
135
136 func KeyLang(key lattice.Key) uint8 { return LangUnknown }
137
138 func LangName(lang uint8) string {
139 switch lang {
140 case LangEN:
141 return "EN"
142 case LangJA:
143 return "JA"
144 }
145 return "?"
146 }
147