morph.mx raw
1 package iskra
2
3 import "git.smesh.lol/iskradb/lattice"
4
// Morph states are 5-bit values stored in Record.DataFile bits 1-5.
//
// Bit layout (wu xing mapping), most significant bit first:
//
//	bit 4 (earth, 16): tense      0=present 1=past
//	bit 3 (wood,   8): aspect     0=simple  1=progressive
//	bit 2 (metal,  4): polarity   0=affirm  1=negative
//	bit 1 (water,  2): formality  0=plain   1=polite
//	bit 0 (fire,   1): evidential 0=direct  1=reported
//
// The zero state is present simple affirmative plain direct, i.e. the
// dictionary/base form. Each literal below is written in binary so it can be
// read directly against the layout; the decimal value follows in the comment.
const (
	MorphPresAffPlain   uint8 = 0b00000 // 0
	MorphPresAffPolite  uint8 = 0b00010 // 2
	MorphPresNegPlain   uint8 = 0b00100 // 4
	MorphPresNegPolite  uint8 = 0b00110 // 6
	MorphPresProgPlain  uint8 = 0b01000 // 8
	MorphPresProgPolite uint8 = 0b01010 // 10
	MorphPresProgNeg    uint8 = 0b01100 // 12
	MorphPastAffPlain   uint8 = 0b10000 // 16
	MorphPastAffPolite  uint8 = 0b10010 // 18
	MorphPastNegPlain   uint8 = 0b10100 // 20
	MorphPastNegPolite  uint8 = 0b10110 // 22
	MorphPastProgPlain  uint8 = 0b11000 // 24
	MorphPastProgPolite uint8 = 0b11010 // 26
	MorphPastProgNeg    uint8 = 0b11100 // 28
	MorphPresReported   uint8 = 0b00001 // 1
	MorphPastReported   uint8 = 0b10001 // 17

	// Language activation masks: a set bit marks a morph-state bit that the
	// language uses.
	MaskJA uint8 = 0b11111 // all 5 bits
	MaskEN uint8 = 0b11100 // tense + aspect + polarity only
	MaskBG uint8 = 0b11111 // all 5 bits
)
38
// Register values occupy the 2-bit field at Record.Branch bits 3-4.
const (
	RegNeutral  uint8 = 0b00 // 0
	RegFormal   uint8 = 0b01 // 1
	RegInformal uint8 = 0b10 // 2
	RegVulgar   uint8 = 0b11 // 3
)
46
// Domain values occupy the 2-bit field at Record.Branch bits 5-6.
const (
	DomGeneral   uint8 = 0b00 // 0
	DomTechnical uint8 = 0b01 // 1
	DomMedical   uint8 = 0b10 // 2
	DomLegal     uint8 = 0b11 // 3
)
54
// Special values (Record.Branch bit 7).
//
// The field is a single bit, so only two encodings exist. SpecArchaic and
// SpecHumble are aliases of those two encodings rather than distinct values.
const (
	SpecNormal    uint8 = 0
	SpecHonorific uint8 = 1

	// SpecArchaic shares the encoding of SpecNormal (0).
	SpecArchaic = SpecNormal
	// SpecHumble shares the encoding of SpecHonorific (1). It is anchored to
	// the special-value enum itself rather than to a register constant that
	// merely happens to have the same numeric value.
	SpecHumble = SpecHonorific
)
62
// Filter constants select the register-based translation filter applied by
// MatchesFilter. They are numbered consecutively from zero.
const (
	FilterNone    uint8 = iota // 0
	FilterFormal               // 1
	FilterNeutral              // 2
	FilterCasual               // 3
)
70
71 // SetMorphState packs a 5-bit morph state into Record.DataFile bits 1-5.
72 func SetMorphState(rec *lattice.Record, state uint8) {
73 rec.DataFile = (rec.DataFile & 0xFFFFFFC1) | (uint32(state&0x1F) << 1)
74 }
75
76 // GetMorphState extracts the 5-bit morph state from Record.DataFile bits 1-5.
77 func GetMorphState(rec *lattice.Record) uint8 {
78 return uint8((rec.DataFile >> 1) & 0x1F)
79 }
80
// semanticDataFileShift is the bit offset of the 16-bit semantic flags field
// inside Record.DataFile (the field spans bits 6-21). It is kept untyped so it
// can be used in constant shift expressions.
const semanticDataFileShift = 6
82
83 // SetSemanticInDataFile packs 16-bit semantic flags into Record.DataFile bits 6-21.
84 func SetSemanticInDataFile(rec *lattice.Record, flags uint64) {
85 rec.DataFile = (rec.DataFile &^ (0xFFFF << semanticDataFileShift)) |
86 (uint32(flags&0xFFFF) << semanticDataFileShift)
87 }
88
89 // GetSemanticFromDataFile extracts the 16-bit semantic flags from Record.DataFile bits 6-21.
90 func GetSemanticFromDataFile(rec *lattice.Record) uint64 {
91 return uint64((rec.DataFile >> semanticDataFileShift) & 0xFFFF)
92 }
93
// PackBranch builds a Branch byte from its four fields:
//
//	bits 0-2: pos  (3 bits)
//	bits 3-4: reg  (2 bits)
//	bits 5-6: dom  (2 bits)
//	bit  7:   spec (1 bit)
//
// Each argument is masked to its field width, so out-of-range bits are
// dropped rather than spilling into a neighbouring field.
func PackBranch(pos uint8, reg, dom, spec uint8) uint8 {
	packed := pos & 0x07
	packed |= (reg & 0x03) << 3
	packed |= (dom & 0x03) << 5
	packed |= (spec & 0x01) << 7
	return packed
}
98
// POSFromBranch returns the 3-bit POS field (branch index 0-7) stored in
// bits 0-2 of a packed Branch byte.
func POSFromBranch(b uint8) uint8 {
	const posMask = 0x07
	return b & posMask
}
101
// RegFromBranch returns the 2-bit register field stored in bits 3-4 of a
// packed Branch byte.
func RegFromBranch(b uint8) uint8 {
	const regField = 0x03 << 3 // bits 3-4
	return (b & regField) >> 3
}
104
// DomFromBranch returns the 2-bit domain field stored in bits 5-6 of a
// packed Branch byte.
func DomFromBranch(b uint8) uint8 {
	const domField = 0x03 << 5 // bits 5-6
	return (b & domField) >> 5
}
107
// SpecFromBranch returns the honorific bit (bit 7) of a packed Branch byte
// as 0 or 1.
func SpecFromBranch(b uint8) uint8 {
	// Shifting an 8-bit value right by 7 leaves only the top bit.
	return b >> 7
}
110
111 // BranchWeirdness returns a tiebreaker score for register ranking. Lower = more preferred.
112 func BranchWeirdness(b uint8) uint32 {
113 reg := uint32(RegFromBranch(b))
114 spec := uint32(SpecFromBranch(b))
115 return reg*10 + spec*5
116 }
117
118 // MatchesFilter returns true if a packed Branch byte is acceptable under the filter.
119 func MatchesFilter(b uint8, filter uint8) bool {
120 reg := RegFromBranch(b)
121 switch filter {
122 case FilterNone:
123 return true
124 case FilterFormal:
125 return reg != RegVulgar
126 case FilterNeutral:
127 return reg == RegNeutral
128 case FilterCasual:
129 return reg != RegFormal && SpecFromBranch(b) != SpecHonorific
130 }
131 return true
132 }
133