coord.mx raw
1 package iskra
2
3 import (
4 "crypto/siphash"
5
6 "git.smesh.lol/iskradb/lattice"
7 )
8
// A coordinate is a 64-bit word split into fixed-position fields. From the
// most significant bit down:
//
//	63-48  semantic    (16 bits)  8 categories, a subject and an object bit each
//	47-32  reserved
//	31-29  grammatical (3 bits)   syntactic role
//	28-25  cooccur     (4 bits)   prev_type(2) + next_type(2)
//	24-20  morphstate  (5 bits)   tense/aspect/polarity/formality/evidential
//	19-18  pragmatic   (2 bits)   domain context
//	17-16  valency     (2 bits)   argument count
//	15-2   reserved (available for case/number in Slavic declension)
//	1-0    register    (2 bits)   social register
//
// The all-zero coordinate is the base key: the dictionary form, used for
// context-free lookups.
//
// Each Coord*Shift constant is the position of the least significant bit of
// its field. They are listed here from the lowest field to the highest.
const (
	CoordRegisterShift    = 0  // bits 1-0
	CoordValencyShift     = 16 // bits 17-16
	CoordPragmaticShift   = 18 // bits 19-18
	CoordMorphShift       = 20 // bits 24-20
	CoordCooccurShift     = 25 // bits 28-25
	CoordGrammaticalShift = 29 // bits 31-29
	CoordSemanticShift    = 48 // bits 63-48
)
30
// Flags of the 16-bit semantic field. Each ontological category owns two
// adjacent bits: the even bit is its subject flag, the odd bit its object
// flag. The values are relative to the field itself (bit 0 is the field's
// lowest bit), not to the full 64-bit coordinate.
const (
	SemanticHumanSubj uint64 = 1 << iota // bit 0
	SemanticHumanObj                     // bit 1
	SemanticAnimSubj                     // bit 2
	SemanticAnimObj                      // bit 3
	SemanticAbstSubj                     // bit 4
	SemanticAbstObj                      // bit 5
	SemanticPlaceSubj                    // bit 6
	SemanticPlaceObj                     // bit 7
	SemanticArtiSubj                     // bit 8
	SemanticArtiObj                      // bit 9
	SemanticNatSubj                      // bit 10
	SemanticNatObj                       // bit 11
	SemanticEventSubj                    // bit 12
	SemanticEventObj                     // bit 13
	SemanticCollSubj                     // bit 14
	SemanticCollObj                      // bit 15
)
50
// Word-type codes for the co-occurrence field. The field holds two of these
// 2-bit codes: one for the previous word and one for the next word.
const (
	CooccurNone     uint8 = iota // 0
	CooccurNominal               // 1
	CooccurVerbal                // 2
	CooccurFunction              // 3
)
58
59 // PackCoord assembles a 64-bit coordinate from individual axis values.
60 func PackCoord(semantic, grammatical, cooccur, morph, pragmatic, valency, register uint64) uint64 {
61 return ((semantic & 0xFFFF) << CoordSemanticShift) |
62 ((grammatical & 0x7) << CoordGrammaticalShift) |
63 ((cooccur & 0xF) << CoordCooccurShift) |
64 ((morph & 0x1F) << CoordMorphShift) |
65 ((pragmatic & 0x3) << CoordPragmaticShift) |
66 ((valency & 0x3) << CoordValencyShift) |
67 (register & 0x3)
68 }
69
70 // CoordSemantic extracts the 16-bit semantic bitfield.
71 func CoordSemantic(coord uint64) uint64 {
72 return (coord >> CoordSemanticShift) & 0xFFFF
73 }
74
75 // CoordMorph extracts the 5-bit morphological state.
76 func CoordMorph(coord uint64) uint8 {
77 return uint8((coord >> CoordMorphShift) & 0x1F)
78 }
79
// CoordCooccur combines a previous-word type and a next-word type into the
// 4-bit cooccurrence value: prevType in bits 1-0, nextType in bits 3-2. Only
// the low two bits of each argument are used. The result is not shifted into
// coordinate position.
func CoordCooccur(prevType, nextType uint8) uint64 {
	prev := uint64(prevType) & 0x3
	next := uint64(nextType) & 0x3
	return next<<2 | prev
}
84
85 // CoordPrevType extracts the prev word-type from the cooccurrence field.
86 func CoordPrevType(coord uint64) uint8 {
87 return uint8((coord >> CoordCooccurShift) & 3)
88 }
89
90 // CoordNextType extracts the next word-type from the cooccurrence field.
91 func CoordNextType(coord uint64) uint8 {
92 return uint8((coord >> (CoordCooccurShift + 2)) & 3)
93 }
94
95 // RelaxCoord returns coords to try in fallback order (specific → general).
96 // Strips axes in priority order: pragmatic, register, valency, semantic bits
97 // MSB→LSB, grammatical, cooccurrence, morphstate.
98 func RelaxCoord(coord uint64) []uint64 {
99 if coord == 0 {
100 return []uint64{0}
101 }
102 seq := []uint64{coord}
103 add := func(c uint64) {
104 if c != seq[len(seq)-1] {
105 seq = append(seq, c)
106 }
107 }
108 c := coord
109 c = c &^ (uint64(0x3) << CoordPragmaticShift)
110 add(c)
111 c = c &^ uint64(0x3)
112 add(c)
113 c = c &^ (uint64(0x3) << CoordValencyShift)
114 add(c)
115 sem := (c >> CoordSemanticShift) & 0xFFFF
116 for bit := uint64(15); bit < 16; bit-- {
117 if (sem>>bit)&1 == 1 {
118 sem &^= 1 << bit
119 c = (c &^ (uint64(0xFFFF) << CoordSemanticShift)) | (sem << CoordSemanticShift)
120 add(c)
121 }
122 if bit == 0 {
123 break
124 }
125 }
126 c = c &^ (uint64(0x7) << CoordGrammaticalShift)
127 add(c)
128 c = c &^ (uint64(0xF) << CoordCooccurShift)
129 add(c)
130 c = c &^ (uint64(0x1F) << CoordMorphShift)
131 add(c)
132 return seq
133 }
134
135 // MakeKey returns the 128-bit SipHash key for (domain, coord, word).
136 // domain: 0x01=EN, 0x02=JA, 0x10-0x14=Moxie stages, etc.
137 // Hash input: [domain(1), coord_LE(8), word(N)]
138 func MakeKey(domain uint8, coord uint64, word string) lattice.Key {
139 buf := []byte{:9 + len(word):9 + len(word)}
140 buf[0] = domain
141 buf[1] = byte(coord)
142 buf[2] = byte(coord >> 8)
143 buf[3] = byte(coord >> 16)
144 buf[4] = byte(coord >> 24)
145 buf[5] = byte(coord >> 32)
146 buf[6] = byte(coord >> 40)
147 buf[7] = byte(coord >> 48)
148 buf[8] = byte(coord >> 56)
149 copy(buf[9:], []byte(word))
150 return lattice.Key(siphash.Sum128(siphash.DefaultKey, buf))
151 }
152