package iskra

import (
	"crypto/siphash"
	"git.smesh.lol/iskradb/lattice"
)

// 64-bit coordinate layout:
//
//	bits 63-48  semantic    (16 bits): 8 subject|object category pairs, 2 bits each
//	bits 47-32  (reserved)
//	bits 31-29  grammatical (3 bits):  syntactic role
//	bits 28-25  cooccur     (4 bits):  prev_type(2) + next_type(2);
//	                                   prev_type is bits 26-25, next_type is bits 28-27
//	bits 24-20  morphstate  (5 bits):  tense/aspect/polarity/formality/evidential
//	bits 19-18  pragmatic   (2 bits):  domain context
//	bits 17-16  valency     (2 bits):  argument count
//	bits 15-2   (reserved — available for case/number in Slavic declension)
//	bits 1-0    register    (2 bits):  social register
//
// coord=0 is the base key (dictionary form, context-free lookups).
//
// Each Coord*Shift constant is the position of the lowest bit of the
// corresponding field inside the coordinate.
const (
	CoordSemanticShift    = 48
	CoordGrammaticalShift = 29
	CoordCooccurShift     = 25
	CoordMorphShift       = 20
	CoordPragmaticShift   = 18
	CoordValencyShift     = 16
	CoordRegisterShift    = 0
)

// Semantic bitfield — 2 bits per ontological category (subject | object flag).
// The values are positions inside the 16-bit semantic field itself, i.e. before
// the field is shifted up by CoordSemanticShift. Flags may be OR-ed together.
const (
	SemanticHumanSubj uint64 = 1 << 0
	SemanticHumanObj  uint64 = 1 << 1
	SemanticAnimSubj  uint64 = 1 << 2
	SemanticAnimObj   uint64 = 1 << 3
	SemanticAbstSubj  uint64 = 1 << 4
	SemanticAbstObj   uint64 = 1 << 5
	SemanticPlaceSubj uint64 = 1 << 6
	SemanticPlaceObj  uint64 = 1 << 7
	SemanticArtiSubj  uint64 = 1 << 8
	SemanticArtiObj   uint64 = 1 << 9
	SemanticNatSubj   uint64 = 1 << 10
	SemanticNatObj    uint64 = 1 << 11
	SemanticEventSubj uint64 = 1 << 12
	SemanticEventObj  uint64 = 1 << 13
	SemanticCollSubj  uint64 = 1 << 14
	SemanticCollObj   uint64 = 1 << 15
)

// Co-occurrence word-type values (prev/next slot in CoordCooccur).
// Every value fits in the 2 bits that one slot occupies.
const (
	CooccurNone     uint8 = 0
	CooccurNominal  uint8 = 1
	CooccurVerbal   uint8 = 2
	CooccurFunction uint8 = 3
)

// PackCoord assembles a 64-bit coordinate from individual axis values.
//
// Every argument is masked to the width of its field before being shifted
// into place, so excess high bits are dropped instead of spilling into a
// neighbouring field. The reserved bit ranges are always left at zero.
func PackCoord(semantic, grammatical, cooccur, morph, pragmatic, valency, register uint64) uint64 {
	var packed uint64
	packed |= (semantic & 0xFFFF) << CoordSemanticShift
	packed |= (grammatical & 0x7) << CoordGrammaticalShift
	packed |= (cooccur & 0xF) << CoordCooccurShift
	packed |= (morph & 0x1F) << CoordMorphShift
	packed |= (pragmatic & 0x3) << CoordPragmaticShift
	packed |= (valency & 0x3) << CoordValencyShift
	packed |= register & 0x3
	return packed
}

// CoordSemantic returns the 16-bit semantic bitfield stored in coord,
// moved down so that its lowest flag sits at bit 0.
func CoordSemantic(coord uint64) uint64 {
	field := coord >> CoordSemanticShift
	return field & 0xFFFF
}

// CoordMorph returns the 5-bit morphological state stored in coord.
func CoordMorph(coord uint64) uint8 {
	field := (coord >> CoordMorphShift) & 0x1F
	return uint8(field)
}

// CoordCooccur combines (prevType, nextType) into the unshifted 4-bit
// cooccurrence value: prevType occupies bits 1-0 and nextType bits 3-2.
// Only the low two bits of each argument are used.
func CoordCooccur(prevType, nextType uint8) uint64 {
	prev := uint64(prevType & 3)
	next := uint64(nextType & 3)
	return next<<2 | prev
}

// CoordPrevType returns the prev word-type held in the low half of the
// cooccurrence field of coord.
func CoordPrevType(coord uint64) uint8 {
	field := coord >> CoordCooccurShift
	return uint8(field & 3)
}

// CoordNextType returns the next word-type held in the high half of the
// cooccurrence field of coord.
func CoordNextType(coord uint64) uint8 {
	field := coord >> (CoordCooccurShift + 2)
	return uint8(field & 3)
}

// RelaxCoord returns coords to try in fallback order (specific → general).
// Strips axes in priority order: pragmatic, register, valency, semantic bits
// MSB→LSB, grammatical, cooccurrence, morphstate.
func RelaxCoord(coord uint64) []uint64 { if coord == 0 { return []uint64{0} } seq := []uint64{coord} add := func(c uint64) { if c != seq[len(seq)-1] { seq = append(seq, c) } } c := coord c = c &^ (uint64(0x3) << CoordPragmaticShift) add(c) c = c &^ uint64(0x3) add(c) c = c &^ (uint64(0x3) << CoordValencyShift) add(c) sem := (c >> CoordSemanticShift) & 0xFFFF for bit := uint64(15); bit < 16; bit-- { if (sem>>bit)&1 == 1 { sem &^= 1 << bit c = (c &^ (uint64(0xFFFF) << CoordSemanticShift)) | (sem << CoordSemanticShift) add(c) } if bit == 0 { break } } c = c &^ (uint64(0x7) << CoordGrammaticalShift) add(c) c = c &^ (uint64(0xF) << CoordCooccurShift) add(c) c = c &^ (uint64(0x1F) << CoordMorphShift) add(c) return seq } // MakeKey returns the 128-bit SipHash key for (domain, coord, word). // domain: 0x01=EN, 0x02=JA, 0x10-0x14=Moxie stages, etc. // Hash input: [domain(1), coord_LE(8), word(N)] func MakeKey(domain uint8, coord uint64, word string) lattice.Key { buf := []byte{:9 + len(word):9 + len(word)} buf[0] = domain buf[1] = byte(coord) buf[2] = byte(coord >> 8) buf[3] = byte(coord >> 16) buf[4] = byte(coord >> 24) buf[5] = byte(coord >> 32) buf[6] = byte(coord >> 40) buf[7] = byte(coord >> 48) buf[8] = byte(coord >> 56) copy(buf[9:], []byte(word)) return lattice.Key(siphash.Sum128(siphash.DefaultKey, buf)) }