key.mx raw

   1  package transdb
   2  
   3  import (
   4  	"git.mleku.dev/iskra"
   5  	"git.smesh.lol/iskradb/lattice"
   6  )
   7  
   8  // Language domain codes.
   9  const (
  10  	LangEN      uint8 = 0x01
  11  	LangJA      uint8 = 0x02
  12  	LangUnknown uint8 = 0x00
  13  )
  14  
// Coord layout constants — re-exported from iskra for package-internal use.
// Each name is an alias of the iskra constant with the same name, so the
// values are defined entirely by that package. Within this file,
// CoordCooccurShift is the shift CtxToCoord applies to place the
// co-occurrence field inside a coord.
const (
	CoordSemanticShift    = iskra.CoordSemanticShift
	CoordGrammaticalShift = iskra.CoordGrammaticalShift
	CoordCooccurShift     = iskra.CoordCooccurShift
	CoordMorphShift       = iskra.CoordMorphShift
	CoordPragmaticShift   = iskra.CoordPragmaticShift
	CoordValencyShift     = iskra.CoordValencyShift
	CoordRegisterShift    = iskra.CoordRegisterShift
)
  25  
// Semantic bitfield constants — re-exported from iskra.
// Each name is an alias of the iskra constant with the same name. The names
// come in Subj/Obj pairs.
// NOTE(review): the bit positions and the exact meaning of each category are
// defined in iskra and are not visible here — confirm against that package.
const (
	SemanticHumanSubj = iskra.SemanticHumanSubj
	SemanticHumanObj  = iskra.SemanticHumanObj
	SemanticAnimSubj  = iskra.SemanticAnimSubj
	SemanticAnimObj   = iskra.SemanticAnimObj
	SemanticAbstSubj  = iskra.SemanticAbstSubj
	SemanticAbstObj   = iskra.SemanticAbstObj
	SemanticPlaceSubj = iskra.SemanticPlaceSubj
	SemanticPlaceObj  = iskra.SemanticPlaceObj
	SemanticArtiSubj  = iskra.SemanticArtiSubj
	SemanticArtiObj   = iskra.SemanticArtiObj
	SemanticNatSubj   = iskra.SemanticNatSubj
	SemanticNatObj    = iskra.SemanticNatObj
	SemanticEventSubj = iskra.SemanticEventSubj
	SemanticEventObj  = iskra.SemanticEventObj
	SemanticCollSubj  = iskra.SemanticCollSubj
	SemanticCollObj   = iskra.SemanticCollObj
)
  45  
// Co-occurrence type constants — re-exported from iskra.
// Each name is an alias of the iskra constant with the same name. In this
// file POSTypeFor returns one of them, and branchOrderJA compares them with
// the previous/next types decoded from a coord.
const (
	CooccurNone     = iskra.CooccurNone
	CooccurNominal  = iskra.CooccurNominal
	CooccurVerbal   = iskra.CooccurVerbal
	CooccurFunction = iskra.CooccurFunction
)
  53  
  54  // Coord functions delegated to iskra.
  55  func PackCoord(semantic, grammatical, cooccur, morph, pragmatic, valency, register uint64) uint64 {
  56  	return iskra.PackCoord(semantic, grammatical, cooccur, morph, pragmatic, valency, register)
  57  }
  58  
  59  func RelaxCoord(coord uint64) []uint64 { return iskra.RelaxCoord(coord) }
  60  
  61  func CoordSemantic(coord uint64) uint64 { return iskra.CoordSemantic(coord) }
  62  
  63  func CoordMorph(coord uint64) uint8 { return iskra.CoordMorph(coord) }
  64  
  65  func CoordCooccur(prevType, nextType uint8) uint64 { return iskra.CoordCooccur(prevType, nextType) }
  66  
  67  func CoordPrevType(coord uint64) uint8 { return iskra.CoordPrevType(coord) }
  68  
  69  func CoordNextType(coord uint64) uint8 { return iskra.CoordNextType(coord) }
  70  
  71  // MakeKey returns the 128-bit SipHash key for (lang, coord, word).
  72  // Delegates to iskra.MakeKey with lang as the domain byte.
  73  func MakeKey(lang uint8, coord uint64, word string) lattice.Key {
  74  	return iskra.MakeKey(lang, coord, word)
  75  }
  76  
  77  // ActiveBranches are the 3 branches used for JA/EN lattice data.
  78  var ActiveBranches = [3]lattice.Branch{lattice.Bnoun, lattice.Bverb, lattice.Bmodifier}
  79  
  80  // POSForWord returns the POS type of a word at coord=0 (JMdict baseline).
  81  // Returns 1=nominal, 2=verbal, 3=modifier, 0=unknown.
  82  func POSForWord(tree *lattice.Tree, lang uint8, word string) uint8 {
  83  	key := MakeKey(lang, 0, word)
  84  	for i, b := range ActiveBranches {
  85  		if tree.LookupRecIdx(b, key) != lattice.NullRec {
  86  			return uint8(i + 1)
  87  		}
  88  	}
  89  	return 0
  90  }
  91  
  92  // POSTypeFor maps POSForWord result to CooccurNominal/Verbal/Function.
  93  func POSTypeFor(posResult uint8) uint8 {
  94  	switch posResult {
  95  	case 1:
  96  		return CooccurNominal
  97  	case 2:
  98  		return CooccurVerbal
  99  	case 3:
 100  		return CooccurFunction
 101  	}
 102  	return CooccurNone
 103  }
 104  
 105  // branchOrderJA returns branch search order from coord's cooccurrence axis.
 106  func branchOrderJA(coord uint64) [3]uint8 {
 107  	bMod := uint8(lattice.Bmodifier)
 108  	bNou := uint8(lattice.Bnoun)
 109  	bVer := uint8(lattice.Bverb)
 110  	prevType := CoordPrevType(coord)
 111  	nextType := CoordNextType(coord)
 112  	switch {
 113  	case prevType == CooccurNominal:
 114  		return [3]uint8{bMod, bNou, bVer}
 115  	case prevType == CooccurVerbal:
 116  		return [3]uint8{bMod, bVer, bNou}
 117  	case nextType == CooccurVerbal:
 118  		return [3]uint8{bNou, bMod, bVer}
 119  	default:
 120  		return [3]uint8{bNou, bVer, bMod}
 121  	}
 122  }
 123  
 124  // PackCtx — legacy wrapper kept for callers during migration.
 125  func PackCtx(prev, cur, next uint8) uint8 {
 126  	return (prev << 4) | (cur << 2) | next
 127  }
 128  
 129  // CtxToCoord converts old 6-bit ctx to new coord cooccurrence field.
 130  func CtxToCoord(ctx uint8) uint64 {
 131  	prevPOS := (ctx >> 4) & 3
 132  	nextPOS := ctx & 3
 133  	return CoordCooccur(prevPOS, nextPOS) << CoordCooccurShift
 134  }
 135  
 136  func KeyLang(key lattice.Key) uint8 { return LangUnknown }
 137  
 138  func LangName(lang uint8) string {
 139  	switch lang {
 140  	case LangEN:
 141  		return "EN"
 142  	case LangJA:
 143  		return "JA"
 144  	}
 145  	return "?"
 146  }
 147